From 79ab80beb792fc56141ca9bc5675b2109b729955 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 17:06:38 -0500 Subject: [PATCH 0001/1911] [drbd] use sock_sendmsg() ... and keep ->msg_iter through the loop Signed-off-by: Al Viro --- drivers/block/drbd/drbd_main.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc0127..d96a41a7e257db 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1846,7 +1846,7 @@ int drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_requ int drbd_send(struct drbd_connection *connection, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; struct msghdr msg; int rv, sent = 0; @@ -1855,15 +1855,14 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ? */ - iov.iov_base = buf; - iov.iov_len = size; - msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + if (sock == connection->data.socket) { rcu_read_lock(); connection->ko_count = rcu_dereference(connection->net_conf)->ko_count; @@ -1871,7 +1870,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, drbd_update_congested(connection); } do { - rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + rv = sock_sendmsg(sock, &msg); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; @@ -1885,8 +1884,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, if (rv < 0) break; sent += rv; - iov.iov_base += rv; - iov.iov_len -= rv; } while (sent < size); if (sock == connection->data.socket) From c1696cab700588f8493df7b51e096abf5bfb1d40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2015 04:51:19 -0500 Subject: [PATCH 0002/1911] [nbd] switch sock_xmit() to sock_{send,recv}msg() Step 1 - don't reinintialize ->msg_iter on each iteration. Signed-off-by: Al Viro --- drivers/block/nbd.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 38c576f76d36fc..8e63caecdd00d8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -215,7 +215,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,11 +225,12 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } + iov_iter_kvec(&msg.msg_iter, (send ? 
WRITE : READ) | ITER_KVEC, + &iov, 1, size); + current->flags |= PF_MEMALLOC; do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - iov.iov_base = buf; - iov.iov_len = size; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; @@ -237,19 +238,16 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) - result = kernel_sendmsg(sock, &msg, &iov, 1, size); + result = sock_sendmsg(sock, &msg); else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, - msg.msg_flags); + result = sock_recvmsg(sock, &msg, msg.msg_flags); if (result <= 0) { if (result == 0) result = -EPIPE; /* short read */ break; } - size -= result; - buf += result; - } while (size > 0); + } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); From c9f2b6aeb92286f15ffc80d2ba16bc24e530f560 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2015 05:09:35 -0500 Subject: [PATCH 0003/1911] [nbd] pass iov_iter to nbd_xmit() ... and don't mess with kmap() - just use BVEC_ITER for those parts. Signed-off-by: Al Viro --- drivers/block/nbd.c | 66 +++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 8e63caecdd00d8..3c2dbe412c0235 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -209,13 +209,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, - int size, int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags) { struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov = {.iov_base = buf, .iov_len = size}; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,8 +224,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } - iov_iter_kvec(&msg.msg_iter, (send ? 
WRITE : READ) | ITER_KVEC, - &iov, 1, size); + msg.msg_iter = *iter; current->flags |= PF_MEMALLOC; do { @@ -254,28 +252,21 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec, int flags) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, - bvec->bv_len, flags); - kunmap(bvec->bv_page); - return result; -} - /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); int result, flags; - struct nbd_request request; + struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + if (req_op(req) == REQ_OP_DISCARD) type = NBD_CMD_TRIM; else if (req_op(req) == REQ_OP_FLUSH) @@ -285,8 +276,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) else type = NBD_CMD_READ; - memset(&request, 0, sizeof(request)); - request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -297,7 +286,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, index, 1, &request, sizeof(request), + result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? 
MSG_MORE : 0); if (result <= 0) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -322,7 +311,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, index, &bvec, flags); + iov_iter_bvec(&from, ITER_BVEC | WRITE, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 1, &from, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -343,17 +334,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, - bvec->bv_len, MSG_WAITALL); - kunmap(bvec->bv_page); - return result; -} - /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { @@ -363,9 +343,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct request *req = NULL; u16 hwq; u32 tag; + struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; + struct iov_iter to; reply.magic = 0; - result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); + iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -405,7 +388,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, index, &bvec); + iov_iter_bvec(&to, ITER_BVEC | READ, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -645,14 +630,17 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { - struct nbd_request request = {}; + struct nbd_request request = { + .magic = htonl(NBD_REQUEST_MAGIC), + .type = htonl(NBD_CMD_DISC), + }; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; int i, ret; - request.magic = htonl(NBD_REQUEST_MAGIC); - request.type = htonl(NBD_CMD_DISC); - for (i = 0; i < nbd->num_connections; i++) { - ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + ret = sock_xmit(nbd, i, 1, &from, 0); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); From 4113e47b3d0931879d75dc5ad7c4c294651b3c1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Dec 2014 02:14:47 -0500 Subject: [PATCH 0004/1911] iscsi_target: deal with short writes on the tx side Signed-off-by: Al Viro --- drivers/target/iscsi/iscsi_target_util.c | 64 +++++++++--------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b5a1b4ccba124d..a9ba2479374f52 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1304,39 +1304,6 @@ static int iscsit_do_rx_data( return total_rx; } -static int iscsit_do_tx_data( - struct iscsi_conn 
*conn, - struct iscsi_data_count *count) -{ - int ret, iov_len; - struct kvec *iov_p; - struct msghdr msg; - - if (!conn || !conn->sock || !conn->conn_ops) - return -1; - - if (count->data_length <= 0) { - pr_err("Data length is: %d\n", count->data_length); - return -1; - } - - memset(&msg, 0, sizeof(struct msghdr)); - - iov_p = count->iov; - iov_len = count->iov_count; - - ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, - count->data_length); - if (ret != count->data_length) { - pr_err("Unexpected ret: %d send data %d\n", - ret, count->data_length); - return -EPIPE; - } - pr_debug("ret: %d, sent data: %d\n", ret, count->data_length); - - return ret; -} - int rx_data( struct iscsi_conn *conn, struct kvec *iov, @@ -1363,18 +1330,35 @@ int tx_data( int iov_count, int data) { - struct iscsi_data_count c; + struct msghdr msg; + int total_tx = 0; if (!conn || !conn->sock || !conn->conn_ops) return -1; - memset(&c, 0, sizeof(struct iscsi_data_count)); - c.iov = iov; - c.iov_count = iov_count; - c.data_length = data; - c.type = ISCSI_TX_DATA; + if (data <= 0) { + pr_err("Data length is: %d\n", data); + return -1; + } + + memset(&msg, 0, sizeof(struct msghdr)); + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, + iov, iov_count, data); + + while (msg_data_left(&msg)) { + int tx_loop = sock_sendmsg(conn->sock, &msg); + if (tx_loop <= 0) { + pr_debug("tx_loop: %d total_tx %d\n", + tx_loop, total_tx); + return tx_loop; + } + total_tx += tx_loop; + pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", + tx_loop, total_tx, data); + } - return iscsit_do_tx_data(conn, &c); + return total_tx; } static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y) From 3995d1610713c1a62af687872e460c3dca82d17c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Dec 2014 03:53:10 -0500 Subject: [PATCH 0005/1911] usbip_recv(): switch to sock_recvmsg() Signed-off-by: Al Viro --- drivers/usb/usbip/usbip_common.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 8b232290be6b97..f123bfee5d1b52 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -327,13 +327,11 @@ EXPORT_SYMBOL_GPL(usbip_dump_header); int usbip_recv(struct socket *sock, void *buf, int size) { int result; - struct msghdr msg; - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; + struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; - /* for blocks of if (usbip_dbg_flag_xmit) */ - char *bp = buf; - int osize = size; + iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); usbip_dbg_xmit("enter\n"); @@ -344,26 +342,18 @@ int usbip_recv(struct socket *sock, void *buf, int size) } do { + int sz = msg_data_left(&msg); sock->sk->sk_allocation = GFP_NOIO; - iov.iov_base = buf; - iov.iov_len = size; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = MSG_NOSIGNAL; - - result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL); + + result = sock_recvmsg(sock, &msg, MSG_WAITALL); if (result <= 0) { pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf, size, result, total); + sock, buf + total, sz, result, total); goto err; } - size -= result; - buf += result; total += result; - } while (size > 0); + } while (msg_data_left(&msg)); if (usbip_dbg_flag_xmit) { if (!in_interrupt()) @@ -372,9 +362,9 @@ int usbip_recv(struct socket *sock, void *buf, int size) 
pr_debug("interrupt :"); pr_debug("receiving....\n"); - usbip_dump_buffer(bp, osize); - pr_debug("received, osize %d ret %d size %d total %d\n", - osize, result, size, total); + usbip_dump_buffer(buf, size); + pr_debug("received, osize %d ret %d size %zd total %d\n", + size, result, msg_data_left(&msg), total); } return total; From 100803a84d3cb84bd3ee36e8ec4274019ad667ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Nov 2015 00:12:48 -0500 Subject: [PATCH 0006/1911] ceph: switch to sock_recvmsg() ... and use ITER_BVEC instead of playing with kmap() Signed-off-by: Al Viro --- net/ceph/messenger.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 770c52701efa3e..9e46db7e59684f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -520,7 +520,8 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; - r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; @@ -529,17 +530,20 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { - void *kaddr; - int ret; + struct bio_vec bvec = { + .bv_page = page, + .bv_offset = page_offset, + .bv_len = length + }; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; BUG_ON(page_offset + length > PAGE_SIZE); - - kaddr = kmap(page); - BUG_ON(!kaddr); - ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); - kunmap(page); - - return ret; + iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + r = sock_recvmsg(sock, &msg, msg.msg_flags); + if (r == -EAGAIN) + r = 0; + return r; } /* From be6e4d66f01335827d734ee9bde79862ed8a235b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Nov 2015 15:05:32 -0500 Subject: [PATCH 0007/1911] rds: remove dead code Signed-off-by: Al Viro --- net/rds/page.c | 29 ----------------------------- net/rds/rds.h | 7 ------- 2 files changed, 36 deletions(-) diff --git a/net/rds/page.c b/net/rds/page.c index e2b5a5832d3d52..7cc57e098ddb98 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -45,35 +45,6 @@ struct rds_page_remainder { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); -/* - * returns 0 on success or -errno on failure. - * - * We don't have to worry about flush_dcache_page() as this only works - * with private pages. If, say, we were to do directed receive to pinned - * user pages we'd have to worry more about cache coherence. (Though - * the flush_dcache_page() in get_user_pages() would probably be enough). - */ -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user) -{ - unsigned long ret; - void *addr; - - addr = kmap(page); - if (to_user) { - rds_stats_add(s_copy_to_user, bytes); - ret = copy_to_user(ptr, addr + offset, bytes); - } else { - rds_stats_add(s_copy_from_user, bytes); - ret = copy_from_user(addr + offset, ptr, bytes); - } - kunmap(page); - - return ret ? -EFAULT : 0; -} -EXPORT_SYMBOL_GPL(rds_page_copy_user); - /** * rds_page_remainder_alloc - build up regions of a message. 
* diff --git a/net/rds/rds.h b/net/rds/rds.h index ebbf909b87ec3f..8635993066ae6e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -781,13 +781,6 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr) /* page.c */ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp); -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user); -#define rds_page_copy_to_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 1) -#define rds_page_copy_from_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 0) void rds_page_exit(void); /* recv.c */ From 39c6aceae961776a11a3767553b0e295fc9d413b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 20:36:51 -0500 Subject: [PATCH 0008/1911] afs_send_pages(): use ITER_BVEC Signed-off-by: Al Viro --- fs/afs/rxrpc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 25f05a8d21b195..bfba8f0c3d43cc 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -249,8 +249,7 @@ void afs_flat_call_destructor(struct afs_call *call) /* * attach the data from a bunch of pages on an inode to a call */ -static int afs_send_pages(struct afs_call *call, struct msghdr *msg, - struct kvec *iov) +static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { struct page *pages[8]; unsigned count, n, loop, offset, to; @@ -273,20 +272,21 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, loop = 0; do { + struct bio_vec bvec = {.bv_page = pages[loop], + .bv_offset = offset}; msg->msg_flags = 0; to = PAGE_SIZE; if (first + loop >= last) to = call->last_to; else msg->msg_flags = MSG_MORE; - iov->iov_base = kmap(pages[loop]) + offset; - iov->iov_len = to - offset; + bvec.bv_len = to - offset; offset = 0; _debug("- range %u-%u%s", offset, to, msg->msg_flags ? 
" [more]" : ""); - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, - iov, 1, to - offset); + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, + &bvec, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it @@ -295,7 +295,6 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, to - offset); - kunmap(pages[loop]); if (ret < 0) break; } while (++loop < count); @@ -379,7 +378,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, goto error_do_abort; if (call->send_pages) { - ret = afs_send_pages(call, &msg, iov); + ret = afs_send_pages(call, &msg); if (ret < 0) goto error_do_abort; } From 61ff6e9b452d6158e0fd659c6d987f47cfcfbd7b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 20:55:45 -0500 Subject: [PATCH 0009/1911] ceph_tcp_sendpage(): use ITER_BVEC sendmsg Signed-off-by: Al Viro --- net/ceph/messenger.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9e46db7e59684f..6f3b5754cc7ec2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -583,18 +583,28 @@ static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) { + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + struct bio_vec bvec; int ret; - struct kvec iov; /* sendpage cannot properly handle pages with page_count == 0, * we need to fallback to sendmsg if that's the case */ if (page_count(page) >= 1) return __ceph_tcp_sendpage(sock, page, offset, size, more); - iov.iov_base = kmap(page) + offset; - iov.iov_len = size; - ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); - kunmap(page); + bvec.bv_page = page; + bvec.bv_offset = offset; + bvec.bv_len = size; + + if (more) + msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ + + iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); + ret = sock_sendmsg(sock, &msg); + if (ret == -EAGAIN) + ret = 0; return ret; } From 4b4fbad37f70f1dca26e060cf8dc71371b01899c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 21:12:37 -0500 Subject: [PATCH 0010/1911] ncpfs: sendmsg does *not* bugger iovec these days Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index f32f272ee50118..97cfccefccc5bb 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -205,10 +205,7 @@ static inline void __ncptcp_abort(struct ncp_server *server) static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req) { - struct kvec vec[3]; - /* sock_sendmsg updates iov pointers for us :-( */ - memcpy(vec, req->tx_ciov, req->tx_iovlen * sizeof(vec[0])); - return do_send(sock, vec, req->tx_iovlen, + return do_send(sock, req->tx_ciov, req->tx_iovlen, req->tx_totallen, MSG_DONTWAIT); } @@ -216,16 +213,13 @@ static void __ncptcp_try_send(struct ncp_server *server) { struct ncp_request_reply *rq; struct kvec *iov; - struct kvec iovc[3]; int result; rq = server->tx.creq; if (!rq) return; - /* sock_sendmsg updates iov pointers for us :-( */ - memcpy(iovc, rq->tx_ciov, rq->tx_iovlen * sizeof(iov[0])); - result = do_send(server->ncp_sock, iovc, rq->tx_iovlen, + result = do_send(server->ncp_sock, rq->tx_ciov, 
rq->tx_iovlen, rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT); if (result == -EAGAIN) From 2cebcc78fab5f56c9a9795fc49546d2436491312 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 22:01:58 -0500 Subject: [PATCH 0011/1911] ncpfs: don't mess with manually advancing iovec on send just keep iov_iter in req Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 69 +++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97cfccefccc5bb..a13c0b54f078e7 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -63,9 +63,7 @@ struct ncp_request_reply { size_t datalen; int result; enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status; - struct kvec* tx_ciov; - size_t tx_totallen; - size_t tx_iovlen; + struct iov_iter from; struct kvec tx_iov[3]; u_int16_t tx_type; u_int32_t sign[6]; @@ -205,22 +203,22 @@ static inline void __ncptcp_abort(struct ncp_server *server) static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req) { - return do_send(sock, req->tx_ciov, req->tx_iovlen, - req->tx_totallen, MSG_DONTWAIT); + struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT }; + return sock_sendmsg(sock, &msg); } static void __ncptcp_try_send(struct ncp_server *server) { struct ncp_request_reply *rq; - struct kvec *iov; + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT }; int result; rq = server->tx.creq; if (!rq) return; - result = do_send(server->ncp_sock, rq->tx_ciov, rq->tx_iovlen, - rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT); + msg.msg_iter = rq->from; + result = sock_sendmsg(server->ncp_sock, &msg); if (result == -EAGAIN) return; @@ -230,21 +228,12 @@ static void __ncptcp_try_send(struct ncp_server *server) __ncp_abort_request(server, rq, result); return; } - if (result >= rq->tx_totallen) { + if (!msg_data_left(&msg)) { server->rcv.creq = rq; server->tx.creq = NULL; return; } - rq->tx_totallen -= result; - iov = rq->tx_ciov; - while (iov->iov_len <= result) { - result -= iov->iov_len; - iov++; - rq->tx_iovlen--; - } - iov->iov_base += result; - iov->iov_len -= result; - rq->tx_ciov = iov; + rq->from = msg.msg_iter; } static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h) @@ -257,22 +246,21 @@ static inline void ncp_init_header(struct ncp_server *server, struct ncp_request static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req) { - size_t signlen; - struct ncp_request_header* h; + size_t signlen, len = req->tx_iov[1].iov_len; + struct ncp_request_header *h = req->tx_iov[1].iov_base; - req->tx_ciov = req->tx_iov + 1; - - h = req->tx_iov[1].iov_base; ncp_init_header(server, req, h); - signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1, - cpu_to_le32(req->tx_totallen), req->sign); + signlen = sign_packet(server, + req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, + len - sizeof(struct ncp_request_header) + 1, + cpu_to_le32(len), req->sign); if (signlen) { - req->tx_ciov[1].iov_base = req->sign; - req->tx_ciov[1].iov_len = signlen; - req->tx_iovlen += 1; - req->tx_totallen += signlen; + /* NCP over UDP appends signature */ + req->tx_iov[2].iov_base = req->sign; + req->tx_iov[2].iov_len = signlen; } + iov_iter_kvec(&req->from, WRITE | ITER_KVEC, + req->tx_iov + 1, signlen ? 
2 : 1, len + signlen); server->rcv.creq = req; server->timeout_last = server->m.time_out; server->timeout_retries = server->m.retry_count; @@ -286,24 +274,23 @@ static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req) { - size_t signlen; - struct ncp_request_header* h; + size_t signlen, len = req->tx_iov[1].iov_len; + struct ncp_request_header *h = req->tx_iov[1].iov_base; - req->tx_ciov = req->tx_iov; - h = req->tx_iov[1].iov_base; ncp_init_header(server, req, h); signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1, - cpu_to_be32(req->tx_totallen + 24), req->sign + 4) + 16; + len - sizeof(struct ncp_request_header) + 1, + cpu_to_be32(len + 24), req->sign + 4) + 16; req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC); - req->sign[1] = htonl(req->tx_totallen + signlen); + req->sign[1] = htonl(len + signlen); req->sign[2] = htonl(NCP_TCP_XMIT_VERSION); req->sign[3] = htonl(req->datalen + 8); + /* NCP over TCP prepends signature */ req->tx_iov[0].iov_base = req->sign; req->tx_iov[0].iov_len = signlen; - req->tx_iovlen += 1; - req->tx_totallen += signlen; + iov_iter_kvec(&req->from, WRITE | ITER_KVEC, + req->tx_iov, 2, len + signlen); server->tx.creq = req; __ncptcp_try_send(server); @@ -705,8 +692,6 @@ static int do_ncp_rpc_call(struct ncp_server *server, int size, req->datalen = max_reply_size; req->tx_iov[1].iov_base = server->packet; req->tx_iov[1].iov_len = size; - req->tx_iovlen = 1; - req->tx_totallen = size; req->tx_type = *(u_int16_t*)server->packet; result = ncp_add_request(server, req); From b8e2df1f812add27bfdb1e9d4fdf1051cc51fe1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 22:12:55 -0500 Subject: [PATCH 0012/1911] ncpfs: switch to sock_sendmsg() Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index a13c0b54f078e7..f013c92bb5d83e 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -40,19 +40,12 @@ static int _recv(struct socket *sock, void *buf, int size, unsigned flags) return kernel_recvmsg(sock, &msg, &iov, 1, size, flags); } -static inline int do_send(struct socket *sock, struct kvec *vec, int count, - int len, unsigned flags) -{ - struct msghdr msg = { .msg_flags = flags }; - return kernel_sendmsg(sock, &msg, vec, count, len); -} - static int _send(struct socket *sock, const void *buff, int len) { - struct kvec vec; - vec.iov_base = (void *) buff; - vec.iov_len = len; - return do_send(sock, &vec, 1, len, 0); + struct msghdr msg = { .msg_flags = 0 }; + struct kvec vec = {.iov_base = (void *)buff, .iov_len = len}; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len); + return sock_sendmsg(sock, &msg); } struct ncp_request_reply { @@ -345,18 +338,17 @@ static void __ncp_next_request(struct ncp_server *server) static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len) { if (server->info_sock) { - struct kvec iov[2]; - __be32 hdr[2]; - - hdr[0] = cpu_to_be32(len + 8); - hdr[1] = cpu_to_be32(id); - - iov[0].iov_base = hdr; - iov[0].iov_len = 8; - iov[1].iov_base = (void *) data; - iov[1].iov_len = len; + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; + __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)}; + struct kvec iov[2] = { + {.iov_base = hdr, .iov_len = 8}, + 
{.iov_base = (void *)data, .iov_len = len}, + }; + + iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE, + iov, 2, len + 8); - do_send(server->info_sock, iov, 2, len + 8, MSG_NOSIGNAL); + sock_sendmsg(server->info_sock, &msg); } } From ad1633a151df9869a222bd99ba04643dc2e0052b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2017 22:35:31 -0500 Subject: [PATCH 0013/1911] namei: fold unlazy_link() into its sole caller Signed-off-by: Al Viro --- fs/namei.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442c0..f08eca2b788b22 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -749,24 +749,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq return -ECHILD; } -static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) -{ - if (unlikely(!legitimize_path(nd, link, seq))) { - drop_links(nd); - nd->depth = 0; - nd->flags &= ~LOOKUP_RCU; - nd->path.mnt = NULL; - nd->path.dentry = NULL; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { - return 0; - } - path_put(link); - return -ECHILD; -} - static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { return dentry->d_op->d_revalidate(dentry, flags); @@ -1706,9 +1688,17 @@ static int pick_link(struct nameidata *nd, struct path *link, error = nd_alloc_stack(nd); if (unlikely(error)) { if (error == -ECHILD) { - if (unlikely(unlazy_link(nd, link, seq))) - return -ECHILD; - error = nd_alloc_stack(nd); + if (unlikely(!legitimize_path(nd, link, seq))) { + drop_links(nd); + nd->depth = 0; + nd->flags &= ~LOOKUP_RCU; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + error = nd_alloc_stack(nd); } if (error) { path_put(link); From 209a7fb2104f2724f651870306c65f86850ee953 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 01:35:39 -0500 Subject: [PATCH 0014/1911] lookup_fast(): clean up the logics around the fallback to non-rcu mode Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f08eca2b788b22..dfe6e32aeec6af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1558,12 +1558,7 @@ static int lookup_fast(struct nameidata *nd, *seqp = seq; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) status = d_revalidate(dentry, nd->flags); - if (unlikely(status <= 0)) { - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; - if (status == -ECHILD) - status = d_revalidate(dentry, nd->flags); - } else { + if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in * case that drops it. 
@@ -1574,9 +1569,12 @@ static int lookup_fast(struct nameidata *nd, path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; } + if (unlazy_walk(nd, dentry, seq)) + return -ECHILD; + if (unlikely(status == -ECHILD)) + /* we'd been told to redo it in non-rcu mode */ + status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) From a89f833737e6c75df0091ccf6c767b94745463c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:25:28 -0500 Subject: [PATCH 0015/1911] namei.c: fold the check for DCACHE_OP_REVALIDATE into d_revalidate() Signed-off-by: Al Viro --- fs/namei.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index dfe6e32aeec6af..2ed2701a74b563 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -751,7 +751,10 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, flags); + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) + return dentry->d_op->d_revalidate(dentry, flags); + else + return 1; } /** @@ -1454,19 +1457,14 @@ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry; - int error; - - dentry = d_lookup(dir, name); + struct dentry *dentry = d_lookup(dir, name); if (dentry) { - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, flags); - if (unlikely(error <= 0)) { - if (!error) - d_invalidate(dentry); - dput(dentry); - return ERR_PTR(error); - } + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + return ERR_PTR(error); } } return dentry; @@ -1556,8 +1554,7 @@ static int lookup_fast(struct nameidata *nd, return -ECHILD; *seqp = seq; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in @@ -1579,8 +1576,7 @@ static int lookup_fast(struct nameidata *nd, dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return 0; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1619,8 +1615,7 @@ static struct dentry *lookup_slow(const struct qstr *name, if (IS_ERR(dentry)) goto out; if (unlikely(!d_in_lookup(dentry))) { - if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && - !(flags & LOOKUP_NO_REVAL)) { + if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { @@ -3057,9 +3052,6 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (d_in_lookup(dentry)) break; - if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) - break; - error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; From 4675ac39b5dd5ff08dd8cb2be9ddd3cba778aa39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:29:15 -0500 Subject: [PATCH 0016/1911] namei.c: split unlazy_walk() In all but one case, the last two arguments are NULL and 0 resp.; almost everyone just wants to switch nameidata to non-RCU mode. 
The only exception is lookup_fast(), where we have a child dentry we want to legitimize as well. Split these two cases. Signed-off-by: Al Viro --- fs/namei.c | 105 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2ed2701a74b563..76cfeb641e975d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,52 +672,83 @@ static bool legitimize_links(struct nameidata *nd) /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * @dentry: child of nd->path.dentry or NULL - * @seq: seq number to check dentry against * Returns: 0 on success, -ECHILD on failure * - * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry - * for ref-walk mode. @dentry must be a path found by a do_lookup call on - * @nd or NULL. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path and nd->root + * for ref-walk mode. + * Must be called from rcu-walk context. * Nothing should touch nameidata between unlazy_walk() failure and * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static int unlazy_walk(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; + if (unlikely(!legitimize_links(nd))) + goto out2; + if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) + goto out1; + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) + goto out; + } + rcu_read_unlock(); + BUG_ON(nd->inode != parent->d_inode); + return 0; + +out2: + nd->path.mnt = NULL; + nd->path.dentry = NULL; +out1: + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; +out: + rcu_read_unlock(); + return -ECHILD; +} + +/** + * unlazy_child - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry + * @seq: seq number to check dentry against + * Returns: 0 on success, -ECHILD on failure + * + * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + * Nothing should touch nameidata between unlazy_child() failure and + * terminate_walk(). + */ +static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) +{ + BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) goto out2; - if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) + if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* - * For a negative lookup, the lookup sequence point is the parents - * sequence point, and it only needs to revalidate the parent dentry. - * - * For a positive lookup, we need to move both the parent and the - * dentry from the RCU domain to be properly refcounted. And the - * sequence number in the dentry validates *both* dentry counters, - * since we checked the sequence number of the parent after we got - * the child sequence number. So we know the parent must still - * be valid if the child sequence number is still valid. + * We need to move both the parent and the dentry from the RCU domain + * to be properly refcounted. 
And the sequence number in the dentry + * validates *both* dentry counters, since we checked the sequence + * number of the parent after we got the child sequence number. So we + * know the parent must still be valid if the child sequence number is */ - if (!dentry) { - if (read_seqcount_retry(&parent->d_seq, nd->seq)) - goto out; - BUG_ON(nd->inode != parent->d_inode); - } else { - if (!lockref_get_not_dead(&dentry->d_lockref)) - goto out; - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto drop_dentry; + if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) + goto out; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) { + rcu_read_unlock(); + dput(dentry); + goto drop_root_mnt; } - /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. @@ -733,10 +764,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq rcu_read_unlock(); return 0; -drop_dentry: - rcu_read_unlock(); - dput(dentry); - goto drop_root_mnt; out2: nd->path.mnt = NULL; out1: @@ -775,7 +802,7 @@ static int complete_walk(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return -ECHILD; } @@ -1001,7 +1028,7 @@ const char *get_link(struct nameidata *nd) touch_atime(&last->link); cond_resched(); } else if (atime_needs_update_rcu(&last->link, inode)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); touch_atime(&last->link); } @@ -1020,7 +1047,7 @@ const char *get_link(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); res = get(dentry, inode, &last->done); } @@ -1529,7 +1556,7 @@ static int lookup_fast(struct nameidata *nd, bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; return 0; } @@ -1567,7 +1594,7 @@ static int lookup_fast(struct nameidata *nd, if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; } - if (unlazy_walk(nd, dentry, seq)) + if (unlazy_child(nd, dentry, seq)) return -ECHILD; if (unlikely(status == -ECHILD)) /* we'd been told to redo it in non-rcu mode */ @@ -1646,7 +1673,7 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); @@ -1690,7 +1717,7 @@ static int pick_link(struct nameidata *nd, struct path *link, if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + } else if (likely(unlazy_walk(nd)) == 0) error = nd_alloc_stack(nd); } if (error) { @@ -2108,7 +2135,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return -ENOTDIR; @@ -2565,7 +2592,7 @@ mountpoint_last(struct nameidata *nd) /* If we're in rcuwalk, drop out of it to handle last component */ if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } From 47961f1353b8195cb258ee3068d0969ef87acd20 Mon Sep 17 
00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Dec 2016 00:08:48 +0100 Subject: [PATCH 0017/1911] ARM64: dts: meson-gx: move the SCPI and SRAM nodes to meson-gx SCPI and SRAM are identical on GXBB and GXL. Moving the corresponding nodes to meson-gx adds support for the thermal sensor on GXL based devices. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Neil Armstrong [khilman: add scpi_clocks label] Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 45 ++++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 57 --------------------- 2 files changed, 45 insertions(+), 57 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index eada0b58ba1c76..be56b9fae2aa4c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -65,6 +65,7 @@ reg = <0x0 0x0>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu1: cpu@1 { @@ -73,6 +74,7 @@ reg = <0x0 0x1>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu2: cpu@2 { @@ -81,6 +83,7 @@ reg = <0x0 0x2>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu3: cpu@3 { @@ -89,6 +92,7 @@ reg = <0x0 0x3>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; l2: l2-cache0 { @@ -153,6 +157,28 @@ }; }; + scpi { + compatible = "amlogic,meson-gxbb-scpi", "arm,scpi-pre-1.0"; + mboxes = <&mailbox 1 &mailbox 2>; + shmem = <&cpu_scp_lpri &cpu_scp_hpri>; + + scpi_clocks: clocks { + compatible = "arm,scpi-clocks"; + + scpi_dvfs: scpi_clocks@0 { + compatible = "arm,scpi-dvfs-clocks"; + #clock-cells = <1>; + clock-indices = <0>; + clock-output-names = "vcpu"; + }; + }; + + scpi_sensors: sensors { + compatible = "arm,scpi-sensors"; + #thermal-sensor-cells = <1>; + }; + }; + soc { compatible = "simple-bus"; #address-cells = <2>; @@ -264,6 +290,25 @@ #address-cells = <0>; }; + sram: sram@c8000000 { + compatible = "amlogic,meson-gxbb-sram", "mmio-sram"; + reg = <0x0 0xc8000000 0x0 0x14000>; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x0 0xc8000000 0x14000>; + + cpu_scp_lpri: scp-shmem@0 { + compatible = "amlogic,meson-gxbb-scp-shmem"; + reg = <0x13000 0x400>; + }; + + cpu_scp_hpri: scp-shmem@200 { + compatible = "amlogic,meson-gxbb-scp-shmem"; + reg = <0x13400 0x400>; + }; + }; + aobus: aobus@c8100000 { compatible = "simple-bus"; reg = <0x0 0xc8100000 0x0 0x100000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 596240c38a9cdd..5d686334f6929e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -50,28 +50,6 @@ / { compatible = "amlogic,meson-gxbb"; - scpi { - compatible = "amlogic,meson-gxbb-scpi", "arm,scpi-pre-1.0"; - mboxes = <&mailbox 1 &mailbox 2>; - shmem = <&cpu_scp_lpri &cpu_scp_hpri>; - - clocks { - compatible = "arm,scpi-clocks"; - - scpi_dvfs: scpi_clocks@0 { - compatible = "arm,scpi-dvfs-clocks"; - #clock-cells = <1>; - clock-indices = <0>; - clock-output-names = "vcpu"; - }; - }; - - scpi_sensors: sensors { - compatible = "arm,scpi-sensors"; - #thermal-sensor-cells = <1>; - }; - }; - soc { usb0_phy: phy@c0000000 { compatible = "amlogic,meson-gxbb-usb2-phy"; @@ -93,25 +71,6 @@ status = "disabled"; }; - sram: sram@c8000000 { - compatible = "amlogic,meson-gxbb-sram", "mmio-sram"; - reg = <0x0 0xc8000000 0x0 0x14000>; - - #address-cells = <1>; - 
#size-cells = <1>; - ranges = <0 0x0 0xc8000000 0x14000>; - - cpu_scp_lpri: scp-shmem@0 { - compatible = "amlogic,meson-gxbb-scp-shmem"; - reg = <0x13000 0x400>; - }; - - cpu_scp_hpri: scp-shmem@200 { - compatible = "amlogic,meson-gxbb-scp-shmem"; - reg = <0x13400 0x400>; - }; - }; - usb0: usb@c9000000 { compatible = "amlogic,meson-gxbb-usb", "snps,dwc2"; reg = <0x0 0xc9000000 0x0 0x40000>; @@ -138,22 +97,6 @@ }; }; -&cpu0 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu1 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu2 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu3 { - clocks = <&scpi_dvfs 0>; -}; - &cbus { spifc: spi@8c80 { compatible = "amlogic,meson-gxbb-spifc"; From bd97abc0d04930d0369411824d437f19e72e13cc Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Dec 2016 00:08:49 +0100 Subject: [PATCH 0018/1911] ARM64: dts: meson-gxm: add SCPI configuration for GXM This adds the SCPI DVFS clock index and configures the CPU cores accordingly. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxm.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index eb2f0c3e5e538e..4c55665a253f40 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -85,6 +85,7 @@ reg = <0x0 0x100>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu5: cpu@101 { @@ -93,6 +94,7 @@ reg = <0x0 0x101>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu6: cpu@102 { @@ -101,6 +103,7 @@ reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu7: cpu@103 { @@ -109,10 +112,17 @@ reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; }; }; +&scpi_dvfs { + clock-indices = <0 1>; + clock-output-names = "vbig", "vlittle"; +}; + &vpu { compatible = "amlogic,meson-gxm-vpu", "amlogic,meson-gx-vpu"; }; + From 7880b43bdfc9580700ee4568c75c383a5bcdd2ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Jan 2017 04:01:17 -0500 Subject: [PATCH 0019/1911] 9p: constify ->d_name handling Signed-off-by: Al Viro --- fs/9p/fid.c | 10 +++++----- fs/9p/vfs_inode.c | 10 +++++----- fs/9p/vfs_inode_dotl.c | 20 ++++++++++---------- include/net/9p/9p.h | 8 ++++---- include/net/9p/client.h | 18 +++++++++--------- net/9p/client.c | 18 +++++++++--------- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 60fb47469c86bb..ed4f8519b62706 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -91,10 +91,10 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) * dentry names. 
*/ static int build_path_from_dentry(struct v9fs_session_info *v9ses, - struct dentry *dentry, char ***names) + struct dentry *dentry, const unsigned char ***names) { int n = 0, i; - char **wnames; + const unsigned char **wnames; struct dentry *ds; for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) @@ -105,7 +105,7 @@ static int build_path_from_dentry(struct v9fs_session_info *v9ses, goto err_out; for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent) - wnames[i] = (char *)ds->d_name.name; + wnames[i] = ds->d_name.name; *names = wnames; return n; @@ -117,7 +117,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, kuid_t uid, int any) { struct dentry *ds; - char **wnames, *uname; + const unsigned char **wnames, *uname; int i, n, l, clone, access; struct v9fs_session_info *v9ses; struct p9_fid *fid, *old_fid = NULL; @@ -137,7 +137,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = v9fs_fid_find(ds, uid, any); if (fid) { /* Found the parent fid do a lookup with that */ - fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1); + fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1); goto fid_out; } up_read(&v9ses->rename_sem); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4f4450119e42f..e3e0d6581d4cd6 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -643,7 +643,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct dentry *dentry, char *extension, u32 perm, u8 mode) { int err; - char *name; + const unsigned char *name; struct p9_fid *dfid, *ofid, *fid; struct inode *inode; @@ -652,7 +652,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, err = 0; ofid = NULL; fid = NULL; - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -788,7 +788,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; struct inode *inode; - char *name; + const unsigned char *name; p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", dir, dentry, dentry, flags); @@ -802,7 +802,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(dfid)) return ERR_CAST(dfid); - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { if (fid == ERR_PTR(-ENOENT)) { @@ -1012,7 +1012,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->uname; - wstat.name = (char *) new_dentry->d_name.name; + wstat.name = new_dentry->d_name.name; retval = p9_client_wstat(oldfid, &wstat); clunk_newdir: diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5999bd050678cd..28130b1e53e4fe 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -244,7 +244,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, int err = 0; kgid_t gid; umode_t mode; - char *name = NULL; + const unsigned char *name = NULL; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; @@ -269,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", name, flags, omode); @@ -385,7 +385,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct v9fs_session_info *v9ses; struct p9_fid 
*fid = NULL, *dfid = NULL; kgid_t gid; - char *name; + const unsigned char *name; umode_t mode; struct inode *inode; struct p9_qid qid; @@ -416,7 +416,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; @@ -678,14 +678,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, { int err; kgid_t gid; - char *name; + const unsigned char *name; struct p9_qid qid; struct inode *inode; struct p9_fid *dfid; struct p9_fid *fid = NULL; struct v9fs_session_info *v9ses; - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); v9ses = v9fs_inode2v9ses(dir); @@ -699,7 +699,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, gid = v9fs_get_fsgid_for_create(dir); /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */ - err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); + err = p9_client_symlink(dfid, name, symname, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); @@ -775,7 +775,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); + err = p9_client_link(dfid, oldfid, dentry->d_name.name); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); @@ -812,7 +812,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, { int err; kgid_t gid; - char *name; + const unsigned char *name; umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; @@ -842,7 +842,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, err); goto error; } - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); if (err < 0) diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 27dfe85772b1b2..b8eb51a661e560 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -402,10 +402,10 @@ struct p9_wstat { u32 atime; u32 mtime; u64 length; - char *name; - char *uid; - char *gid; - char *muid; + const char *name; + const char *uid; + const char *gid; + const char *muid; char *extension; /* 9p2000.u extensions */ kuid_t n_uid; /* 9p2000.u extensions */ kgid_t n_gid; /* 9p2000.u extensions */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index c6b97e58cf8455..b582339ccef5c6 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -223,16 +223,16 @@ void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname); + const char *uname, kuid_t n_uname, const char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone); + const unsigned char * const *wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension); -int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); -int p9_client_symlink(struct p9_fid *fid, 
char *name, char *symname, kgid_t gid, - struct p9_qid *qid); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname); +int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname, + kgid_t gid, struct p9_qid *qid); +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); @@ -250,9 +250,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); -int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd53c..5a0c3a64af14d0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1101,7 +1101,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname) + const char *uname, kuid_t n_uname, const char *aname) { int err = 0; struct p9_req_t *req; @@ -1149,7 +1149,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, EXPORT_SYMBOL(p9_client_attach); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone) + const unsigned char * const *wnames, int clone) { int err; struct p9_client *clnt; @@ -1271,7 +1271,7 @@ int p9_client_open(struct p9_fid *fid, int mode) } EXPORT_SYMBOL(p9_client_open); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid) { int err = 0; @@ -1316,7 +1316,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, } EXPORT_SYMBOL(p9_client_create_dotl); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension) { int err; @@ -1361,8 +1361,8 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, - struct p9_qid *qid) +int p9_client_symlink(struct p9_fid *dfid, const char *name, + const char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1395,7 +1395,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, } EXPORT_SYMBOL(p9_client_symlink); -int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname) { struct p9_client *clnt; struct p9_req_t *req; @@ -2117,7 +2117,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) } 
EXPORT_SYMBOL(p9_client_readdir); -int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; @@ -2148,7 +2148,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, } EXPORT_SYMBOL(p9_client_mknod_dotl); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *qid) { int err; From 5a582cff47c90af29ecb293caa2f667bd4d45e54 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 17 Jan 2017 13:08:48 +0100 Subject: [PATCH 0020/1911] clk: meson-gxbb: Export HDMI clocks Export HDMI clock from internal to dt-bindings. Signed-off-by: Neil Armstrong Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.h | 4 ++-- include/dt-bindings/clock/gxbb-clkc.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index 0252939ba58f3e..2139e97f5e3911 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -231,7 +231,7 @@ #define CLKID_AHB_DATA_BUS 60 #define CLKID_AHB_CTRL_BUS 61 #define CLKID_HDMI_INTR_SYNC 62 -#define CLKID_HDMI_PCLK 63 +/* CLKID_HDMI_PCLK */ /* CLKID_USB1_DDR_BRIDGE */ /* CLKID_USB0_DDR_BRIDGE */ #define CLKID_MMC_PCLK 66 @@ -245,7 +245,7 @@ #define CLKID_VCLK2_VENCI1 74 #define CLKID_VCLK2_VENCP0 75 #define CLKID_VCLK2_VENCP1 76 -#define CLKID_GCLK_VENCI_INT0 77 +/* CLKID_GCLK_VENCI_INT0 */ #define CLKID_GCLK_VENCI_INT 78 #define CLKID_DAC_CLK 79 #define CLKID_AOCLK_GATE 80 diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index baade6f429d01a..da1d473a5a3ad1 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -18,8 +18,10 @@ #define CLKID_USB0 50 #define CLKID_USB1 51 #define CLKID_USB 55 +#define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_GCLK_VENCI_INT0 77 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 From 890a96a257b497e8361055ffbf66e2fd08833074 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Jan 2017 23:20:29 +0100 Subject: [PATCH 0021/1911] ARM64: dts: meson-gx: add the missing uart_AO_B This adds the missing node for the uart_AO_B port to the meson-gx.dtsi (as this is supported by GXBB, GXL and GXM) along with the required pinctrl pins. This is required as some boards are using it (the boards from the Khadas VIM series for example have it exposed on the pin headers). 
Signed-off-by: Martin Blumenstingl Acked-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 8 ++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 7 +++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index be56b9fae2aa4c..9212fca6eb94bc 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -324,6 +324,14 @@ status = "disabled"; }; + uart_AO_B: serial@4e0 { + compatible = "amlogic,meson-uart"; + reg = <0x0 0x004e0 0x0 0x14>; + interrupts = ; + clocks = <&xtal>; + status = "disabled"; + }; + ir: ir@580 { compatible = "amlogic,meson-gxbb-ir"; reg = <0x0 0x00580 0x0 0x40>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 5d686334f6929e..474435e217591b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -138,6 +138,13 @@ }; }; + uart_ao_b_pins: uart_ao_b { + mux { + groups = "uart_tx_ao_b", "uart_rx_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 69216246275dfa..6d815f9930c9f1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -88,6 +88,13 @@ }; }; + uart_ao_b_pins: uart_ao_b { + mux { + groups = "uart_tx_ao_b", "uart_rx_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; From 261e1d5cc5ee994f17089d8f0d4f8443743d732d Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Jan 2017 23:32:53 +0100 Subject: [PATCH 0022/1911] ARM64: dts: meson-gx: add the serial CTS and RTS pin groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds pinctrl group nodes for the CTS and RTS pins of each serial controller. This makes it possible to enable the CTS and RTS pins which are controlled by the serial controller hardware (through the meson_uart driver). 
Signed-off-by: Martin Blumenstingl Tested-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 40 +++++++++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 40 +++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 474435e217591b..f001c4d007bc84 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -138,6 +138,14 @@ }; }; + uart_ao_a_cts_rts_pins: uart_ao_a_cts_rts { + mux { + groups = "uart_cts_ao_a", + "uart_rts_ao_a"; + function = "uart_ao"; + }; + }; + uart_ao_b_pins: uart_ao_b { mux { groups = "uart_tx_ao_b", "uart_rx_ao_b"; @@ -145,6 +153,14 @@ }; }; + uart_ao_b_cts_rts_pins: uart_ao_b_cts_rts { + mux { + groups = "uart_cts_ao_b", + "uart_rts_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; @@ -290,6 +306,14 @@ }; }; + uart_a_cts_rts_pins: uart_a_cts_rts { + mux { + groups = "uart_cts_a", + "uart_rts_a"; + function = "uart_a"; + }; + }; + uart_b_pins: uart_b { mux { groups = "uart_tx_b", @@ -298,6 +322,14 @@ }; }; + uart_b_cts_rts_pins: uart_b_cts_rts { + mux { + groups = "uart_cts_b", + "uart_rts_b"; + function = "uart_b"; + }; + }; + uart_c_pins: uart_c { mux { groups = "uart_tx_c", @@ -306,6 +338,14 @@ }; }; + uart_c_cts_rts_pins: uart_c_cts_rts { + mux { + groups = "uart_cts_c", + "uart_rts_c"; + function = "uart_c"; + }; + }; + i2c_a_pins: i2c_a { mux { groups = "i2c_sck_a", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 6d815f9930c9f1..f3d1566c0a098c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -88,6 +88,14 @@ }; }; + uart_ao_a_cts_rts_pins: uart_ao_a_cts_rts { + mux { + groups = "uart_cts_ao_a", + "uart_rts_ao_a"; + function = "uart_ao"; + }; + }; + uart_ao_b_pins: uart_ao_b { mux { groups = "uart_tx_ao_b", "uart_rx_ao_b"; @@ -95,6 +103,14 @@ }; }; + uart_ao_b_cts_rts_pins: uart_ao_b_cts_rts { + mux { + groups = "uart_cts_ao_b", + "uart_rts_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; @@ -170,6 +186,14 @@ }; }; + uart_a_cts_rts_pins: uart_a_cts_rts { + mux { + groups = "uart_cts_a", + "uart_rts_a"; + function = "uart_a"; + }; + }; + uart_b_pins: uart_b { mux { groups = "uart_tx_b", @@ -178,6 +202,14 @@ }; }; + uart_b_cts_rts_pins: uart_b_cts_rts { + mux { + groups = "uart_cts_b", + "uart_rts_b"; + function = "uart_b"; + }; + }; + uart_c_pins: uart_c { mux { groups = "uart_tx_c", @@ -186,6 +218,14 @@ }; }; + uart_c_cts_rts_pins: uart_c_cts_rts { + mux { + groups = "uart_cts_c", + "uart_rts_c"; + function = "uart_c"; + }; + }; + i2c_a_pins: i2c_a { mux { groups = "i2c_sck_a", From 2fbbc4bf69f293df317559a267f4120f290b8fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Tue, 17 Jan 2017 02:17:53 +0100 Subject: [PATCH 0023/1911] ARM64: dts: meson-gxbb-vega-s95: Add LED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is one blue LED on the front of the device. Keep it lit and configure it as panic indicator. 
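(For reference, the panic-indicator property used in the hunk below marks the LED for the kernel's LED panic trigger, which blinks the LED if the kernel panics; without it the LED simply stays in its default-state.)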
Signed-off-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index e59ad308192f62..86709929fd208c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -53,6 +53,17 @@ stdout-path = "serial0:115200n8"; }; + leds { + compatible = "gpio-leds"; + + blue { + label = "vega-s95:blue:on"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + }; + usb_vbus: regulator-usb0-vbus { compatible = "regulator-fixed"; From b949165c86cdd230fbb7997a2b2966e860caf372 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 17 Jan 2017 13:05:38 +0100 Subject: [PATCH 0024/1911] ARM64: dts: meson-gx: Add HDMI HPD/DDC pinctrl nodes Add pinctrl nodes for HDMI HPD and DDC pins modes for Amlogic Meson GXL and GXBB SoCs. Signed-off-by: Neil Armstrong Acked-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 14 ++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index f001c4d007bc84..39a774ad83ce13 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -453,6 +453,20 @@ function = "pwm_f_y"; }; }; + + hdmi_hpd_pins: hdmi_hpd { + mux { + groups = "hdmi_hpd"; + function = "hdmi_hpd"; + }; + }; + + hdmi_i2c_pins: hdmi_i2c { + mux { + groups = "hdmi_sda", "hdmi_scl"; + function = "hdmi_i2c"; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index f3d1566c0a098c..bdf2305a2e25b4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -276,6 +276,20 @@ function = "pwm_e"; }; }; + + hdmi_hpd_pins: hdmi_hpd { + mux { + groups = "hdmi_hpd"; + function = "hdmi_hpd"; + }; + }; + + hdmi_i2c_pins: hdmi_i2c { + mux { + groups = "hdmi_sda", "hdmi_scl"; + function = "hdmi_i2c"; + }; + }; }; eth-phy-mux { From 3d071d8da1f586c24863a57349586a1611b9aa67 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 15 Dec 2016 12:42:33 +0100 Subject: [PATCH 0025/1911] random: remove stale maybe_reseed_primary_crng The function maybe_reseed_primary_crng is not used anywhere and thus can be removed. 
Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1ef26403bcc83f..8e5ab20848c067 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -855,13 +855,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) spin_unlock_irqrestore(&primary_crng.lock, flags); } -static inline void maybe_reseed_primary_crng(void) -{ - if (crng_init > 2 && - time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(&primary_crng, &input_pool); -} - static inline void crng_wait_ready(void) { wait_event_interruptible(crng_init_wait, crng_ready()); From 2e03c36f25ebb52d3358b8baebcdf96895c33a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:39:31 +0100 Subject: [PATCH 0026/1911] random: remove stale urandom_init_wait The urandom_init_wait wait queue is a left over from the pre-ChaCha20 times and can therefore be safely removed. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8e5ab20848c067..482531d87fb8f0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -409,7 +409,6 @@ static struct poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); From 43d8a72cd985ca5279a9eb84d61fcbb3ee3d3774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:40:59 +0100 Subject: [PATCH 0027/1911] random: remove variable limit The variable limit was used to identify the nonblocking pool's unlimited random number generation. As the nonblocking pool is a thing of the past, remove the limit variable and any conditions around it (i.e. preserve the branches for limit == 1). 
Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 482531d87fb8f0..92d6dd24c86ea5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -466,7 +466,6 @@ struct entropy_store { int entropy_count; int entropy_total; unsigned int initialized:1; - unsigned int limit:1; unsigned int last_data_init:1; __u8 last_data[EXTRACT_SIZE]; }; @@ -484,7 +483,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", - .limit = 1, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -492,7 +490,6 @@ static struct entropy_store input_pool = { static struct entropy_store blocking_pool = { .poolinfo = &poolinfo_table[1], .name = "blocking", - .limit = 1, .pull = &input_pool, .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data, @@ -1212,15 +1209,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) r->entropy_count > r->poolinfo->poolfracbits) return; - if (r->limit == 0 && random_min_urandom_seed) { - unsigned long now = jiffies; - - if (time_before(now, - r->last_pulled + random_min_urandom_seed * HZ)) - return; - r->last_pulled = now; - } - _xfer_secondary_pool(r, nbytes); } @@ -1228,8 +1216,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { __u32 tmp[OUTPUT_POOL_WORDS]; - /* For /dev/random's pool, always leave two wakeups' worth */ - int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4; int bytes = nbytes; /* pull at least as much as a wakeup */ @@ -1240,7 +1226,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); bytes = extract_entropy(r->pull, tmp, bytes, - random_read_wakeup_bits / 8, rsvd_bytes); + random_read_wakeup_bits / 8, 0); mix_pool_bytes(r, tmp, bytes); credit_entropy_bits(r, bytes*8); } @@ -1268,7 +1254,7 @@ static void push_to_pool(struct work_struct *work) static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { - int entropy_count, orig; + int entropy_count, orig, have_bytes; size_t ibytes, nfrac; BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); @@ -1277,14 +1263,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, retry: entropy_count = orig = ACCESS_ONCE(r->entropy_count); ibytes = nbytes; - /* If limited, never pull more than available */ - if (r->limit) { - int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + /* never pull more than available */ + have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - if ((have_bytes -= reserved) < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - } + if ((have_bytes -= reserved) < 0) + have_bytes = 0; + ibytes = min_t(size_t, ibytes, have_bytes); if (ibytes < min) ibytes = 0; From 5d0e5ea343a0f70351428476bcf8715e0731f26a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:41:22 +0100 Subject: [PATCH 0028/1911] random: fix comment for unused random_min_urandom_seed The variable random_min_urandom_seed is not needed any more as it defined the reseeding behavior of the nonblocking pool. Though it is not needed any more, it is left in the code for user space interface compatibility. 
Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 92d6dd24c86ea5..9d147d456598e2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -313,9 +313,7 @@ static int random_read_wakeup_bits = 64; static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; /* - * The minimum number of seconds between urandom pool reseeding. We - * do this to limit the amount of entropy that can be drained from the - * input pool even if there are heavy demands on /dev/urandom. + * Variable is currently unused by left for user space compatibility. */ static int random_min_urandom_seed = 60; From 4e6118974c7ab5d65e77fe8c47c87a85e9c7ed7d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 19 Jan 2017 09:21:57 +0100 Subject: [PATCH 0029/1911] ARM64: dts: meson-gxm: Rename q200 and q201 DT files for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to keep consistency naming with the Nexbox A1 DTS file, remove the S912 SoC name in the GXM DT files. Suggested-by: Andreas Färber Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 4 ++-- .../amlogic/{meson-gxm-s912-q200.dts => meson-gxm-q200.dts} | 0 .../amlogic/{meson-gxm-s912-q201.dts => meson-gxm-q201.dts} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/arm64/boot/dts/amlogic/{meson-gxm-s912-q200.dts => meson-gxm-q200.dts} (100%) rename arch/arm64/boot/dts/amlogic/{meson-gxm-s912-q201.dts => meson-gxm-q201.dts} (100%) diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 0d7bfbf7d922bb..1aca8e58ad2bd4 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -9,8 +9,8 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-nexbox-a95x.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxm-s912-q200.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxm-s912-q201.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q200.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-nexbox-a1.dtb always := $(dtb-y) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-q200.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxm-s912-q200.dts rename to arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-q201.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q201.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxm-s912-q201.dts rename to arch/arm64/boot/dts/amlogic/meson-gxm-q201.dts From 100bd9a961e368490daa8cdbb2f41b03ef50164a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:40 +0100 Subject: [PATCH 0030/1911] dt-bindings: vendor-prefix: Add wetek vendor prefix Add prefix for WeTek Electronics, limited, a company producing multimedia Set-Top-Boxes and supporting KODI and LibreELEC distributions. 
Signed-off-by: Neil Armstrong Acked-by: Rob Herring Signed-off-by: Kevin Hilman --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 16d3b5e7f5d1f5..0c16d8581d13d3 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -320,6 +320,7 @@ virtio Virtual I/O Device Specification, developed by the OASIS consortium vivante Vivante Corporation voipac Voipac Technologies s.r.o. wd Western Digital Corp. +wetek WeTek Electronics, limited. wexler Wexler winbond Winbond Electronics corp. wlf Wolfson Microelectronics From d537d289de06f57f5342106208ecf17ea83f23e2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:41 +0100 Subject: [PATCH 0031/1911] ARM64: dts: meson-gxbb: Add support for WeTek Hub and Play Adds support for the WeTek Hub and Play2 boards. The Hub is an extremely small IPTv Set-Top-Box and the Play2 is a more traditionnal Satellite or Terrestrial and IPTv Set-Top-Box. Both are based on the p200 Reference Design and out-of-tree support is based on LibreELEC kernel at [1]. [1] https://github.com/wetek-enigma/linux-amlogic Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 2 + .../boot/dts/amlogic/meson-gxbb-wetek-hub.dts | 66 +++++++++++++ .../dts/amlogic/meson-gxbb-wetek-play2.dts | 94 +++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 1aca8e58ad2bd4..293e985e3dfac1 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -5,6 +5,8 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-p201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-pro.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-meta.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-telos.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-wetek-hub.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-wetek-play2.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts new file mode 100644 index 00000000000000..56f855901262c3 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 BayLibre, Inc. + * Author: Neil Armstrong + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/dts-v1/; + +#include "meson-gxbb-p20x.dtsi" + +/ { + compatible = "wetek,hub", "amlogic,meson-gxbb"; + model = "WeTek Hub"; + + leds { + compatible = "gpio-leds"; + + system { + label = "wetek-play:system-status"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + }; + + cvbs-connector { + status = "disabled"; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts new file mode 100644 index 00000000000000..ea79fdd2c248a3 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 BayLibre, Inc. + * Author: Neil Armstrong + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/dts-v1/; + +#include "meson-gxbb-p20x.dtsi" +#include + +/ { + compatible = "wetek,play2", "amlogic,meson-gxbb"; + model = "WeTek Play 2"; + + leds { + compatible = "gpio-leds"; + + system { + label = "wetek-play:system-status"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + + wifi { + label = "wetek-play:wifi-status"; + gpios = <&gpio GPIODV_26 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + ethernet { + label = "wetek-play:ethernet-status"; + gpios = <&gpio GPIODV_27 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; + + gpio-keys-polled { + compatible = "gpio-keys-polled"; + #address-cells = <1>; + #size-cells = <0>; + poll-interval = <100>; + + button@0 { + label = "reset"; + linux,code = ; + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_LOW>; + }; + }; +}; + +&i2c_A { + status = "okay"; + pinctrl-0 = <&i2c_a_pins>; + pinctrl-names = "default"; +}; From 0264a88d6153e6cd5ee61239058b2002f36dde6b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:42 +0100 Subject: [PATCH 0032/1911] dt-bindings: amlogic: Add WeTek boards Signed-off-by: Neil Armstrong Acked-by: Rob Herring Signed-off-by: Kevin Hilman --- Documentation/devicetree/bindings/arm/amlogic.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt index 9b2b41ab68177d..c246cd2730d906 100644 --- a/Documentation/devicetree/bindings/arm/amlogic.txt +++ b/Documentation/devicetree/bindings/arm/amlogic.txt @@ -40,6 +40,8 @@ Board compatible values: - "hardkernel,odroid-c2" (Meson gxbb) - "amlogic,p200" (Meson gxbb) - "amlogic,p201" (Meson gxbb) + - "wetek,hub" (Meson gxbb) + - "wetek,play2" (Meson gxbb) - "amlogic,p212" (Meson gxl s905x) - "amlogic,p230" (Meson gxl s905d) - "amlogic,p231" (Meson gxl s905d) From 33d0fcdfe0e87070d96c678e554d711ae15b9fa6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 19 Jan 2017 15:58:20 +0100 Subject: [PATCH 0033/1911] clk: gxbb: add the SAR ADC clocks and expose them The HHI_SAR_CLK_CNTL contains three SAR ADC specific clocks: - a mux clock to choose between different ADC reference clocks (this is 2-bit wide, but the datasheet only lists the parents for the first bit) - a divider for the input/reference clock - a gate which enables the ADC clock Additionally this exposes the ADC core clock (CLKID_SAR_ADC) and CLKID_SANA (which seems to enable the analog inputs, but unfortunately there is no documentation for this - we just mimic what the vendor driver does). 
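To show how this mux/div/gate chain is meant to be consumed: a driver would normally request the gate by name and let the rate request propagate down to the divider, since the gate carries CLK_SET_RATE_PARENT. The following is only a rough sketch; the clock name "adc_clk" is taken from the DT snippets later in this series and the 1.2 MHz rate is an arbitrary example, so this is not the actual meson-saradc driver code.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Illustrative consumer only; the clock name and rate are assumptions. */
static int sar_adc_clk_enable(struct device *dev)
{
        struct clk *adc_clk;
        int ret;

        adc_clk = devm_clk_get(dev, "adc_clk");
        if (IS_ERR(adc_clk))
                return PTR_ERR(adc_clk);

        /* CLK_SET_RATE_PARENT on the gate lets this reach the divider. */
        ret = clk_set_rate(adc_clk, 1200000);
        if (ret)
                return ret;

        return clk_prepare_enable(adc_clk);
}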
Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.c | 48 +++++++++++++++++++++++++++ drivers/clk/meson/gxbb.h | 9 +++-- include/dt-bindings/clock/gxbb-clkc.h | 4 +++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 9d9af446bafc94..1c1ec137a3cc72 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -564,6 +564,46 @@ static struct clk_gate gxbb_clk81 = { }, }; +static struct clk_mux gxbb_sar_adc_clk_sel = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .mask = 0x3, + .shift = 9, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk_sel", + .ops = &clk_mux_ops, + /* NOTE: The datasheet doesn't list the parents for bit 10 */ + .parent_names = (const char *[]){ "xtal", "clk81", }, + .num_parents = 2, + }, +}; + +static struct clk_divider gxbb_sar_adc_clk_div = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .shift = 0, + .width = 8, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk_div", + .ops = &clk_divider_ops, + .parent_names = (const char *[]){ "sar_adc_clk_sel" }, + .num_parents = 1, + }, +}; + +static struct clk_gate gxbb_sar_adc_clk = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .bit_idx = 8, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk", + .ops = &clk_gate_ops, + .parent_names = (const char *[]){ "sar_adc_clk_div" }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + /* Everything Else (EE) domain gates */ static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0); static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1); @@ -754,6 +794,9 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = { [CLKID_SD_EMMC_A] = &gxbb_emmc_a.hw, [CLKID_SD_EMMC_B] = &gxbb_emmc_b.hw, [CLKID_SD_EMMC_C] = &gxbb_emmc_c.hw, + [CLKID_SAR_ADC_CLK] = &gxbb_sar_adc_clk.hw, + [CLKID_SAR_ADC_SEL] = &gxbb_sar_adc_clk_sel.hw, + [CLKID_SAR_ADC_DIV] = &gxbb_sar_adc_clk_div.hw, }, .num = NR_CLKS, }; @@ -856,6 +899,7 @@ static struct clk_gate *gxbb_clk_gates[] = { &gxbb_emmc_a, &gxbb_emmc_b, &gxbb_emmc_c, + &gxbb_sar_adc_clk, }; static int gxbb_clkc_probe(struct platform_device *pdev) @@ -888,6 +932,10 @@ static int gxbb_clkc_probe(struct platform_device *pdev) gxbb_mpeg_clk_sel.reg = clk_base + (u64)gxbb_mpeg_clk_sel.reg; gxbb_mpeg_clk_div.reg = clk_base + (u64)gxbb_mpeg_clk_div.reg; + /* Populate the base address for the SAR ADC clks */ + gxbb_sar_adc_clk_sel.reg = clk_base + (u64)gxbb_sar_adc_clk_sel.reg; + gxbb_sar_adc_clk_div.reg = clk_base + (u64)gxbb_sar_adc_clk_div.reg; + /* Populate base address for gates */ for (i = 0; i < ARRAY_SIZE(gxbb_clk_gates); i++) gxbb_clk_gates[i]->reg = clk_base + diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index 2139e97f5e3911..dc487180f84775 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -191,7 +191,7 @@ #define CLKID_PERIPHS 20 #define CLKID_SPICC 21 /* CLKID_I2C */ -#define CLKID_SAR_ADC 23 +/* #define CLKID_SAR_ADC */ #define CLKID_SMART_CARD 24 #define CLKID_RNG0 25 #define CLKID_UART0 26 @@ -237,7 +237,7 @@ #define CLKID_MMC_PCLK 66 #define CLKID_DVIN 67 #define CLKID_UART2 68 -#define CLKID_SANA 69 +/* #define CLKID_SANA */ #define CLKID_VPU_INTR 70 #define CLKID_SEC_AHB_AHB3_BRIDGE 71 #define CLKID_CLK81_A53 72 @@ -265,8 +265,11 @@ /* CLKID_SD_EMMC_A */ /* CLKID_SD_EMMC_B */ /* CLKID_SD_EMMC_C */ +/* CLKID_SAR_ADC_CLK */ +/* CLKID_SAR_ADC_SEL */ +#define CLKID_SAR_ADC_DIV 99 -#define 
NR_CLKS 97 +#define NR_CLKS 100 /* include the CLKIDs that have been made part of the stable DT binding */ #include diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index da1d473a5a3ad1..692846c7941b53 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -14,6 +14,7 @@ #define CLKID_MPLL2 15 #define CLKID_SPI 34 #define CLKID_I2C 22 +#define CLKID_SAR_ADC 23 #define CLKID_ETH 36 #define CLKID_USB0 50 #define CLKID_USB1 51 @@ -21,10 +22,13 @@ #define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_SANA 69 #define CLKID_GCLK_VENCI_INT0 77 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 #define CLKID_SD_EMMC_C 96 +#define CLKID_SAR_ADC_CLK 97 +#define CLKID_SAR_ADC_SEL 98 #endif /* __GXBB_CLKC_H */ From 7eea67101b9713ae438955e8899b3c4b078419f9 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 20 Jan 2017 07:57:52 -0800 Subject: [PATCH 0034/1911] ARM64: dts: meson-gxl: rename Nexbox A95x for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the GXL family has S905X and S905D SoCs, we're keeping the SoC name in the DTS filename for clarity. Rename this file accordingly to be consistent with the rest of the GXL DTS files. Cc: Neil Armstrong Reviewed-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 2 +- ...eson-gxl-nexbox-a95x.dts => meson-gxl-s905x-nexbox-a95x.dts} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/arm64/boot/dts/amlogic/{meson-gxl-nexbox-a95x.dts => meson-gxl-s905x-nexbox-a95x.dts} (100%) diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 293e985e3dfac1..3f94bce33b7f4a 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -10,7 +10,7 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxl-nexbox-a95x.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-nexbox-a95x.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q200.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-nexbox-a1.dtb diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts rename to arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts From 2d3b74d3836901c8601e147b1ae24170f9fa2982 Mon Sep 17 00:00:00 2001 From: jbrunet Date: Thu, 26 Jan 2017 17:05:49 +0100 Subject: [PATCH 0035/1911] clk: gxbb: fix CLKID_ETH defined twice CLKID_ETH is defined in the dt-bindings but has not been commented out in the clock driver. Just do it now. 
Fixes: 738f66d3211d ("clk: gxbb: add AmLogic GXBB clk controller driver") Signed-off-by: Jerome Brunet Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index dc487180f84775..8ee2022ce5d563 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -204,7 +204,7 @@ #define CLKID_ASSIST_MISC 33 /* CLKID_SPI */ #define CLKID_I2S_SPDIF 35 -#define CLKID_ETH 36 +/* CLKID_ETH */ #define CLKID_DEMUX 37 #define CLKID_AIU_GLUE 38 #define CLKID_IEC958 39 From e48512244f653960cfbb002597686362558533d1 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 22:05:26 +0100 Subject: [PATCH 0036/1911] ARM64: dts: meson-gx: add the missing pwm_AO_ab node All Meson GX SoCs (GXBB, GXL and GXM) have a PWM controller within the AO domain. When one of the board's LEDs is connected to one of the AO PWM pins then this can be used to dim that LED (when the leds-pwm driver is used). Add the pwm_AO_ab to allow such devices to use the leds-pwm driver. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 9212fca6eb94bc..9110dc1a248104 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -332,6 +332,13 @@ status = "disabled"; }; + pwm_AO_ab: pwm@550 { + compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; + reg = <0x0 0x00550 0x0 0x10>; + #pwm-cells = <3>; + status = "disabled"; + }; + ir: ir@580 { compatible = "amlogic,meson-gxbb-ir"; reg = <0x0 0x00580 0x0 0x40>; From 249a2243e97edcd2cb0bef01a934d4ba21fb167f Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 22:05:28 +0100 Subject: [PATCH 0037/1911] ARM64: dts: meson-gxl: add the pwm_ao_b pin This adds the pwm_ao_b pin to allow boards which have an LED connected to GPIOAO_9 to use the leds-pwm driver (by activating the pwm_AO_ab node and passing the pwm_ao_b_pin pinctrl-reference). Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index bdf2305a2e25b4..5f1100af72b15f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -117,6 +117,13 @@ function = "remote_input_ao"; }; }; + + pwm_ao_b_pins: pwm_ao_b { + mux { + groups = "pwm_ao_b"; + function = "pwm_ao_b"; + }; + }; }; }; From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2017 19:32:01 +0100 Subject: [PATCH 0038/1911] random: use chacha20 for get_random_int/long Now that our crng uses chacha20, we can rely on its speedy characteristics for replacing MD5, while simultaneously achieving a higher security guarantee. Before the idea was to use these functions if you wanted random integers that aren't stupidly insecure but aren't necessarily secure either, a vague gray zone, that hopefully was "good enough" for its users. With chacha20, we can strengthen this claim, since either we're using an rdrand-like instruction, or we're using the same crng as /dev/urandom. And it's faster than what was before. 
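Stripped of the per-CPU plumbing, the locking and the arch_get_random_* fast path, the batching scheme referred to here boils down to the following rough, self-contained sketch; fill_block() is only a stand-in for the real extract_crng(), so this is not the kernel code itself:

#include <stddef.h>

#define BLOCK_BYTES 64  /* one ChaCha20 output block */
#define BLOCK_WORDS (BLOCK_BYTES / sizeof(unsigned long))

struct batch_sketch {
        unsigned long pool[BLOCK_WORDS];
        unsigned int position;
};

/* Placeholder for extract_crng(): fills one block with CRNG output. */
static void fill_block(unsigned long *out)
{
        static unsigned long fake;
        size_t i;

        for (i = 0; i < BLOCK_WORDS; i++)
                out[i] = fake++;
}

static unsigned long get_batched_long(struct batch_sketch *batch)
{
        /* Refill only after a whole block has been handed out. */
        if (batch->position % BLOCK_WORDS == 0) {
                fill_block(batch->pool);
                batch->position = 0;
        }
        return batch->pool[batch->position++];
}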
We could have chosen to replace this with a SipHash-derived function, which might be slightly faster, but at the cost of having yet another RNG construction in the kernel. By moving to chacha20, we have a single RNG to analyze and verify, and we also already get good performance improvements on all platforms. Implementation-wise, rather than use a generic buffer for both get_random_int/long and memcpy based on the size needs, we use a specific buffer for 32-bit reads and for 64-bit reads. This way, we're guaranteed to always have aligned accesses on all platforms. While slightly more verbose in C, the assembly this generates is a lot simpler than otherwise. Finally, on 32-bit platforms where longs and ints are the same size, we simply alias get_random_int to get_random_long. Signed-off-by: Jason A. Donenfeld Suggested-by: Theodore Ts'o Cc: Theodore Ts'o Cc: Hannes Frederic Sowa Cc: Andy Lutomirski Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 84 +++++++++++++++++++++--------------------- include/linux/random.h | 1 - init/main.c | 1 - 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9d147d456598e2..b800e5479b7dc8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2016,63 +2016,65 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; - -int random_int_secret_init(void) -{ - get_random_bytes(random_int_secret, sizeof(random_int_secret)); - return 0; -} - -static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash) - __aligned(sizeof(unsigned long)); +struct batched_entropy { + union { + unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; + unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + }; + unsigned int position; +}; /* - * Get a random word for internal kernel use only. Similar to urandom but - * with the goal of minimal entropy pool depletion. As a result, the random - * value is not cryptographically secure but for several uses the cost of - * depleting entropy is too high + * Get a random word for internal kernel use only. The quality of the random + * number is either as good as RDRAND or as good as /dev/urandom, with the + * goal of being quite fast and not depleting entropy. */ -unsigned int get_random_int(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); +unsigned long get_random_long(void) { - __u32 *hash; - unsigned int ret; + unsigned long ret; + struct batched_entropy *batch; - if (arch_get_random_int(&ret)) + if (arch_get_random_long(&ret)) return ret; - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = hash[0]; - put_cpu_var(get_random_int_hash); - + batch = &get_cpu_var(batched_entropy_long); + if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { + extract_crng((u8 *)batch->entropy_long); + batch->position = 0; + } + ret = batch->entropy_long[batch->position++]; + put_cpu_var(batched_entropy_long); return ret; } -EXPORT_SYMBOL(get_random_int); +EXPORT_SYMBOL(get_random_long); -/* - * Same as get_random_int(), but returns unsigned long. 
- */ -unsigned long get_random_long(void) +#if BITS_PER_LONG == 32 +unsigned int get_random_int(void) { - __u32 *hash; - unsigned long ret; + return get_random_long(); +} +#else +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); +unsigned int get_random_int(void) +{ + unsigned int ret; + struct batched_entropy *batch; - if (arch_get_random_long(&ret)) + if (arch_get_random_int(&ret)) return ret; - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = *(unsigned long *)hash; - put_cpu_var(get_random_int_hash); - + batch = &get_cpu_var(batched_entropy_int); + if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { + extract_crng((u8 *)batch->entropy_int); + batch->position = 0; + } + ret = batch->entropy_int[batch->position++]; + put_cpu_var(batched_entropy_int); return ret; } -EXPORT_SYMBOL(get_random_long); +#endif +EXPORT_SYMBOL(get_random_int); /** * randomize_page - Generate a random, page aligned address diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef1a..16ab429735a783 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/init/main.c b/init/main.c index b0c9d6facef9a5..09beb7fc6e8c7b 100644 --- a/init/main.c +++ b/init/main.c @@ -879,7 +879,6 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); - random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 22 Jan 2017 16:34:08 +0100 Subject: [PATCH 0039/1911] random: convert get_random_int/long into get_random_u32/u64 Many times, when a user wants a random number, he wants a random number of a guaranteed size. So, thinking of get_random_int and get_random_long in terms of get_random_u32 and get_random_u64 makes it much easier to achieve this. It also makes the code simpler. On 32-bit platforms, get_random_int and get_random_long are both aliased to get_random_u32. On 64-bit platforms, int->u32 and long->u64. Signed-off-by: Jason A. Donenfeld Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 55 +++++++++++++++++++++--------------------- include/linux/random.h | 17 +++++++++++-- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b800e5479b7dc8..066ae125f2c83e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2018,8 +2018,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; - unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; }; @@ -2029,52 +2029,51 @@ struct batched_entropy { * number is either as good as RDRAND or as good as /dev/urandom, with the * goal of being quite fast and not depleting entropy. 
*/ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); -unsigned long get_random_long(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); +u64 get_random_u64(void) { - unsigned long ret; + u64 ret; struct batched_entropy *batch; - if (arch_get_random_long(&ret)) +#if BITS_PER_LONG == 64 + if (arch_get_random_long((unsigned long *)&ret)) return ret; +#else + if (arch_get_random_long((unsigned long *)&ret) && + arch_get_random_long((unsigned long *)&ret + 1)) + return ret; +#endif - batch = &get_cpu_var(batched_entropy_long); - if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { - extract_crng((u8 *)batch->entropy_long); + batch = &get_cpu_var(batched_entropy_u64); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + extract_crng((u8 *)batch->entropy_u64); batch->position = 0; } - ret = batch->entropy_long[batch->position++]; - put_cpu_var(batched_entropy_long); + ret = batch->entropy_u64[batch->position++]; + put_cpu_var(batched_entropy_u64); return ret; } -EXPORT_SYMBOL(get_random_long); +EXPORT_SYMBOL(get_random_u64); -#if BITS_PER_LONG == 32 -unsigned int get_random_int(void) -{ - return get_random_long(); -} -#else -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); -unsigned int get_random_int(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); +u32 get_random_u32(void) { - unsigned int ret; + u32 ret; struct batched_entropy *batch; if (arch_get_random_int(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_int); - if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { - extract_crng((u8 *)batch->entropy_int); + batch = &get_cpu_var(batched_entropy_u32); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + extract_crng((u8 *)batch->entropy_u32); batch->position = 0; } - ret = batch->entropy_int[batch->position++]; - put_cpu_var(batched_entropy_int); + ret = batch->entropy_u32[batch->position++]; + put_cpu_var(batched_entropy_u32); return ret; } -#endif -EXPORT_SYMBOL(get_random_int); +EXPORT_SYMBOL(get_random_u32); /** * randomize_page - Generate a random, page aligned address diff --git a/include/linux/random.h b/include/linux/random.h index 16ab429735a783..ed5c3838780de5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes); extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); From bd80ef5ed46233d90f216805ff40aba326ff1bdd Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 19:17:14 +0100 Subject: [PATCH 0040/1911] ARM64: dts: meson: meson-gx: add the SAR ADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the SAR ADC to meson-gxbb.dtsi and meson-gxl.dtsi. GXBB provides a 10-bit ADC while GXL and GXM provide a 12-bit ADC. Some boards use resistor ladder buttons connected through one of the ADC channels. 
On newer devices (GXL and GXM) some boards use pull-ups/downs to change the resistance (and thus the ADC value) on one of the ADC channels to indicate the board revision. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Reviewed-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 8 ++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 10 ++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 10 ++++++++++ arch/arm64/boot/dts/amlogic/meson-gxm.dtsi | 4 ++++ 4 files changed, 32 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 9110dc1a248104..f100d0b013b417 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -237,6 +237,14 @@ status = "disabled"; }; + saradc: adc@8680 { + compatible = "amlogic,meson-saradc"; + reg = <0x0 0x8680 0x0 0x34>; + #io-channel-cells = <1>; + interrupts = ; + status = "disabled"; + }; + pwm_ef: pwm@86c0 { compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; reg = <0x0 0x086c0 0x0 0x10>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 39a774ad83ce13..04b3324bc1329d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -490,6 +490,16 @@ clocks = <&clkc CLKID_I2C>; }; +&saradc { + compatible = "amlogic,meson-gxbb-saradc", "amlogic,meson-saradc"; + clocks = <&xtal>, + <&clkc CLKID_SAR_ADC>, + <&clkc CLKID_SANA>, + <&clkc CLKID_SAR_ADC_CLK>, + <&clkc CLKID_SAR_ADC_SEL>; + clock-names = "clkin", "core", "sana", "adc_clk", "adc_sel"; +}; + &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, <&xtal>, diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 5f1100af72b15f..fe11b5fc61f78e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -347,6 +347,16 @@ clocks = <&clkc CLKID_I2C>; }; +&saradc { + compatible = "amlogic,meson-gxl-saradc", "amlogic,meson-saradc"; + clocks = <&xtal>, + <&clkc CLKID_SAR_ADC>, + <&clkc CLKID_SANA>, + <&clkc CLKID_SAR_ADC_CLK>, + <&clkc CLKID_SAR_ADC_SEL>; + clock-names = "clkin", "core", "sana", "adc_clk", "adc_sel"; +}; + &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, <&xtal>, diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index 4c55665a253f40..ddea7305c644af 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -117,6 +117,10 @@ }; }; +&saradc { + compatible = "amlogic,meson-gxm-saradc", "amlogic,meson-saradc"; +}; + &scpi_dvfs { clock-indices = <0 1>; clock-output-names = "vbig", "vlittle"; From 6b6a1867667a90f399762dc1ee6add5b4aa646c9 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 16 Jan 2017 11:22:45 +0100 Subject: [PATCH 0041/1911] ARM64: dts: meson-gxbb-p200: add ADC laddered keys Add the 5 buttons connected to a resistor laddered matrix and sampled by the SAR ADC channel 0. Only the p200 board has these buttons, the P201 doesn't. 
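For reference, the press-threshold-microvolt values in the hunk below are simply the commented percentages applied to the 1.8 V keyup threshold: 0.50 * 1800000 = 900000, 0.38 * 1800000 = 684000, 0.26 * 1800000 = 468000 and 0.14 * 1800000 = 252000 microvolts, with the Menu button sitting at the bottom of the ladder (0).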
Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- .../boot/dts/amlogic/meson-gxbb-p200.dts | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts index 03e3d76626ddc4..fc0e86cb4cdedc 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts @@ -45,10 +45,55 @@ /dts-v1/; #include "meson-gxbb-p20x.dtsi" +#include / { compatible = "amlogic,p200", "amlogic,meson-gxbb"; model = "Amlogic Meson GXBB P200 Development Board"; + + avdd18_usb_adc: regulator-avdd18_usb_adc { + compatible = "regulator-fixed"; + regulator-name = "AVDD18_USB_ADC"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + adc_keys { + compatible = "adc-keys"; + io-channels = <&saradc 0>; + io-channel-names = "buttons"; + keyup-threshold-microvolt = <1800000>; + + button-home { + label = "Home"; + linux,code = ; + press-threshold-microvolt = <900000>; /* 50% */ + }; + + button-esc { + label = "Esc"; + linux,code = ; + press-threshold-microvolt = <684000>; /* 38% */ + }; + + button-up { + label = "Volume Up"; + linux,code = ; + press-threshold-microvolt = <468000>; /* 26% */ + }; + + button-down { + label = "Volume Down"; + linux,code = ; + press-threshold-microvolt = <252000>; /* 14% */ + }; + + button-menu { + label = "Menu"; + linux,code = ; + press-threshold-microvolt = <0>; /* 0% */ + }; + }; }; &i2c_B { @@ -56,3 +101,8 @@ pinctrl-0 = <&i2c_b_pins>; pinctrl-names = "default"; }; + +&saradc { + status = "okay"; + vref-supply = <&avdd18_usb_adc>; +}; From d1160ebff1fea7ac7f6e6533b43144d377b56e6f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jan 2017 11:57:36 +0100 Subject: [PATCH 0042/1911] arm64: dts: exynos: Add initial configuration for DISP clocks for TM2/TM2e Add initial clock configuration for display subsystem for Exynos5433 based TM2/TM2e boards in device tree in order to avoid dependency on the configuration left by the bootloader. This initial configuration is also needed to ensure that display subsystem is operational if display power domain gets turned off before clock controller is probed and the initial clock configuration left by the bootloader saved. TM2 and TM2e use different rates for the DISP PLL clock, but for better maintainability all 'assigned-clocks-*' properties for DISP CMU are defined in each board dts instead of redefining the rates property. 
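A note on the binding used in the hunks below: assigned-clocks, assigned-clock-parents and assigned-clock-rates are parallel lists matched entry by entry, a <0> placeholder leaves that particular parent or rate untouched, and the clock framework applies these defaults when the clock provider is registered, so no consumer driver code is involved.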
Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Signed-off-by: Krzysztof Kozlowski --- .../dts/exynos/exynos5433-tm2-common.dtsi | 12 ------- arch/arm64/boot/dts/exynos/exynos5433-tm2.dts | 34 +++++++++++++++++++ .../arm64/boot/dts/exynos/exynos5433-tm2e.dts | 34 +++++++++++++++++++ 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi index 53fd0683d4001b..098ad557fee325 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi @@ -217,18 +217,6 @@ assigned-clock-parents = <&cmu_top CLK_FOUT_AUD_PLL>; }; -&cmu_disp { - assigned-clocks = <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, - <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; - assigned-clock-parents = <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, - <0>, - <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; - assigned-clock-rates = <0>, <400000000>; -}; - &cmu_fsys { assigned-clocks = <&cmu_top CLK_MOUT_SCLK_USBDRD30>, <&cmu_top CLK_MOUT_SCLK_USBHOST30>, diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts b/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts index ddba2f889326b1..dea0a6f5bc18f0 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts @@ -18,6 +18,40 @@ compatible = "samsung,tm2", "samsung,exynos5433"; }; +&cmu_disp { + /* + * TM2 and TM2e differ only by DISP_PLL rate, but define all assigned + * clocks properties for DISP CMU for each board to keep them together + * for easier review and maintenance. + */ + assigned-clocks = <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, + <&cmu_disp CLK_MOUT_ACLK_DISP_333_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_RXCLKESC0_USER>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_BITCLKDIV8_USER>, + <&cmu_disp CLK_MOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; + assigned-clock-parents = <0>, <0>, + <&cmu_mif CLK_ACLK_DISP_333>, + <&cmu_mif CLK_SCLK_DSIM0_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY>, + <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, + <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; + assigned-clock-rates = <250000000>, <400000000>; +}; + &hsi2c_9 { status = "okay"; diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts b/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts index 2fbf3a86031689..7891a31adc1759 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts @@ -18,6 +18,40 @@ compatible = "samsung,tm2e", "samsung,exynos5433"; }; +&cmu_disp { + /* + * TM2 and TM2e differ only by DISP_PLL rate, but define all assigned + * clocks properties for DISP CMU for each board to keep them together + * for easier review and maintenance. 
+ */ + assigned-clocks = <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, + <&cmu_disp CLK_MOUT_ACLK_DISP_333_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_RXCLKESC0_USER>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_BITCLKDIV8_USER>, + <&cmu_disp CLK_MOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; + assigned-clock-parents = <0>, <0>, + <&cmu_mif CLK_ACLK_DISP_333>, + <&cmu_mif CLK_SCLK_DSIM0_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY>, + <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, + <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; + assigned-clock-rates = <278000000>, <400000000>; +}; + &ldo31_reg { regulator-name = "TSP_VDD_1.8V_AP"; regulator-min-microvolt = <1800000>; From 860ac88abecab5d3d0e07306365f55cbc690c220 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 20 Jan 2017 09:49:56 +0530 Subject: [PATCH 0043/1911] pinctrl: dt-bindings: samsung: Add Exynos7 specific pinctrl macro definitions Exynos7 SoC pinctrl configurations are similar to existing Exynos4/5 except for FSYS1 pinctrl drive strengths. So adding Exynos7 specific FSYS1 blocks pinctrl driver strength related macros which will be used in Exynos7 DTSi files. Signed-off-by: Pankaj Dubey Reviewed-by: Alim Akhtar Acked-by: Sylwester Nawrocki Acked-by: Linus Walleij Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/pinctrl/samsung.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index e0ebb20ffdd3a7..b7aa3646208b1c 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -68,4 +68,12 @@ #define EXYNOS_PIN_FUNC_6 6 #define EXYNOS_PIN_FUNC_F 0xf +/* Drive strengths for Exynos7 FSYS1 block */ +#define EXYNOS7_FSYS1_PIN_DRV_LV1 0 +#define EXYNOS7_FSYS1_PIN_DRV_LV2 4 +#define EXYNOS7_FSYS1_PIN_DRV_LV3 2 +#define EXYNOS7_FSYS1_PIN_DRV_LV4 6 +#define EXYNOS7_FSYS1_PIN_DRV_LV5 1 +#define EXYNOS7_FSYS1_PIN_DRV_LV6 5 + #endif /* __DT_BINDINGS_PINCTRL_SAMSUNG_H__ */ From 51a2de551755c28c98191f80537acd3c7e1adda4 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 20 Jan 2017 09:49:57 +0530 Subject: [PATCH 0044/1911] arm64: dts: exynos: Use macros for pinctrl configuration on Exynos7 Usage of DTS macros instead of hard-coded numbers makes code easier to read. One does not have to remember which value means pull-up/down or specific driver strength. 
Signed-off-by: Pankaj Dubey Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- .../boot/dts/exynos/exynos7-espresso.dts | 4 +- .../boot/dts/exynos/exynos7-pinctrl.dtsi | 302 +++++++++--------- 2 files changed, 154 insertions(+), 152 deletions(-) diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index c528dd52ba2d39..25d9b4aa0ff652 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -328,8 +328,8 @@ &pinctrl_alive { pmic_irq: pmic-irq { samsung,pins = "gpa0-2"; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; diff --git a/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi index 7ebb93927f136a..8f58850cd28cdd 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#include + &pinctrl_alive { gpa0: gpa0 { gpio-controller; @@ -187,163 +189,163 @@ hs_i2c10_bus: hs-i2c10-bus { samsung,pins = "gpb0-1", "gpb0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c11_bus: hs-i2c11-bus { samsung,pins = "gpb0-3", "gpb0-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c2_bus: hs-i2c2-bus { samsung,pins = "gpd0-3", "gpd0-2"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart0_data: uart0-data { samsung,pins = "gpd0-0", "gpd0-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart0_fctl: uart0-fctl { samsung,pins = "gpd0-2", "gpd0-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart2_data: uart2-data { samsung,pins = "gpd1-4", "gpd1-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c3_bus: hs-i2c3-bus { samsung,pins = "gpd1-3", "gpd1-2"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart1_data: uart1-data { samsung,pins = "gpd1-0", "gpd1-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart1_fctl: uart1-fctl { samsung,pins = "gpd1-2", "gpd1-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c0_bus: hs-i2c0-bus { samsung,pins = "gpd2-1", "gpd2-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c1_bus: hs-i2c1-bus { samsung,pins = "gpd2-3", "gpd2-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c9_bus: hs-i2c9-bus { samsung,pins = "gpd2-7", "gpd2-6"; - 
samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm0_out: pwm0-out { samsung,pins = "gpd2-4"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm1_out: pwm1-out { samsung,pins = "gpd2-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm2_out: pwm2-out { samsung,pins = "gpd2-6"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm3_out: pwm3-out { samsung,pins = "gpd2-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c8_bus: hs-i2c8-bus { samsung,pins = "gpd5-3", "gpd5-2"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart3_data: uart3-data { samsung,pins = "gpd5-0", "gpd5-1"; - samsung,pin-function = <3>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi2_bus: spi2-bus { samsung,pins = "gpd5-0", "gpd5-1", "gpd5-2", "gpd5-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi1_bus: spi1-bus { samsung,pins = "gpd6-2", "gpd6-3", "gpd6-4", "gpd6-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi0_bus: spi0-bus { samsung,pins = "gpd8-0", "gpd8-1", "gpd6-0", "gpd6-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c4_bus: hs-i2c4-bus { samsung,pins = "gpg3-1", "gpg3-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c5_bus: hs-i2c5-bus { samsung,pins = "gpg3-3", "gpg3-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -358,9 +360,9 @@ hs_i2c6_bus: hs-i2c6-bus { samsung,pins = "gpj0-1", "gpj0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -375,9 +377,9 @@ hs_i2c7_bus: hs-i2c7-bus { samsung,pins = "gpj1-1", "gpj1-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -392,9 +394,9 @@ spi3_bus: spi3-bus { samsung,pins = "gpg4-0", "gpg4-1", "gpg4-2", "gpg4-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -409,9 +411,9 @@ spi4_bus: spi4-bus { samsung,pins = "gpv7-0", "gpv7-1", "gpv7-2", "gpv7-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -426,37 +428,37 @@ sd2_clk: sd2-clk { samsung,pins = 
"gpr4-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_cmd: sd2-cmd { samsung,pins = "gpr4-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_cd: sd2-cd { samsung,pins = "gpr4-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_bus1: sd2-bus-width1 { samsung,pins = "gpr4-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_bus4: sd2-bus-width4 { samsung,pins = "gpr4-4", "gpr4-5", "gpr4-6"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -495,107 +497,107 @@ sd0_clk: sd0-clk { samsung,pins = "gpr0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_cmd: sd0-cmd { samsung,pins = "gpr0-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_ds: sd0-ds { samsung,pins = "gpr0-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_qrdy: sd0-qrdy { samsung,pins = "gpr0-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus1: sd0-bus-width1 { samsung,pins = "gpr1-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus4: sd0-bus-width4 { samsung,pins = "gpr1-1", "gpr1-2", "gpr1-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus8: sd0-bus-width8 { samsung,pins = "gpr1-4", "gpr1-5", "gpr1-6", "gpr1-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_clk: sd1-clk { samsung,pins = "gpr2-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_cmd: sd1-cmd { samsung,pins = "gpr2-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_ds: sd1-ds { samsung,pins = "gpr2-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_qrdy: sd1-qrdy { samsung,pins = "gpr2-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_int: sd1-int { samsung,pins = "gpr2-4"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus1: sd1-bus-width1 { samsung,pins = "gpr3-0"; - samsung,pin-function = <2>; - 
samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus4: sd1-bus-width4 { samsung,pins = "gpr3-1", "gpr3-2", "gpr3-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus8: sd1-bus-width8 { samsung,pins = "gpr3-4", "gpr3-5", "gpr3-6", "gpr3-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -682,22 +684,22 @@ spi5_bus: spi5-bus { samsung,pins = "gpf2-0", "gpf2-1", "gpf2-2", "gpf2-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; ufs_refclk_out: ufs-refclk-out { samsung,pins = "gpg2-4"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; ufs_rst_n: ufs-rst-n { samsung,pins = "gph1-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; From ad6afec832a77845a9a0f9353fe049225a976b16 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 18 Jan 2017 12:02:52 +0530 Subject: [PATCH 0045/1911] arm64: dts: exynos: Add USB 3.0 controller node for Exynos7 Add USB 3.0 DRD controller device node, with its clock and phy information to enable the same on Exynos7. Signed-off-by: Vivek Gautam Signed-off-by: Pankaj Dubey Reviewed-by: Javier Martinez Canillas Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- arch/arm64/boot/dts/exynos/exynos7.dtsi | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index 80aa60e38237a2..9a3fbed1765af6 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -603,6 +603,40 @@ #include "exynos7-trip-points.dtsi" }; }; + + usbdrd_phy: phy@15500000 { + compatible = "samsung,exynos7-usbdrd-phy"; + reg = <0x15500000 0x100>; + clocks = <&clock_fsys0 ACLK_USBDRD300>, + <&clock_fsys0 OSCCLK_PHY_CLKOUT_USB30_PHY>, + <&clock_fsys0 PHYCLK_USBDRD300_UDRD30_PIPE_PCLK_USER>, + <&clock_fsys0 PHYCLK_USBDRD300_UDRD30_PHYCLK_USER>, + <&clock_fsys0 SCLK_USBDRD300_REFCLK>; + clock-names = "phy", "ref", "phy_pipe", + "phy_utmi", "itp"; + samsung,pmu-syscon = <&pmu_system_controller>; + #phy-cells = <1>; + }; + + usbdrd3 { + compatible = "samsung,exynos7-dwusb3"; + clocks = <&clock_fsys0 ACLK_USBDRD300>, + <&clock_fsys0 SCLK_USBDRD300_SUSPENDCLK>, + <&clock_fsys0 ACLK_AXIUS_USBDRD30X_FSYS0X>; + clock-names = "usbdrd30", "usbdrd30_susp_clk", + "usbdrd30_axius_clk"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + dwc3@15400000 { + compatible = "snps,dwc3"; + reg = <0x15400000 0x10000>; + interrupts = ; + phys = <&usbdrd_phy 0>, <&usbdrd_phy 1>; + phy-names = "usb2-phy", "usb3-phy"; + }; + }; }; }; From 6629490aa027a8af3160e63cffcddc97c1602c96 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 18 Jan 2017 12:02:53 +0530 Subject: [PATCH 0046/1911] arm64: dts: exynos: Add regulators for Vbus and Vbus-Boost Adding fixed voltage regulators for Vbus and Vbus-boost required by USB 3.0 DRD controller on Exynos7-espresso board. 
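On the consumer side the PHY driver picks these supplies up through the regulator API; a minimal sketch, where the "vbus" con-id corresponds to the vbus-supply property wired up later in this patch and the function name is hypothetical:

#include <linux/err.h>
#include <linux/regulator/consumer.h>

static int example_enable_vbus(struct device *dev)
{
        struct regulator *vbus = devm_regulator_get(dev, "vbus");

        if (IS_ERR(vbus))
                return PTR_ERR(vbus);

        /* Turns on the GPIO-gated 5V rail declared by the fixed regulator. */
        return regulator_enable(vbus);
}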
Signed-off-by: Vivek Gautam Signed-off-by: Pankaj Dubey Reviewed-by: Javier Martinez Canillas Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- .../boot/dts/exynos/exynos7-espresso.dts | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 25d9b4aa0ff652..e5892bb0ae6e55 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -13,6 +13,7 @@ #include "exynos7.dtsi" #include #include +#include / { model = "Samsung Exynos7 Espresso board based on EXYNOS7"; @@ -32,6 +33,29 @@ device_type = "memory"; reg = <0x0 0x40000000 0x0 0xC0000000>; }; + + usb30_vbus_reg: regulator-usb30 { + compatible = "regulator-fixed"; + regulator-name = "VBUS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gph1 1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb30_vbus_en>; + enable-active-high; + }; + + usb3drd_boost_5v: regulator-usb3drd-boost { + compatible = "regulator-fixed"; + regulator-name = "VUSB_VBUS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpf4 1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb3drd_boost_en>; + enable-active-high; + }; + }; &fin_pll { @@ -365,3 +389,24 @@ vqmmc-supply = <&ldo2_reg>; disable-wp; }; + +&pinctrl_bus1 { + usb30_vbus_en: usb30-vbus-en { + samsung,pins = "gph1-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3drd_boost_en: usb3drd-boost-en { + samsung,pins = "gpf4-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; +}; + +&usbdrd_phy { + vbus-supply = <&usb30_vbus_reg>; + vbus-boost-supply = <&usb3drd_boost_5v>; +}; From 17fa2dcbd6325ab877651bef04fca9dd828a2758 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 18:01:23 +0100 Subject: [PATCH 0047/1911] iio: adc: handle unknow of_device_id data If we get an unknown 'childmode' value, a number of variables are not initialized properly: drivers/iio/adc/rcar-gyroadc.c: In function 'rcar_gyroadc_probe': drivers/iio/adc/rcar-gyroadc.c:390:5: error: 'num_channels' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/iio/adc/rcar-gyroadc.c:426:22: error: 'sample_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/iio/adc/rcar-gyroadc.c:428:23: error: 'channels' may be used uninitialized in this function [-Werror=maybe-uninitialized] The driver is currently correct, but handling this properly is more robust for possible modifications. There is also a false-positive warning about adcmode being possibly uninitialized, but that cannot happen as we also check the 'first' flag: drivers/iio/adc/rcar-gyroadc.c:398:26: error: 'adcmode' may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an initialization for 'adcmode' and bails out for any unknown childmode. 
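The underlying pattern is general: when a set of locals is assigned only inside switch cases, any unhandled case value leaves them uninitialized, so either pre-initialize them or reject unknown values in a default branch. A standalone illustration with made-up mode names and widths:

#include <linux/errno.h>

enum example_mode { EXAMPLE_MODE_A, EXAMPLE_MODE_B };

static int example_pick_width(unsigned int mode, unsigned int *width)
{
        switch (mode) {
        case EXAMPLE_MODE_A:
                *width = 12;
                break;
        case EXAMPLE_MODE_B:
                *width = 15;
                break;
        default:
                /* Nothing was initialized, so refuse early. */
                return -EINVAL;
        }

        return 0;
}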
Fixes: 059c53b32329 ("iio: adc: Add Renesas GyroADC driver") Signed-off-by: Arnd Bergmann Acked-by: Marek Vasut Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rcar-gyroadc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/rcar-gyroadc.c b/drivers/iio/adc/rcar-gyroadc.c index 0c44f72c32a859..018ed360e717cd 100644 --- a/drivers/iio/adc/rcar-gyroadc.c +++ b/drivers/iio/adc/rcar-gyroadc.c @@ -336,7 +336,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) struct device_node *child; struct regulator *vref; unsigned int reg; - unsigned int adcmode, childmode; + unsigned int adcmode = -1, childmode; unsigned int sample_width; unsigned int num_channels; int ret, first = 1; @@ -366,6 +366,8 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) channels = rcar_gyroadc_iio_channels_3; num_channels = ARRAY_SIZE(rcar_gyroadc_iio_channels_3); break; + default: + return -EINVAL; } /* From db61ffe3a71c697aaa91c42c862a5f7557a0e562 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 31 Jan 2017 14:36:07 -0200 Subject: [PATCH 0048/1911] random: move random_min_urandom_seed into CONFIG_SYSCTL ifdef block Building arm allnodefconfig causes the following build warning: drivers/char/random.c:318:12: warning: 'random_min_urandom_seed' defined but not used [-Wunused-variable] Fix the warning by moving 'random_min_urandom_seed' declaration inside the CONFIG_SYSCTL ifdef block, where it is actually used. While at it, remove the comment prior to the variable declaration. Signed-off-by: Fabio Estevam Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 066ae125f2c83e..0ab0249189072b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -312,11 +312,6 @@ static int random_read_wakeup_bits = 64; */ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; -/* - * Variable is currently unused by left for user space compatibility. - */ -static int random_min_urandom_seed = 60; - /* * Originally, we used a primitive polynomial of degree .poolwords * over GF(2). The taps for various sizes are defined below. They @@ -1886,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, static int min_read_thresh = 8, min_write_thresh; static int max_read_thresh = OUTPUT_POOL_WORDS * 32; static int max_write_thresh = INPUT_POOL_WORDS * 32; +static int random_min_urandom_seed = 60; static char sysctl_bootid[16]; /* From af7bd4dc13093bf1477f370722bbab24cf457b91 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:52 +0200 Subject: [PATCH 0049/1911] vfs: create vfs helper vfs_tmpfile() Factor out some common vfs bits from do_tmpfile() to be used by overlayfs for concurrent copy up. 
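The new helper takes the parent directory's dentry, checks write permission and ->tmpfile support, and hands back an anonymous child dentry (or an ERR_PTR on failure). A minimal sketch of an in-kernel caller, assuming 'dir' is the dentry of a writable directory whose filesystem implements ->tmpfile:

#include <linux/dcache.h>
#include <linux/err.h>
#include <linux/fs.h>

static int example_copy_via_tmpfile(struct dentry *dir)
{
        struct dentry *temp;

        /* open_flag == 0: no O_EXCL, so the new inode stays linkable. */
        temp = vfs_tmpfile(dir, S_IFREG | 0600, 0);
        if (IS_ERR(temp))
                return PTR_ERR(temp);

        /* ... fill temp->d_inode with data, then link it into place ... */

        dput(temp);
        return 0;
}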
Signed-off-by: Amir Goldstein Cc: Al Viro Signed-off-by: Miklos Szeredi --- fs/namei.c | 68 +++++++++++++++++++++++++++++----------------- include/linux/fs.h | 3 ++ 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442c0..7d87699c3e2ee1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3353,13 +3353,50 @@ static int do_last(struct nameidata *nd, return error; } +struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *child = NULL; + struct inode *dir = dentry->d_inode; + struct inode *inode; + int error; + + /* we want directory to be writable */ + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + goto out_err; + error = -EOPNOTSUPP; + if (!dir->i_op->tmpfile) + goto out_err; + error = -ENOMEM; + child = d_alloc(dentry, &name); + if (unlikely(!child)) + goto out_err; + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; + error = -ENOENT; + inode = child->d_inode; + if (unlikely(!inode)) + goto out_err; + if (!(open_flag & O_EXCL)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } + return child; + +out_err: + dput(child); + return ERR_PTR(error); +} +EXPORT_SYMBOL(vfs_tmpfile); + static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { - static const struct qstr name = QSTR_INIT("/", 1); struct dentry *child; - struct inode *dir; struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) @@ -3367,25 +3404,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; - dir = path.dentry->d_inode; - /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); - if (error) - goto out2; - if (!dir->i_op->tmpfile) { - error = -EOPNOTSUPP; - goto out2; - } - child = d_alloc(path.dentry, &name); - if (unlikely(!child)) { - error = -ENOMEM; + child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); + error = PTR_ERR(child); + if (unlikely(IS_ERR(child))) goto out2; - } dput(path.dentry); path.dentry = child; - error = dir->i_op->tmpfile(dir, child, op->mode); - if (error) - goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ error = may_open(&path, 0, op->open_flag); @@ -3396,14 +3420,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; error = open_check_o_direct(file); - if (error) { + if (error) fput(file); - } else if (!(op->open_flag & O_EXCL)) { - struct inode *inode = file_inode(file); - spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; - spin_unlock(&inode->i_lock); - } out2: mnt_drop_write(path.mnt); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba074328894ce..4a7f3cc9edab4b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. 
*/ From 9e79b1326302589a035fe20e8cce7c1a7d8333ed Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:55 +0200 Subject: [PATCH 0050/1911] vfs: deny fallocate() on directory There was an obscure use case of fallocate of directory inode in the vfs helper with the comment: "Let individual file system decide if it supports preallocation for directories or not." But there is no in-tree file system that implements fallocate for directory operations. Deny an attempt to fallocate a directory with EISDIR error. This change is needed prior to converting sb_start_write() to file_start_write(), so freeze protection is correctly handled for cases of fallocate file and blockdev. Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/open.c b/fs/open.c index 9921f70bc5ca07..f9118f4113e7e4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (S_ISFIFO(inode->i_mode)) return -ESPIPE; - /* - * Let individual file system decide if it supports preallocation - * for directories or not. - */ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && - !S_ISBLK(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ From 11cbfb10775aa2a01cee966d118049ede9d0bdf2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:56 +0200 Subject: [PATCH 0051/1911] vfs: deny copy_file_range() for non regular files There is no in-tree file system that implements copy_file_range() for non regular files. Deny an attempt to copy_file_range() a directory with EISDIR and any other non regualr file with EINVAL to conform with behavior of vfs_{clone,dedup}_file_range(). This change is needed prior to converting sb_start_write() to file_start_write() in the vfs helper. Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/read_write.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab09c..511178d7723ba9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1518,6 +1518,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; From bfe219d373cadab761373aeea4c70406bc27ea2c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:57 +0200 Subject: [PATCH 0052/1911] vfs: wrap write f_ops with file_{start,end}_write() Before calling write f_ops, call file_start_write() instead of sb_start_write(). Replace {sb,file}_start_write() for {copy,clone}_file_range() and for fallocate(). Beyond correct semantics, this avoids freeze protection to sb when operating on special inodes, such as fallocate() on a blockdev. 
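In isolation the wrapper pattern looks like the sketch below; do_write_op() is a hypothetical stand-in for any write-side f_op such as ->fallocate:

#include <linux/fs.h>

static int do_write_op(struct file *file);      /* hypothetical callback */

static int example_call_write_op(struct file *file)
{
        int ret;

        /* Freeze protection keyed on the file; skipped for special inodes. */
        file_start_write(file);
        ret = do_write_op(file);
        file_end_write(file);

        return ret;
}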
Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 4 ++-- fs/read_write.c | 4 ++-- include/linux/fs.h | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/open.c b/fs/open.c index f9118f4113e7e4..949cef29c3bba9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -314,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - sb_start_write(inode->i_sb); + file_start_write(file); ret = file->f_op->fallocate(file, mode, offset, len); /* @@ -327,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret == 0) fsnotify_modify(file); - sb_end_write(inode->i_sb); + file_end_write(file); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); diff --git a/fs/read_write.c b/fs/read_write.c index 511178d7723ba9..820a3f06c46a1a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1543,7 +1543,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; - sb_start_write(inode_out->i_sb); + file_start_write(file_out); /* * Try cloning first, this is supported by more file systems, and @@ -1579,7 +1579,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, inc_syscr(current); inc_syscw(current); - sb_end_write(inode_out->i_sb); + file_end_write(file_out); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a7f3cc9edab4b..78c81e6f5d76f7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,19 +1741,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2564,6 +2551,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. From e7f52429b4a5b2e3224948d1737eb264c8f7e15f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:53 +0200 Subject: [PATCH 0053/1911] ovl: check if upperdir fs supports O_TMPFILE This is needed for choosing between concurrent copyup using O_TMPFILE and legacy copyup using workdir+rename. 
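The probe itself just creates one tmpfile in the workdir and drops it again. For intuition, the same create-unnamed-then-link technique is available from userspace; a small sketch with hypothetical paths, using the /proc/self/fd route that open(2) documents for giving a name to an O_TMPFILE descriptor:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char proc_path[40];
        /* "/upper" stands for any directory on an O_TMPFILE-capable fs. */
        int fd = open("/upper", O_TMPFILE | O_WRONLY, 0600);

        if (fd < 0)
                return 1;       /* e.g. the filesystem lacks ->tmpfile */

        /* ... write the file contents through fd here ... */

        snprintf(proc_path, sizeof(proc_path), "/proc/self/fd/%d", fd);
        if (linkat(AT_FDCWD, proc_path, AT_FDCWD, "/upper/newfile",
                   AT_SYMLINK_FOLLOW) < 0)
                return 1;

        return close(fd) ? 1 : 0;
}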
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 9 +++++++++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8af450b0e57a2d..3822a909ce1f41 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) return err; } +static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) +{ + struct dentry *ret = vfs_tmpfile(dentry, mode, 0); + int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; + + pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); + return ret; +} + static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) { unsigned long x = (unsigned long) READ_ONCE(inode->i_private); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d14bca1850d95a..65f240001aa698 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -27,6 +27,7 @@ struct ovl_fs { struct ovl_config config; /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; + bool tmpfile; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82fd3..ff05065b510f7f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -825,6 +825,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) * creation of workdir in previous step. */ if (ufs->workdir) { + struct dentry *temp; + err = ovl_check_d_type_supported(&workpath); if (err < 0) goto out_put_workdir; @@ -836,6 +838,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) */ if (!err) pr_warn("overlayfs: upper fs needs to support d_type.\n"); + + /* Check if upper/work fs supports O_TMPFILE */ + temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); + ufs->tmpfile = !IS_ERR(temp); + if (ufs->tmpfile) + dput(temp); + else + pr_warn("overlayfs: upper fs does not support tmpfile.\n"); } } From 42f269b925405d9dd45014823e5057786d6ca452 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:54 +0200 Subject: [PATCH 0054/1911] ovl: rearrange code in ovl_copy_up_locked() As preparation to implementing copy up with O_TMPFILE, name the variable for dentry before final rename 'temp' and assign it to 'newdentry' only after rename. Also lookup upper dentry before looking up temp dentry and move ovl_set_timestamps() into ovl_copy_up_locked(), because that is going to be more convenient for upcoming change. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 49 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace6287..01e332725e94da 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,12 +232,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, const char *link) + struct kstat *stat, const char *link, + struct kstat *pstat) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; struct dentry *newdentry = NULL; struct dentry *upper = NULL; + struct dentry *temp = NULL; int err; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; @@ -248,25 +250,25 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, .link = link }; - newdentry = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out; - upper = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out1; + goto out; err = security_inode_copy_up(dentry, &new_creds); if (err < 0) - goto out2; + goto out1; if (new_creds) old_creds = override_creds(new_creds); - err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); + temp = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out1; + + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -281,39 +283,42 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, ovl_path_upper(dentry, &upperpath); BUG_ON(upperpath.dentry != NULL); - upperpath.dentry = newdentry; + upperpath.dentry = temp; err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); if (err) goto out_cleanup; } - err = ovl_copy_xattr(lowerpath->dentry, newdentry); + err = ovl_copy_xattr(lowerpath->dentry, temp); if (err) goto out_cleanup; - inode_lock(newdentry->d_inode); - err = ovl_set_attr(newdentry, stat); - inode_unlock(newdentry->d_inode); + inode_lock(temp->d_inode); + err = ovl_set_attr(temp, stat); + inode_unlock(temp->d_inode); if (err) goto out_cleanup; - err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; + newdentry = dget(temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); - newdentry = NULL; + + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, pstat); out2: - dput(upper); + dput(temp); out1: - dput(newdentry); + dput(upper); out: return err; out_cleanup: - ovl_cleanup(wdir, newdentry); + ovl_cleanup(wdir, temp); goto out2; } @@ -368,11 +373,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link); - if (!err) { - /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, &pstat); - } + stat, link, &pstat); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); From d8514d8edb5b045cf7f708e14f888ce760d60f0b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:55 +0200 Subject: [PATCH 0055/1911] ovl: copy up regular file using O_TMPFILE In preparation for concurrent copy up, implement copy up of regular 
file as O_TMPFILE that is linked to upperdir instead of a file in workdir that is moved to upperdir. Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 01e332725e94da..6e39e90b560523 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -20,6 +20,7 @@ #include #include #include "overlayfs.h" +#include "ovl_entry.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) @@ -233,7 +234,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, struct kstat *stat, const char *link, - struct kstat *pstat) + struct kstat *pstat, bool tmpfile) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -263,12 +264,17 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (new_creds) old_creds = override_creds(new_creds); - temp = ovl_lookup_temp(workdir, dentry); + if (tmpfile) + temp = ovl_do_tmpfile(upperdir, stat->mode); + else + temp = ovl_lookup_temp(workdir, dentry); err = PTR_ERR(temp); if (IS_ERR(temp)) goto out1; - err = ovl_create_real(wdir, temp, &cattr, NULL, true); + err = 0; + if (!tmpfile) + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -300,11 +306,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (err) goto out_cleanup; - err = ovl_do_rename(wdir, temp, udir, upper, 0); + if (tmpfile) + err = ovl_do_link(temp, udir, upper, true); + else + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; - newdentry = dget(temp); + newdentry = dget(tmpfile ? upper : temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); @@ -318,7 +327,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, return err; out_cleanup: - ovl_cleanup(wdir, temp); + if (!tmpfile) + ovl_cleanup(wdir, temp); goto out2; } @@ -342,6 +352,9 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *lowerdentry = lowerpath->dentry; struct dentry *upperdir; const char *link = NULL; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + /* Should we copyup with O_TMPFILE or with workdir? */ + bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -373,7 +386,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat); + stat, link, &pstat, tmpfile); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); From 39d3d60a54df05a1a32fa71159d7a26a530dee6c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:56 +0200 Subject: [PATCH 0056/1911] ovl: introduce copy up waitqueue The overlay sb 'copyup_wq' and overlay inode 'copying' condition variable are about to replace the upper sb rename_lock, as finer grained synchronization objects for concurrent copy up. 
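Together the flag and the waitqueue behave like a condition variable built on the waitqueue's internal spinlock. A generic sketch of the pattern with illustrative names (the overlayfs versions follow in the diff):

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static bool example_busy;

static int example_claim(void)
{
        int err;

        spin_lock(&example_wq.lock);
        /* Drops example_wq.lock while sleeping, retakes it on wakeup. */
        err = wait_event_interruptible_locked(example_wq, !example_busy);
        if (!err)
                example_busy = true;
        spin_unlock(&example_wq.lock);

        return err;
}

static void example_release(void)
{
        spin_lock(&example_wq.lock);
        example_busy = false;
        wake_up_locked(&example_wq);
        spin_unlock(&example_wq.lock);
}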
Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/ovl_entry.h | 2 ++ fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3822a909ce1f41..741dc0b6931fe9 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -178,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); +int ovl_copy_up_start(struct dentry *dentry); +void ovl_copy_up_end(struct dentry *dentry); /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 65f240001aa698..59614faa14c315 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,6 +28,7 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + wait_queue_head_t copyup_wq; }; /* private information held for every overlayfs dentry */ @@ -39,6 +40,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool copying; }; struct rcu_head rcu; }; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ff05065b510f7f..6792bb70b4ac13 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -708,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs) goto out; + init_waitqueue_head(&ufs->copyup_wq); ufs->config.redirect_dir = ovl_redirect_dir_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826cc5..01157d6e8cfe05 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -263,3 +263,33 @@ struct file *ovl_path_open(struct path *path, int flags) { return dentry_open(path, flags | O_NOATIME, current_cred()); } + +int ovl_copy_up_start(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + int err; + + spin_lock(&ofs->copyup_wq.lock); + err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying); + if (!err) { + if (oe->__upperdentry) + err = 1; /* Already copied up */ + else + oe->copying = true; + } + spin_unlock(&ofs->copyup_wq.lock); + + return err; +} + +void ovl_copy_up_end(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + spin_lock(&ofs->copyup_wq.lock); + oe->copying = false; + wake_up_locked(&ofs->copyup_wq); + spin_unlock(&ofs->copyup_wq.lock); +} From 01ad3eb8a07385bc8849f0ee7c800e7c8bd7287e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:57 +0200 Subject: [PATCH 0057/1911] ovl: concurrent copy up of regular files Now that copy up of regular file is done using O_TMPFILE, we don't need to hold rename_lock throughout copy up. Use the copy up waitqueue to synchronize concurrent copy up of the same file. Different regular files can be copied up concurrently. The upper dir inode_lock is taken instead of rename_lock, because it is needed for lookup and later for linking the temp file, but it is released while copying up data. 
Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6e39e90b560523..48eb8812ac5b22 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -291,7 +291,16 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, BUG_ON(upperpath.dentry != NULL); upperpath.dentry = temp; - err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); + if (tmpfile) { + inode_unlock(udir); + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + inode_lock_nested(udir, I_MUTEX_PARENT); + } else { + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + } + if (err) goto out_cleanup; } @@ -353,8 +362,6 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *upperdir; const char *link = NULL; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - /* Should we copyup with O_TMPFILE or with workdir? */ - bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -374,6 +381,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return PTR_ERR(link); } + /* Should we copyup with O_TMPFILE or with workdir? */ + if (S_ISREG(stat->mode) && ofs->tmpfile) { + err = ovl_copy_up_start(dentry); + /* err < 0: interrupted, err > 0: raced with another copy-up */ + if (unlikely(err)) { + pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err); + if (err > 0) + err = 0; + goto out_done; + } + + inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT); + err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, + stat, link, &pstat, true); + inode_unlock(upperdir->d_inode); + ovl_copy_up_end(dentry); + goto out_done; + } + err = -EIO; if (lock_rename(workdir, upperdir) != NULL) { pr_err("overlayfs: failed to lock workdir+upperdir\n"); @@ -386,9 +412,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat, tmpfile); + stat, link, &pstat, false); out_unlock: unlock_rename(workdir, upperdir); +out_done: do_delayed_call(&done); return err; From e593b2bf513dd4d3fbfa0f435392eea2c7f776f0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 23 Jan 2017 14:32:21 +0200 Subject: [PATCH 0058/1911] ovl: properly implement sync_filesystem() overlayfs syncs all inode pages on sync_filesystem(), but it also needs to call s_op->sync_fs() of upper fs for metadata sync. This fixes correctness of syncfs(2) as demonstrated by following xfs specific test: xfs_sync_stats() { echo $1 echo -n "xfs_log_force = " grep log /proc/fs/xfs/stat | awk '{ print $5 }' } xfs_sync_stats "before touch" touch x xfs_sync_stats "after touch" xfs_io -c syncfs . xfs_sync_stats "after syncfs" xfs_io -c fsync x xfs_sync_stats "after fsync" xfs_io -c fsync x xfs_sync_stats "after fsync #2" When this test is run in overlay mount over xfs, log force count does not increase with syncfs command. 
Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6792bb70b4ac13..346f668e7df027 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -160,6 +160,25 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs); } +static int ovl_sync_fs(struct super_block *sb, int wait) +{ + struct ovl_fs *ufs = sb->s_fs_info; + struct super_block *upper_sb; + int ret; + + if (!ufs->upper_mnt) + return 0; + upper_sb = ufs->upper_mnt->mnt_sb; + if (!upper_sb->s_op->sync_fs) + return 0; + + /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + down_read(&upper_sb->s_umount); + ret = upper_sb->s_op->sync_fs(upper_sb, wait); + up_read(&upper_sb->s_umount); + return ret; +} + /** * ovl_statfs * @sb: The overlayfs super block @@ -222,6 +241,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations ovl_super_operations = { .put_super = ovl_put_super, + .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, .remount_fs = ovl_remount, From 51f8f3c4e22535933ef9aecc00e9a6069e051b57 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2017 21:30:21 +0300 Subject: [PATCH 0059/1911] ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials If overlay was mounted by root then quota set for upper layer does not work because overlay now always uses mounter's credentials for operations. Also overlay might deplete reserved space and inodes in ext4. This patch drops capability SYS_RESOURCE from saved credentials. This affects creation of new files, whiteouts, and copy-up operations.
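prepare_creds() plus cap_lower() is the usual way to derive a capability-reduced credential that later operations temporarily assume via override_creds(); a minimal sketch with hypothetical function names:

#include <linux/capability.h>
#include <linux/cred.h>

static const struct cred *example_limited_cred(void)
{
        struct cred *cred = prepare_creds();

        if (!cred)
                return NULL;

        /* Never override disk quota limits or use reserved space. */
        cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
        return cred;
}

static void example_do_upper_op(const struct cred *limited)
{
        const struct cred *old = override_creds(limited);

        /* ... create files or whiteouts without CAP_SYS_RESOURCE ... */

        revert_creds(old);
}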
Signed-off-by: Konstantin Khlebnikov Fixes: 1175b6b8d963 ("ovl: do operations on underlying file system in mounter's context") Cc: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 346f668e7df027..7b218818136779 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -721,6 +721,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int stacklen = 0; unsigned int i; bool remote = false; + struct cred *cred; int err; err = -ENOMEM; @@ -901,10 +902,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; - ufs->creator_cred = prepare_creds(); - if (!ufs->creator_cred) + ufs->creator_cred = cred = prepare_creds(); + if (!cred) goto out_put_lower_mnt; + /* Never override disk quota limits or use reserved space */ + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) From df2e32c5ada8590b859863cf89f418c7962b9a21 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:53 -0800 Subject: [PATCH 0060/1911] qla2xxx: Remove direct access of scsi_status field in se_cmd Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_target.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 5b1287a63c494b..4aae861a671331 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1794,6 +1794,7 @@ typedef struct { #define SS_RESIDUAL_OVER BIT_10 #define SS_SENSE_LEN_VALID BIT_9 #define SS_RESPONSE_INFO_LEN_VALID BIT_8 +#define SS_SCSI_STATUS_BYTE 0xff #define SS_RESERVE_CONFLICT (BIT_4 | BIT_3) #define SS_BUSY_CONDITION BIT_3 diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e4fda84b959eca..439c1fc44c7a48 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2288,7 +2288,7 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, int i; if (qlt_need_explicit_conf(prm->tgt->ha, prm->cmd, 1)) { - if (prm->cmd->se_cmd.scsi_status != 0) { + if ((prm->rq_result & SS_SCSI_STATUS_BYTE) != 0) { ql_dbg(ql_dbg_tgt, prm->cmd->vha, 0xe017, "Skipping EXPLICIT_CONFORM and " "CTIO7_FLAGS_CONFORM_REQ for FCP READ w/ " From be92fc3fde95c94bb54ff471219552a581640a6b Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:54 -0800 Subject: [PATCH 0061/1911] qla2xxx: Cleanup TMF code translation from qla_target Move code code which converts Task Mgmt Command flags for ATIO to TCM #defines, from qla2xxx driver to tcm_qla2xxx driver. 
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 75 +++--------------------------- drivers/scsi/qla2xxx/qla_target.h | 6 ++- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 40 +++++++++++++++- 3 files changed, 49 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 439c1fc44c7a48..fcdd5c9ed3bf46 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1592,8 +1592,9 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, mcmd->sess = sess; memcpy(&mcmd->orig_iocb.abts, abts, sizeof(mcmd->orig_iocb.abts)); mcmd->reset_count = vha->hw->chip_reset; + mcmd->tmr_func = QLA_TGT_ABTS; - rc = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, TMR_ABORT_TASK, + rc = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, mcmd->tmr_func, abts->exchange_addr_to_abort); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf052, @@ -4051,7 +4052,6 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, struct qla_tgt_mgmt_cmd *mcmd; struct atio_from_isp *a = (struct atio_from_isp *)iocb; int res; - uint8_t tmr_func; mcmd = mempool_alloc(qla_tgt_mgmt_cmd_mempool, GFP_ATOMIC); if (!mcmd) { @@ -4073,74 +4073,12 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, mcmd->reset_count = vha->hw->chip_reset; switch (fn) { - case QLA_TGT_CLEAR_ACA: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10000, - "qla_target(%d): CLEAR_ACA received\n", sess->vha->vp_idx); - tmr_func = TMR_CLEAR_ACA; - break; - - case QLA_TGT_TARGET_RESET: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10001, - "qla_target(%d): TARGET_RESET received\n", - sess->vha->vp_idx); - tmr_func = TMR_TARGET_WARM_RESET; - break; - case QLA_TGT_LUN_RESET: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10002, - "qla_target(%d): LUN_RESET received\n", sess->vha->vp_idx); - tmr_func = TMR_LUN_RESET; - abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); - break; - - case QLA_TGT_CLEAR_TS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10003, - "qla_target(%d): CLEAR_TS received\n", sess->vha->vp_idx); - tmr_func = TMR_CLEAR_TASK_SET; - break; - - case QLA_TGT_ABORT_TS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10004, - "qla_target(%d): ABORT_TS received\n", sess->vha->vp_idx); - tmr_func = TMR_ABORT_TASK_SET; - break; -#if 0 - case QLA_TGT_ABORT_ALL: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10005, - "qla_target(%d): Doing ABORT_ALL_TASKS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_ABORT_ALL_SESS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10006, - "qla_target(%d): Doing ABORT_ALL_TASKS_SESS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_NEXUS_LOSS_SESS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10007, - "qla_target(%d): Doing NEXUS_LOSS_SESS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_NEXUS_LOSS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10008, - "qla_target(%d): Doing NEXUS_LOSS\n", sess->vha->vp_idx); - tmr_func = 0; - break; -#endif - default: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000a, - "qla_target(%d): Unknown task mgmt fn 0x%x\n", - sess->vha->vp_idx, fn); - mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool); - return -ENOSYS; + abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); + break; } - res = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, tmr_func, 0); + res = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, mcmd->tmr_func, 0); if (res != 0) { ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000b, "qla_target(%d): tgt.tgt_ops->handle_tmr() failed: %d\n", @@ -4215,8 +4153,9 @@ 
static int __qlt_abort_task(struct scsi_qla_host *vha, lun = a->u.isp24.fcp_cmnd.lun; unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); mcmd->reset_count = vha->hw->chip_reset; + mcmd->tmr_func = QLA_TGT_2G_ABORT_TASK; - rc = ha->tgt.tgt_ops->handle_tmr(mcmd, unpacked_lun, TMR_ABORT_TASK, + rc = ha->tgt.tgt_ops->handle_tmr(mcmd, unpacked_lun, mcmd->tmr_func, le16_to_cpu(iocb->u.isp2x.seq_id)); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf060, diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 0824a8164a2494..14f2e241db8cb5 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -744,7 +744,7 @@ struct qla_tgt_func_tmpl { unsigned char *, uint32_t, int, int, int); void (*handle_data)(struct qla_tgt_cmd *); void (*handle_dif_err)(struct qla_tgt_cmd *); - int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint8_t, + int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t, uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); void (*free_mcmd)(struct qla_tgt_mgmt_cmd *); @@ -795,6 +795,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_ABORT_ALL 0xFFFE #define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD #define QLA_TGT_NEXUS_LOSS 0xFFFC +#define QLA_TGT_ABTS 0xFFFB +#define QLA_TGT_2G_ABORT_TASK 0xFFFA /* Notify Acknowledge flags */ #define NOTIFY_ACK_RES_COUNT BIT_8 @@ -1056,7 +1058,7 @@ struct qla_tgt_sess_work_param { }; struct qla_tgt_mgmt_cmd { - uint8_t tmr_func; + uint16_t tmr_func; uint8_t fc_tm_rsp; struct qla_tgt_sess *sess; struct se_cmd se_cmd; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d925910be761df..1bec6ab66aafa2 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -563,13 +563,49 @@ static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) * Called from qla_target.c:qlt_issue_task_mgmt() */ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, - uint8_t tmr_func, uint32_t tag) + uint16_t tmr_func, uint32_t tag) { struct qla_tgt_sess *sess = mcmd->sess; struct se_cmd *se_cmd = &mcmd->se_cmd; + int transl_tmr_func = 0; + + switch (tmr_func) { + case QLA_TGT_ABTS: + pr_debug("%ld: ABTS received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK; + break; + case QLA_TGT_2G_ABORT_TASK: + pr_debug("%ld: 2G Abort Task received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK; + break; + case QLA_TGT_CLEAR_ACA: + pr_debug("%ld: CLEAR_ACA received\n", sess->vha->host_no); + transl_tmr_func = TMR_CLEAR_ACA; + break; + case QLA_TGT_TARGET_RESET: + pr_debug("%ld: TARGET_RESET received\n", sess->vha->host_no); + transl_tmr_func = TMR_TARGET_WARM_RESET; + break; + case QLA_TGT_LUN_RESET: + pr_debug("%ld: LUN_RESET received\n", sess->vha->host_no); + transl_tmr_func = TMR_LUN_RESET; + break; + case QLA_TGT_CLEAR_TS: + pr_debug("%ld: CLEAR_TS received\n", sess->vha->host_no); + transl_tmr_func = TMR_CLEAR_TASK_SET; + break; + case QLA_TGT_ABORT_TS: + pr_debug("%ld: ABORT_TS received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK_SET; + break; + default: + pr_debug("%ld: Unknown task mgmt fn 0x%x\n", + sess->vha->host_no, tmr_func); + return -ENOSYS; + } return target_submit_tmr(se_cmd, sess->se_sess, NULL, lun, mcmd, - tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF); + transl_tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF); } static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) From 1eb42f965cedafb700e9c902ddafb1c51e3117f7 Mon Sep 17 
00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:55 -0800 Subject: [PATCH 0062/1911] qla2xxx: Make trace flags more readable Trace flags are useful during debugging crash dumps using crash utility. These trace flags makes it easier to understand various states a command has successfully completed. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 28 +++++++-------- drivers/scsi/qla2xxx/qla_target.h | 55 ++++++++++++++---------------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 43 +++++++++++------------ 3 files changed, 59 insertions(+), 67 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fcdd5c9ed3bf46..5f31ae99fbe2a1 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3297,7 +3297,7 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) return EIO; } cmd->aborted = 1; - cmd->cmd_flags |= BIT_6; + cmd->trc_flags |= TRC_ABORT; spin_unlock_irqrestore(&cmd->cmd_lock, flags); qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1); @@ -3345,7 +3345,7 @@ static int qlt_prepare_srr_ctio(struct scsi_qla_host *vha, struct qla_tgt_srr_imm *imm; tgt->ctio_srr_id++; - cmd->cmd_flags |= BIT_15; + cmd->trc_flags |= TRC_SRR_CTIO; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019, "qla_target(%d): CTIO with SRR status received\n", vha->vp_idx); @@ -3528,7 +3528,7 @@ qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd) dump_stack(); } - cmd->cmd_flags |= BIT_17; + cmd->trc_flags |= TRC_FLUSH; ha->tgt.tgt_ops->free_cmd(cmd); } @@ -3694,7 +3694,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, */ if ((cmd->state != QLA_TGT_STATE_NEED_DATA) && (!cmd->aborted)) { - cmd->cmd_flags |= BIT_13; + cmd->trc_flags |= TRC_CTIO_ERR; if (qlt_term_ctio_exchange(vha, ctio, cmd, status)) return; } @@ -3702,7 +3702,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { - cmd->cmd_flags |= BIT_12; + cmd->trc_flags |= TRC_CTIO_DONE; } else if (cmd->state == QLA_TGT_STATE_NEED_DATA) { cmd->state = QLA_TGT_STATE_DATA_IN; @@ -3712,11 +3712,11 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, ha->tgt.tgt_ops->handle_data(cmd); return; } else if (cmd->aborted) { - cmd->cmd_flags |= BIT_18; + cmd->trc_flags |= TRC_CTIO_ABORTED; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); } else { - cmd->cmd_flags |= BIT_19; + cmd->trc_flags |= TRC_CTIO_STRANGE; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, "qla_target(%d): A command in state (%d) should " "not return a CTIO complete\n", vha->vp_idx, cmd->state); @@ -3781,7 +3781,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) int ret, fcp_task_attr, data_dir, bidi = 0; cmd->cmd_in_wq = 0; - cmd->cmd_flags |= BIT_1; + cmd->trc_flags |= TRC_DO_WORK; if (tgt->tgt_stop) goto out_term; @@ -3833,7 +3833,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) * cmd has not sent to target yet, so pass NULL as the second * argument to qlt_send_term_exchange() and free the memory here. 
*/ - cmd->cmd_flags |= BIT_2; + cmd->trc_flags |= TRC_DO_WORK_ERR; spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0); @@ -3884,7 +3884,7 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->loop_id = sess->loop_id; cmd->conf_compl_supported = sess->conf_compl_supported; - cmd->cmd_flags = 0; + cmd->trc_flags = 0; cmd->jiffies_at_alloc = get_jiffies_64(); cmd->reset_count = vha->hw->chip_reset; @@ -4020,7 +4020,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, } cmd->cmd_in_wq = 1; - cmd->cmd_flags |= BIT_0; + cmd->trc_flags |= TRC_NEW_CMD; cmd->se_cmd.cpuid = ha->msix_count ? ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND; @@ -4712,7 +4712,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); if (xmit_type & QLA_TGT_XMIT_DATA) { - cmd->cmd_flags |= BIT_8; + cmd->trc_flags |= TRC_SRR_XRDY; qlt_rdy_to_xfer(cmd); } } else { @@ -4731,7 +4731,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, /* Transmit response in case of status and data-in cases */ if (resp) { - cmd->cmd_flags |= BIT_7; + cmd->trc_flags |= TRC_SRR_RSP; qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status); } @@ -4747,7 +4747,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, cmd->state = QLA_TGT_STATE_DATA_IN; dump_stack(); } else { - cmd->cmd_flags |= BIT_9; + cmd->trc_flags |= TRC_SRR_TERM; qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); } spin_unlock_irqrestore(&ha->hardware_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 14f2e241db8cb5..5d00529b68c4e7 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -965,35 +965,28 @@ struct qla_tgt_sess { qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; }; -typedef enum { - /* - * BIT_0 - Atio Arrival / schedule to work - * BIT_1 - qlt_do_work - * BIT_2 - qlt_do work failed - * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending - * BIT_4 - read respond/tcm_qla2xx_queue_data_in - * BIT_5 - status respond / tcm_qla2xx_queue_status - * BIT_6 - tcm request to abort/Term exchange. - * pre_xmit_response->qlt_send_term_exchange - * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) - * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) - * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) - * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data - - * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd - * BIT_13 - Bad completion - - * qlt_ctio_do_completion --> qlt_term_ctio_exchange - * BIT_14 - Back end data received/sent. 
- * BIT_15 - SRR prepare ctio - * BIT_16 - complete free - * BIT_17 - flush - qlt_abort_cmd_on_host_reset - * BIT_18 - completion w/abort status - * BIT_19 - completion w/unknown status - * BIT_20 - tcm_qla2xxx_free_cmd - */ - CMD_FLAG_DATA_WORK = BIT_11, - CMD_FLAG_DATA_WORK_FREE = BIT_21, -} cmd_flags_t; +enum trace_flags { + TRC_NEW_CMD = BIT_0, + TRC_DO_WORK = BIT_1, + TRC_DO_WORK_ERR = BIT_2, + TRC_XFR_RDY = BIT_3, + TRC_XMIT_DATA = BIT_4, + TRC_XMIT_STATUS = BIT_5, + TRC_SRR_RSP = BIT_6, + TRC_SRR_XRDY = BIT_7, + TRC_SRR_TERM = BIT_8, + TRC_SRR_CTIO = BIT_9, + TRC_FLUSH = BIT_10, + TRC_CTIO_ERR = BIT_11, + TRC_CTIO_DONE = BIT_12, + TRC_CTIO_ABORTED = BIT_13, + TRC_CTIO_STRANGE= BIT_14, + TRC_CMD_DONE = BIT_15, + TRC_CMD_CHK_STOP = BIT_16, + TRC_CMD_FREE = BIT_17, + TRC_DATA_IN = BIT_18, + TRC_ABORT = BIT_19, +}; struct qla_tgt_cmd { struct se_cmd se_cmd; @@ -1016,6 +1009,8 @@ struct qla_tgt_cmd { unsigned int cmd_sent_to_fw:1; unsigned int cmd_in_wq:1; unsigned int aborted:1; + unsigned int data_work:1; + unsigned int data_work_free:1; struct scatterlist *sg; /* cmd data buffer SG vector */ int sg_cnt; /* SG segments count */ @@ -1040,7 +1035,7 @@ struct qla_tgt_cmd { uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; - cmd_flags_t cmd_flags; + enum trace_flags trc_flags; }; struct qla_tgt_sess_work_param { diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 1bec6ab66aafa2..af4a198e553738 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -282,10 +282,10 @@ static void tcm_qla2xxx_complete_free(struct work_struct *work) cmd->cmd_in_wq = 0; - WARN_ON(cmd->cmd_flags & BIT_16); + WARN_ON(cmd->trc_flags & TRC_CMD_FREE); cmd->vha->tgt_counters.qla_core_ret_sta_ctio++; - cmd->cmd_flags |= BIT_16; + cmd->trc_flags |= TRC_CMD_FREE; transport_generic_free_cmd(&cmd->se_cmd, 0); } @@ -299,8 +299,8 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) cmd->vha->tgt_counters.core_qla_free_cmd++; cmd->cmd_in_wq = 1; - BUG_ON(cmd->cmd_flags & BIT_20); - cmd->cmd_flags |= BIT_20; + WARN_ON(cmd->trc_flags & TRC_CMD_DONE); + cmd->trc_flags |= TRC_CMD_DONE; INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); @@ -315,7 +315,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) if ((se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) == 0) { cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - cmd->cmd_flags |= BIT_14; + cmd->trc_flags |= TRC_CMD_CHK_STOP; } return target_put_sess_cmd(se_cmd); @@ -377,7 +377,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) cmd->se_cmd.se_cmd_flags); return 0; } - cmd->cmd_flags |= BIT_3; + cmd->trc_flags |= TRC_XFR_RDY; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -493,9 +493,9 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) cmd->cmd_in_wq = 0; spin_lock_irqsave(&cmd->cmd_lock, flags); - cmd->cmd_flags |= CMD_FLAG_DATA_WORK; + cmd->data_work = 1; if (cmd->aborted) { - cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + cmd->data_work_free = 1; spin_unlock_irqrestore(&cmd->cmd_lock, flags); tcm_qla2xxx_free_cmd(cmd); @@ -532,7 +532,7 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) */ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) { - cmd->cmd_flags |= BIT_10; + cmd->trc_flags |= TRC_DATA_IN; cmd->cmd_in_wq = 1; INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work); 
queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); @@ -627,7 +627,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) return 0; } - cmd->cmd_flags |= BIT_4; + cmd->trc_flags |= TRC_XMIT_DATA; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -658,11 +658,11 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - if (cmd->cmd_flags & BIT_5) { - pr_crit("Bit_5 already set for cmd = %p.\n", cmd); + if (cmd->trc_flags & TRC_XMIT_STATUS) { + pr_crit("Multiple calls for status = %p.\n", cmd); dump_stack(); } - cmd->cmd_flags |= BIT_5; + cmd->trc_flags |= TRC_XMIT_STATUS; if (se_cmd->data_direction == DMA_FROM_DEVICE) { /* @@ -718,10 +718,7 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) qlt_xmit_tm_rsp(mcmd); } - -#define DATA_WORK_NOT_FREE(_flags) \ - (( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \ - CMD_FLAG_DATA_WORK) +#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free) static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, @@ -733,13 +730,13 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if ((cmd->state == QLA_TGT_STATE_NEW)|| - ((cmd->state == QLA_TGT_STATE_DATA_IN) && - DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) { - - cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + ((cmd->state == QLA_TGT_STATE_DATA_IN) && + DATA_WORK_NOT_FREE(cmd))) { + cmd->data_work_free = 1; spin_unlock_irqrestore(&cmd->cmd_lock, flags); - /* Cmd have not reached firmware. - * Use this trigger to free it. */ + /* + * cmd has not reached fw, Use this trigger to free it. + */ tcm_qla2xxx_free_cmd(cmd); return; } From 2c39b5ca2a8cb99fc463fd622e8ea3b9fbb980a2 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 19 Jan 2017 22:27:56 -0800 Subject: [PATCH 0063/1911] qla2xxx: Remove SRR code During the initial implementation, tape support was included but not enabled by default on the target. So far, we have not seen any target customer requesting this support. Since this code is not being actively used, we want to remove it; we will add it back if there are any requests for SRR support in the future. 
Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 641 ------------------------------ drivers/scsi/qla2xxx/qla_target.h | 21 - 2 files changed, 662 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5f31ae99fbe2a1..119a445a82f6ba 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -106,8 +106,6 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, int fn, void *iocb, int flags); static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); -static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha, - struct qla_tgt_srr_imm *imm, int ha_lock); static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd); static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, @@ -2183,95 +2181,6 @@ static inline int qlt_need_explicit_conf(struct qla_hw_data *ha, cmd->conf_compl_supported; } -#ifdef CONFIG_QLA_TGT_DEBUG_SRR -/* - * Original taken from the XFS code - */ -static unsigned long qlt_srr_random(void) -{ - static int Inited; - static unsigned long RandomValue; - static DEFINE_SPINLOCK(lock); - /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */ - register long rv; - register long lo; - register long hi; - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (!Inited) { - RandomValue = jiffies; - Inited = 1; - } - rv = RandomValue; - hi = rv / 127773; - lo = rv % 127773; - rv = 16807 * lo - 2836 * hi; - if (rv <= 0) - rv += 2147483647; - RandomValue = rv; - spin_unlock_irqrestore(&lock, flags); - return rv; -} - -static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) -{ -#if 0 /* This is not a real status packets lost, so it won't lead to SRR */ - if ((*xmit_type & QLA_TGT_XMIT_STATUS) && (qlt_srr_random() % 200) - == 50) { - *xmit_type &= ~QLA_TGT_XMIT_STATUS; - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf015, - "Dropping cmd %p (tag %d) status", cmd, se_cmd->tag); - } -#endif - /* - * It's currently not possible to simulate SRRs for FCP_WRITE without - * a physical link layer failure, so don't even try here.. 
- */ - if (cmd->dma_data_direction != DMA_FROM_DEVICE) - return; - - if (qlt_has_data(cmd) && (cmd->sg_cnt > 1) && - ((qlt_srr_random() % 100) == 20)) { - int i, leave = 0; - unsigned int tot_len = 0; - - while (leave == 0) - leave = qlt_srr_random() % cmd->sg_cnt; - - for (i = 0; i < leave; i++) - tot_len += cmd->sg[i].length; - - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf016, - "Cutting cmd %p (tag %d) buffer" - " tail to len %d, sg_cnt %d (cmd->bufflen %d," - " cmd->sg_cnt %d)", cmd, se_cmd->tag, tot_len, leave, - cmd->bufflen, cmd->sg_cnt); - - cmd->bufflen = tot_len; - cmd->sg_cnt = leave; - } - - if (qlt_has_data(cmd) && ((qlt_srr_random() % 100) == 70)) { - unsigned int offset = qlt_srr_random() % cmd->bufflen; - - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf017, - "Cutting cmd %p (tag %d) buffer head " - "to offset %d (cmd->bufflen %d)", cmd, se_cmd->tag, offset, - cmd->bufflen); - if (offset == 0) - *xmit_type &= ~QLA_TGT_XMIT_DATA; - else if (qlt_set_data_offset(cmd, offset)) { - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf018, - "qlt_set_data_offset() failed (tag %d)", se_cmd->tag); - } - } -} -#else -static inline void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) -{} -#endif - static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, struct qla_tgt_prm *prm) { @@ -2686,7 +2595,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, spin_unlock_irqrestore(&ha->hardware_lock, flags); memset(&prm, 0, sizeof(prm)); - qlt_check_srr_debug(cmd, &xmit_type); ql_dbg(ql_dbg_tgt, cmd->vha, 0xe018, "is_send_status=%d, cmd->bufflen=%d, cmd->sg_cnt=%d, cmd->dma_data_direction=%d se_cmd[%p]\n", @@ -3336,90 +3244,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) } EXPORT_SYMBOL(qlt_free_cmd); -/* ha->hardware_lock supposed to be held on entry */ -static int qlt_prepare_srr_ctio(struct scsi_qla_host *vha, - struct qla_tgt_cmd *cmd, void *ctio) -{ - struct qla_tgt_srr_ctio *sc; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_srr_imm *imm; - - tgt->ctio_srr_id++; - cmd->trc_flags |= TRC_SRR_CTIO; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019, - "qla_target(%d): CTIO with SRR status received\n", vha->vp_idx); - - if (!ctio) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf055, - "qla_target(%d): SRR CTIO, but ctio is NULL\n", - vha->vp_idx); - return -EINVAL; - } - - sc = kzalloc(sizeof(*sc), GFP_ATOMIC); - if (sc != NULL) { - sc->cmd = cmd; - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - sc->srr_id = tgt->ctio_srr_id; - list_add_tail(&sc->srr_list_entry, - &tgt->srr_ctio_list); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01a, - "CTIO SRR %p added (id %d)\n", sc, sc->srr_id); - if (tgt->imm_srr_id == tgt->ctio_srr_id) { - int found = 0; - list_for_each_entry(imm, &tgt->srr_imm_list, - srr_list_entry) { - if (imm->srr_id == sc->srr_id) { - found = 1; - break; - } - } - if (found) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01b, - "Scheduling srr work\n"); - schedule_work(&tgt->srr_work); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf056, - "qla_target(%d): imm_srr_id " - "== ctio_srr_id (%d), but there is no " - "corresponding SRR IMM, deleting CTIO " - "SRR %p\n", vha->vp_idx, - tgt->ctio_srr_id, sc); - list_del(&sc->srr_list_entry); - spin_unlock(&tgt->srr_lock); - - kfree(sc); - return -EINVAL; - } - } - spin_unlock(&tgt->srr_lock); - } else { - struct qla_tgt_srr_imm *ti; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf057, - "qla_target(%d): Unable to allocate SRR CTIO entry\n", - vha->vp_idx); - spin_lock(&tgt->srr_lock); - list_for_each_entry_safe(imm, ti, &tgt->srr_imm_list, - srr_list_entry) { - if 
(imm->srr_id == tgt->ctio_srr_id) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01c, - "IMM SRR %p deleted (id %d)\n", - imm, imm->srr_id); - list_del(&imm->srr_list_entry); - qlt_reject_free_srr_imm(vha, imm, 1); - } - } - spin_unlock(&tgt->srr_lock); - - return -ENOMEM; - } - - return 0; -} - /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ @@ -3637,16 +3461,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, } break; } - case CTIO_SRR_RECEIVED: - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05a, - "qla_target(%d): CTIO with SRR_RECEIVED" - " status %x received (state %x, se_cmd %p)\n", - vha->vp_idx, status, cmd->state, se_cmd); - if (qlt_prepare_srr_ctio(vha, cmd, ctio) != 0) - break; - else - return; - case CTIO_DIF_ERROR: { struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; @@ -4499,451 +4313,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, return res; } -static int qlt_set_data_offset(struct qla_tgt_cmd *cmd, uint32_t offset) -{ -#if 1 - /* - * FIXME: Reject non zero SRR relative offset until we can test - * this code properly. - */ - pr_debug("Rejecting non zero SRR rel_offs: %u\n", offset); - return -1; -#else - struct scatterlist *sg, *sgp, *sg_srr, *sg_srr_start = NULL; - size_t first_offset = 0, rem_offset = offset, tmp = 0; - int i, sg_srr_cnt, bufflen = 0; - - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe023, - "Entering qla_tgt_set_data_offset: cmd: %p, cmd->sg: %p, " - "cmd->sg_cnt: %u, direction: %d\n", - cmd, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction); - - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe055, - "Missing cmd->sg or zero cmd->sg_cnt in" - " qla_tgt_set_data_offset\n"); - return -EINVAL; - } - /* - * Walk the current cmd->sg list until we locate the new sg_srr_start - */ - for_each_sg(cmd->sg, sg, cmd->sg_cnt, i) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe024, - "sg[%d]: %p page: %p, length: %d, offset: %d\n", - i, sg, sg_page(sg), sg->length, sg->offset); - - if ((sg->length + tmp) > offset) { - first_offset = rem_offset; - sg_srr_start = sg; - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe025, - "Found matching sg[%d], using %p as sg_srr_start, " - "and using first_offset: %zu\n", i, sg, - first_offset); - break; - } - tmp += sg->length; - rem_offset -= sg->length; - } - - if (!sg_srr_start) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe056, - "Unable to locate sg_srr_start for offset: %u\n", offset); - return -EINVAL; - } - sg_srr_cnt = (cmd->sg_cnt - i); - - sg_srr = kzalloc(sizeof(struct scatterlist) * sg_srr_cnt, GFP_KERNEL); - if (!sg_srr) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe057, - "Unable to allocate sgp\n"); - return -ENOMEM; - } - sg_init_table(sg_srr, sg_srr_cnt); - sgp = &sg_srr[0]; - /* - * Walk the remaining list for sg_srr_start, mapping to the newly - * allocated sg_srr taking first_offset into account. 
- */ - for_each_sg(sg_srr_start, sg, sg_srr_cnt, i) { - if (first_offset) { - sg_set_page(sgp, sg_page(sg), - (sg->length - first_offset), first_offset); - first_offset = 0; - } else { - sg_set_page(sgp, sg_page(sg), sg->length, 0); - } - bufflen += sgp->length; - - sgp = sg_next(sgp); - if (!sgp) - break; - } - - cmd->sg = sg_srr; - cmd->sg_cnt = sg_srr_cnt; - cmd->bufflen = bufflen; - cmd->offset += offset; - cmd->free_sg = 1; - - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe026, "New cmd->sg: %p\n", cmd->sg); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe027, "New cmd->sg_cnt: %u\n", - cmd->sg_cnt); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe028, "New cmd->bufflen: %u\n", - cmd->bufflen); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe029, "New cmd->offset: %u\n", - cmd->offset); - - if (cmd->sg_cnt < 0) - BUG(); - - if (cmd->bufflen < 0) - BUG(); - - return 0; -#endif -} - -static inline int qlt_srr_adjust_data(struct qla_tgt_cmd *cmd, - uint32_t srr_rel_offs, int *xmit_type) -{ - int res = 0, rel_offs; - - rel_offs = srr_rel_offs - cmd->offset; - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf027, "srr_rel_offs=%d, rel_offs=%d", - srr_rel_offs, rel_offs); - - *xmit_type = QLA_TGT_XMIT_ALL; - - if (rel_offs < 0) { - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf062, - "qla_target(%d): SRR rel_offs (%d) < 0", - cmd->vha->vp_idx, rel_offs); - res = -1; - } else if (rel_offs == cmd->bufflen) - *xmit_type = QLA_TGT_XMIT_STATUS; - else if (rel_offs > 0) - res = qlt_set_data_offset(cmd, rel_offs); - - return res; -} - -/* No locks, thread context */ -static void qlt_handle_srr(struct scsi_qla_host *vha, - struct qla_tgt_srr_ctio *sctio, struct qla_tgt_srr_imm *imm) -{ - struct imm_ntfy_from_isp *ntfy = - (struct imm_ntfy_from_isp *)&imm->imm_ntfy; - struct qla_hw_data *ha = vha->hw; - struct qla_tgt_cmd *cmd = sctio->cmd; - struct se_cmd *se_cmd = &cmd->se_cmd; - unsigned long flags; - int xmit_type = 0, resp = 0; - uint32_t offset; - uint16_t srr_ui; - - offset = le32_to_cpu(ntfy->u.isp24.srr_rel_offs); - srr_ui = ntfy->u.isp24.srr_ui; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf028, "SRR cmd %p, srr_ui %x\n", - cmd, srr_ui); - - switch (srr_ui) { - case SRR_IU_STATUS: - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - xmit_type = QLA_TGT_XMIT_STATUS; - resp = 1; - break; - case SRR_IU_DATA_IN: - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf063, - "Unable to process SRR_IU_DATA_IN due to" - " missing cmd->sg, state: %d\n", cmd->state); - dump_stack(); - goto out_reject; - } - if (se_cmd->scsi_status != 0) { - ql_dbg(ql_dbg_tgt, vha, 0xe02a, - "Rejecting SRR_IU_DATA_IN with non GOOD " - "scsi_status\n"); - goto out_reject; - } - cmd->bufflen = se_cmd->data_length; - - if (qlt_has_data(cmd)) { - if (qlt_srr_adjust_data(cmd, offset, &xmit_type) != 0) - goto out_reject; - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - resp = 1; - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf064, - "qla_target(%d): SRR for in data for cmd without them (tag %lld, SCSI status %d), reject", - vha->vp_idx, se_cmd->tag, - cmd->se_cmd.scsi_status); - goto out_reject; - } - break; - case SRR_IU_DATA_OUT: - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf065, - "Unable to process SRR_IU_DATA_OUT due to" - " missing cmd->sg\n"); - dump_stack(); - goto out_reject; - } - if (se_cmd->scsi_status != 0) { - 
ql_dbg(ql_dbg_tgt, vha, 0xe02b, - "Rejecting SRR_IU_DATA_OUT" - " with non GOOD scsi_status\n"); - goto out_reject; - } - cmd->bufflen = se_cmd->data_length; - - if (qlt_has_data(cmd)) { - if (qlt_srr_adjust_data(cmd, offset, &xmit_type) != 0) - goto out_reject; - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (xmit_type & QLA_TGT_XMIT_DATA) { - cmd->trc_flags |= TRC_SRR_XRDY; - qlt_rdy_to_xfer(cmd); - } - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf066, - "qla_target(%d): SRR for out data for cmd without them (tag %lld, SCSI status %d), reject", - vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); - goto out_reject; - } - break; - default: - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf067, - "qla_target(%d): Unknown srr_ui value %x", - vha->vp_idx, srr_ui); - goto out_reject; - } - - /* Transmit response in case of status and data-in cases */ - if (resp) { - cmd->trc_flags |= TRC_SRR_RSP; - qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status); - } - - return; - -out_reject: - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); - if (cmd->state == QLA_TGT_STATE_NEED_DATA) { - cmd->state = QLA_TGT_STATE_DATA_IN; - dump_stack(); - } else { - cmd->trc_flags |= TRC_SRR_TERM; - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); - } - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} - -static void qlt_reject_free_srr_imm(struct scsi_qla_host *vha, - struct qla_tgt_srr_imm *imm, int ha_locked) -{ - struct qla_hw_data *ha = vha->hw; - unsigned long flags = 0; - -#ifndef __CHECKER__ - if (!ha_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif - - qlt_send_notify_ack(vha, (void *)&imm->imm_ntfy, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); - -#ifndef __CHECKER__ - if (!ha_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#endif - - kfree(imm); -} - -static void qlt_handle_srr_work(struct work_struct *work) -{ - struct qla_tgt *tgt = container_of(work, struct qla_tgt, srr_work); - struct scsi_qla_host *vha = tgt->vha; - struct qla_tgt_srr_ctio *sctio; - unsigned long flags; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf029, "Entering SRR work (tgt %p)\n", - tgt); - -restart: - spin_lock_irqsave(&tgt->srr_lock, flags); - list_for_each_entry(sctio, &tgt->srr_ctio_list, srr_list_entry) { - struct qla_tgt_srr_imm *imm, *i, *ti; - struct qla_tgt_cmd *cmd; - struct se_cmd *se_cmd; - - imm = NULL; - list_for_each_entry_safe(i, ti, &tgt->srr_imm_list, - srr_list_entry) { - if (i->srr_id == sctio->srr_id) { - list_del(&i->srr_list_entry); - if (imm) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf068, - "qla_target(%d): There must be " - "only one IMM SRR per CTIO SRR " - "(IMM SRR %p, id %d, CTIO %p\n", - vha->vp_idx, i, i->srr_id, sctio); - qlt_reject_free_srr_imm(tgt->vha, i, 0); - } else - imm = i; - } - } - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02a, - "IMM SRR %p, CTIO SRR %p (id %d)\n", imm, sctio, - sctio->srr_id); - - if (imm == NULL) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02b, - "Not found matching IMM for SRR CTIO (id %d)\n", - sctio->srr_id); - continue; - } else - list_del(&sctio->srr_list_entry); - - spin_unlock_irqrestore(&tgt->srr_lock, flags); - - cmd = sctio->cmd; - /* - * Reset qla_tgt_cmd SRR values and SGL pointer+count 
to follow - * tcm_qla2xxx_write_pending() and tcm_qla2xxx_queue_data_in() - * logic.. - */ - cmd->offset = 0; - if (cmd->free_sg) { - kfree(cmd->sg); - cmd->sg = NULL; - cmd->free_sg = 0; - } - se_cmd = &cmd->se_cmd; - - cmd->sg_cnt = se_cmd->t_data_nents; - cmd->sg = se_cmd->t_data_sg; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02c, - "SRR cmd %p (se_cmd %p, tag %lld, op %x), sg_cnt=%d, offset=%d", - cmd, &cmd->se_cmd, se_cmd->tag, se_cmd->t_task_cdb ? - se_cmd->t_task_cdb[0] : 0, cmd->sg_cnt, cmd->offset); - - qlt_handle_srr(vha, sctio, imm); - - kfree(imm); - kfree(sctio); - goto restart; - } - spin_unlock_irqrestore(&tgt->srr_lock, flags); -} - -/* ha->hardware_lock supposed to be held on entry */ -static void qlt_prepare_srr_imm(struct scsi_qla_host *vha, - struct imm_ntfy_from_isp *iocb) -{ - struct qla_tgt_srr_imm *imm; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_srr_ctio *sctio; - - tgt->imm_srr_id++; - - ql_log(ql_log_warn, vha, 0xf02d, "qla_target(%d): SRR received\n", - vha->vp_idx); - - imm = kzalloc(sizeof(*imm), GFP_ATOMIC); - if (imm != NULL) { - memcpy(&imm->imm_ntfy, iocb, sizeof(imm->imm_ntfy)); - - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - imm->srr_id = tgt->imm_srr_id; - list_add_tail(&imm->srr_list_entry, - &tgt->srr_imm_list); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02e, - "IMM NTFY SRR %p added (id %d, ui %x)\n", - imm, imm->srr_id, iocb->u.isp24.srr_ui); - if (tgt->imm_srr_id == tgt->ctio_srr_id) { - int found = 0; - list_for_each_entry(sctio, &tgt->srr_ctio_list, - srr_list_entry) { - if (sctio->srr_id == imm->srr_id) { - found = 1; - break; - } - } - if (found) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02f, "%s", - "Scheduling srr work\n"); - schedule_work(&tgt->srr_work); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf030, - "qla_target(%d): imm_srr_id " - "== ctio_srr_id (%d), but there is no " - "corresponding SRR CTIO, deleting IMM " - "SRR %p\n", vha->vp_idx, tgt->ctio_srr_id, - imm); - list_del(&imm->srr_list_entry); - - kfree(imm); - - spin_unlock(&tgt->srr_lock); - goto out_reject; - } - } - spin_unlock(&tgt->srr_lock); - } else { - struct qla_tgt_srr_ctio *ts; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf069, - "qla_target(%d): Unable to allocate SRR IMM " - "entry, SRR request will be rejected\n", vha->vp_idx); - - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - list_for_each_entry_safe(sctio, ts, &tgt->srr_ctio_list, - srr_list_entry) { - if (sctio->srr_id == tgt->imm_srr_id) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf031, - "CTIO SRR %p deleted (id %d)\n", - sctio, sctio->srr_id); - list_del(&sctio->srr_list_entry); - qlt_send_term_exchange(vha, sctio->cmd, - &sctio->cmd->atio, 1, 0); - kfree(sctio); - } - } - spin_unlock(&tgt->srr_lock); - goto out_reject; - } - - return; - -out_reject: - qlt_send_notify_ack(vha, iocb, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); -} - /* * ha->hardware_lock supposed to be held on entry. 
Might drop it, then reaquire */ @@ -5065,12 +4434,6 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha, if (qlt_24xx_handle_els(vha, iocb) == 0) send_notify_ack = 0; break; - - case IMM_NTFY_SRR: - qlt_prepare_srr_imm(vha, iocb); - send_notify_ack = 0; - break; - default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf06d, "qla_target(%d): Received unknown immediate " @@ -6021,10 +5384,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) spin_lock_init(&tgt->sess_work_lock); INIT_WORK(&tgt->sess_work, qlt_sess_work_fn); INIT_LIST_HEAD(&tgt->sess_works_list); - spin_lock_init(&tgt->srr_lock); - INIT_LIST_HEAD(&tgt->srr_ctio_list); - INIT_LIST_HEAD(&tgt->srr_imm_list); - INIT_WORK(&tgt->srr_work, qlt_handle_srr_work); atomic_set(&tgt->tgt_global_resets_count, 0); base_vha->vha_tgt.qla_tgt = tgt; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 5d00529b68c4e7..f72bd0755a598a 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -890,16 +890,7 @@ struct qla_tgt { int notify_ack_expected; int abts_resp_expected; int modify_lun_expected; - - int ctio_srr_id; - int imm_srr_id; - spinlock_t srr_lock; - struct list_head srr_ctio_list; - struct list_head srr_imm_list; - struct work_struct srr_work; - atomic_t tgt_global_resets_count; - struct list_head tgt_list_entry; }; @@ -1087,18 +1078,6 @@ struct qla_tgt_prm { uint16_t tot_dsds; }; -struct qla_tgt_srr_imm { - struct list_head srr_list_entry; - int srr_id; - struct imm_ntfy_from_isp imm_ntfy; -}; - -struct qla_tgt_srr_ctio { - struct list_head srr_list_entry; - int srr_id; - struct qla_tgt_cmd *cmd; -}; - /* Check for Switch reserved address */ #define IS_SW_RESV_ADDR(_s_id) \ ((_s_id.b.domain == 0xff) && (_s_id.b.area == 0xfc)) From 5e4deaf6a47f397af68d3297bdc2913ae890bd4a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:57 -0800 Subject: [PATCH 0064/1911] qla2xxx: Fix wrong argument in sp done callback The sp->done callback expects a scsi_qla_host pointer as its argument, but qla_hw_data is passed in instead. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_isr.c | 8 ++++---- drivers/scsi/qla2xxx/qla_mr.c | 6 +++--- drivers/scsi/qla2xxx/qla_os.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index dc88a09f9043c9..802b50cc89d0ab 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1212,7 +1212,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, req->outstanding_cmds[index] = NULL; /* Save ISP completion status */ - sp->done(ha, sp, DID_OK << 16); + sp->done(vha, sp, DID_OK << 16); } else { ql_log(ql_log_warn, vha, 0x3016, "Invalid SCSI SRB.\n"); @@ -2405,7 +2405,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) resid_len, fw_resid_len, sp, cp); if (rsp->status_srb == NULL) - sp->done(ha, sp, res); + sp->done(vha, sp, res); } /** @@ -2462,7 +2462,7 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. 
*/ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(ha, sp, cp->result); + sp->done(vha, sp, cp->result); } } @@ -2498,7 +2498,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(ha, sp, res); + sp->done(vha, sp, res); return; } fatal: diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 02f1de18bc2b61..d38bc6452abd4c 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -2537,7 +2537,7 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) par_sense_len, rsp_info_len); if (rsp->status_srb == NULL) - sp->done(ha, sp, res); + sp->done(vha, sp, res); } /** @@ -2614,7 +2614,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(ha, sp, cp->result); + sp->done(vha, sp, cp->result); } } @@ -2695,7 +2695,7 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(ha, sp, res); + sp->done(vha, sp, res); return; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 0a000ecf088141..62ca1ddb0cc717 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1203,7 +1203,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) } spin_lock_irqsave(&ha->hardware_lock, flags); - sp->done(ha, sp, 0); + sp->done(vha, sp, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); /* Did the command return during mailbox execution? */ From 37cacc0a13012eee9fa0b07563eb149ec9244b1e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:58 -0800 Subject: [PATCH 0065/1911] qla2xxx: Use d_id instead of s_id for more clarity Update the code to use d_id instead of s_id for better readability and clarity. 
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig [ bvanassche: fixed spelling of patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_dfs.c | 9 +++---- drivers/scsi/qla2xxx/qla_target.c | 24 ++++++++--------- drivers/scsi/qla2xxx/qla_target.h | 2 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 41 ++++++++++++++++-------------- 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 34272fde8a5b0d..98550c9b61619e 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -26,12 +26,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) seq_printf(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) { + list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) seq_printf(s, "%02x:%02x:%02x %8phC %d\n", - sess->s_id.b.domain,sess->s_id.b.area, - sess->s_id.b.al_pa, sess->port_name, - sess->loop_id); - } + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa, sess->port_name, + sess->loop_id); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 119a445a82f6ba..671875a5095146 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -530,7 +530,7 @@ static void qlt_free_session_done(struct work_struct *work) "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" " s_id %02x:%02x:%02x logout %d keep %d els_logo %d\n", __func__, sess->se_sess, sess, sess->port_name, sess->loop_id, - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->logout_on_delete, sess->keep_nport_handle, sess->send_els_logo); @@ -538,7 +538,7 @@ static void qlt_free_session_done(struct work_struct *work) if (sess->send_els_logo) { qlt_port_logo_t logo; - logo.id = sess->s_id; + logo.id = sess->d_id; logo.cmd_count = 0; qlt_send_first_logo(vha, &logo); } @@ -548,7 +548,7 @@ static void qlt_free_session_done(struct work_struct *work) memset(&fcport, 0, sizeof(fcport)); fcport.loop_id = sess->loop_id; - fcport.d_id = sess->s_id; + fcport.d_id = sess->d_id; memcpy(fcport.port_name, sess->port_name, WWN_SIZE); fcport.vha = vha; fcport.tgt_session = sess; @@ -757,7 +757,7 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", sess->vha->vp_idx, sess->port_name, sess->loop_id, - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, sess->generation); @@ -892,8 +892,8 @@ static struct qla_tgt_sess *qlt_create_sess( ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, "Double sess %p found (s_id %x:%x:%x, " "loop_id %d), updating to d_id %x:%x:%x, " - "loop_id %d", sess, sess->s_id.b.domain, - sess->s_id.b.al_pa, sess->s_id.b.area, + "loop_id %d", sess, sess->d_id.b.domain, + sess->d_id.b.al_pa, sess->d_id.b.area, sess->loop_id, fcport->d_id.b.domain, fcport->d_id.b.al_pa, fcport->d_id.b.area, fcport->loop_id); @@ -943,7 +943,7 @@ static struct qla_tgt_sess *qlt_create_sess( } sess->tgt = vha->vha_tgt.qla_tgt; sess->vha = vha; - sess->s_id = 
fcport->d_id; + sess->d_id = fcport->d_id; sess->loop_id = fcport->loop_id; sess->local = local; kref_init(&sess->sess_kref); @@ -974,8 +974,8 @@ static struct qla_tgt_sess *qlt_create_sess( "qla_target(%d): %ssession for wwn %8phC (loop_id %d, " "s_id %x:%x:%x, confirmed completion %ssupported) added\n", vha->vp_idx, local ? "local " : "", fcport->port_name, - fcport->loop_id, sess->s_id.b.domain, sess->s_id.b.area, - sess->s_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); + fcport->loop_id, sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); /* * Determine if this fc_port->port_name is allowed to access @@ -4055,7 +4055,7 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, } /* find other sess with nport_id collision */ - if (port_id.b24 == other_sess->s_id.b24) { + if (port_id.b24 == other_sess->d_id.b24) { if (loop_id != other_sess->loop_id) { ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, "Invalidating sess %p loop_id %d wwn %llx.\n", @@ -4216,7 +4216,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, * PLOGI and situation will correct itself. */ sess->keep_nport_handle = ((sess->loop_id == loop_id) && - (sess->s_id.b24 == port_id.b24)); + (sess->d_id.b24 == port_id.b24)); qlt_schedule_sess_for_deletion(sess, true); break; @@ -4264,7 +4264,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess->local = 0; sess->loop_id = loop_id; - sess->s_id = port_id; + sess->d_id = port_id; if (wd3_lo & BIT_7) sess->conf_compl_supported = 1; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index f72bd0755a598a..07ccf81f6d22d2 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -928,7 +928,7 @@ typedef struct { */ struct qla_tgt_sess { uint16_t loop_id; - port_id_t s_id; + port_id_t d_id; unsigned int conf_compl_supported:1; unsigned int deleted:2; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index af4a198e553738..c96de1cfbd8508 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1388,9 +1388,9 @@ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, struct se_session *se_sess = sess->se_sess; unsigned char be_sid[3]; - be_sid[0] = sess->s_id.b.domain; - be_sid[1] = sess->s_id.b.area; - be_sid[2] = sess->s_id.b.al_pa; + be_sid[0] = sess->d_id.b.domain; + be_sid[1] = sess->d_id.b.area; + be_sid[2] = sess->d_id.b.al_pa; tcm_qla2xxx_set_sess_by_s_id(lport, NULL, nacl, se_sess, sess, be_sid); @@ -1442,9 +1442,9 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, unsigned long flags; unsigned char be_sid[3]; - be_sid[0] = qlat_sess->s_id.b.domain; - be_sid[1] = qlat_sess->s_id.b.area; - be_sid[2] = qlat_sess->s_id.b.al_pa; + be_sid[0] = qlat_sess->d_id.b.domain; + be_sid[1] = qlat_sess->d_id.b.area; + be_sid[2] = qlat_sess->d_id.b.al_pa; /* * And now setup se_nacl and session pointers into HW lport internal @@ -1524,11 +1524,11 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, u32 key; - if (sess->loop_id != loop_id || sess->s_id.b24 != s_id.b24) + if (sess->loop_id != loop_id || sess->d_id.b24 != s_id.b24) pr_info("Updating session %p from port %8phC loop_id %d -> %d s_id %x:%x:%x -> %x:%x:%x\n", sess, sess->port_name, - sess->loop_id, loop_id, sess->s_id.b.domain, - sess->s_id.b.area, sess->s_id.b.al_pa, s_id.b.domain, + sess->loop_id, loop_id, sess->d_id.b.domain, + sess->d_id.b.area, 
sess->d_id.b.al_pa, s_id.b.domain, s_id.b.area, s_id.b.al_pa); if (sess->loop_id != loop_id) { @@ -1548,18 +1548,20 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, sess->loop_id = loop_id; } - if (sess->s_id.b24 != s_id.b24) { - key = (((u32) sess->s_id.b.domain << 16) | - ((u32) sess->s_id.b.area << 8) | - ((u32) sess->s_id.b.al_pa)); + if (sess->d_id.b24 != s_id.b24) { + key = (((u32) sess->d_id.b.domain << 16) | + ((u32) sess->d_id.b.area << 8) | + ((u32) sess->d_id.b.al_pa)); if (btree_lookup32(&lport->lport_fcport_map, key)) - WARN(btree_remove32(&lport->lport_fcport_map, key) != se_nacl, - "Found wrong se_nacl when updating s_id %x:%x:%x\n", - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa); + WARN(btree_remove32(&lport->lport_fcport_map, key) != + se_nacl, "Found wrong se_nacl when updating s_id %x:%x:%x\n", + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa); else WARN(1, "No lport_fcport_map entry for s_id %x:%x:%x\n", - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa); + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa); key = (((u32) s_id.b.domain << 16) | ((u32) s_id.b.area << 8) | @@ -1570,10 +1572,11 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, s_id.b.domain, s_id.b.area, s_id.b.al_pa); btree_update32(&lport->lport_fcport_map, key, se_nacl); } else { - btree_insert32(&lport->lport_fcport_map, key, se_nacl, GFP_ATOMIC); + btree_insert32(&lport->lport_fcport_map, key, se_nacl, + GFP_ATOMIC); } - sess->s_id = s_id; + sess->d_id = s_id; nacl->nport_id = key; } From 5d964837c6a743193c63c8912f98834c7457ba5c Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:59 -0800 Subject: [PATCH 0066/1911] qla2xxx: Track I-T nexus as single fc_port struct Current code merges qla_tgt_sess and fc_port structure into single fc_port structure representing same I-T nexus. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani [ bvanassche: fixed spelling of patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 112 +++++++++- drivers/scsi/qla2xxx/qla_dfs.c | 4 +- drivers/scsi/qla2xxx/qla_init.c | 12 -- drivers/scsi/qla2xxx/qla_iocb.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 326 ++++++++++------------------- drivers/scsi/qla2xxx/qla_target.h | 150 ++----------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 130 +++++++----- drivers/scsi/qla2xxx/tcm_qla2xxx.h | 4 +- 8 files changed, 310 insertions(+), 430 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4aae861a671331..1bdcc7e010ff5e 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1976,6 +1976,84 @@ struct mbx_entry { uint8_t port_name[WWN_SIZE]; }; +#ifndef IMMED_NOTIFY_TYPE +#define IMMED_NOTIFY_TYPE 0x0D /* Immediate notify entry. */ +/* + * ISP queue - immediate notify entry structure definition. + * This is sent by the ISP to the Target driver. + * This IOCB would have report of events sent by the + * initiator, that needs to be handled by the target + * driver immediately. + */ +struct imm_ntfy_from_isp { + uint8_t entry_type; /* Entry type. */ + uint8_t entry_count; /* Entry count. */ + uint8_t sys_define; /* System defined. */ + uint8_t entry_status; /* Entry Status. */ + union { + struct { + uint32_t sys_define_2; /* System defined. 
*/ + target_id_t target; + uint16_t lun; + uint8_t target_id; + uint8_t reserved_1; + uint16_t status_modifier; + uint16_t status; + uint16_t task_flags; + uint16_t seq_id; + uint16_t srr_rx_id; + uint32_t srr_rel_offs; + uint16_t srr_ui; +#define SRR_IU_DATA_IN 0x1 +#define SRR_IU_DATA_OUT 0x5 +#define SRR_IU_STATUS 0x7 + uint16_t srr_ox_id; + uint8_t reserved_2[28]; + } isp2x; + struct { + uint32_t reserved; + uint16_t nport_handle; + uint16_t reserved_2; + uint16_t flags; +#define NOTIFY24XX_FLAGS_GLOBAL_TPRLO BIT_1 +#define NOTIFY24XX_FLAGS_PUREX_IOCB BIT_0 + uint16_t srr_rx_id; + uint16_t status; + uint8_t status_subcode; + uint8_t fw_handle; + uint32_t exchange_address; + uint32_t srr_rel_offs; + uint16_t srr_ui; + uint16_t srr_ox_id; + union { + struct { + uint8_t node_name[8]; + } plogi; /* PLOGI/ADISC/PDISC */ + struct { + /* PRLI word 3 bit 0-15 */ + uint16_t wd3_lo; + uint8_t resv0[6]; + } prli; + struct { + uint8_t port_id[3]; + uint8_t resv1; + uint16_t nport_handle; + uint16_t resv2; + } req_els; + } u; + uint8_t port_name[8]; + uint8_t resv3[3]; + uint8_t vp_index; + uint32_t reserved_5; + uint8_t port_id[3]; + uint8_t reserved_6; + } isp24; + } u; + uint16_t reserved_7; + uint16_t ox_id; +} __packed; +#endif + /* * ISP request and response queue entry sizes */ @@ -2026,7 +2104,7 @@ typedef struct { /* * Fibre channel port type. */ - typedef enum { +typedef enum { FCT_UNKNOWN, FCT_RSCN, FCT_SWITCH, @@ -2035,6 +2113,19 @@ typedef struct { FCT_TARGET } fc_port_type_t; +enum qlt_plogi_link_t { + QLT_PLOGI_LINK_SAME_WWN, + QLT_PLOGI_LINK_CONFLICT, + QLT_PLOGI_LINK_MAX +}; + +struct qlt_plogi_ack_t { + struct list_head list; + struct imm_ntfy_from_isp iocb; + port_id_t id; + int ref_count; +}; + /* * Fibre channel port structure. */ @@ -2048,6 +2139,25 @@ typedef struct fc_port { uint16_t loop_id; uint16_t old_loop_id; + unsigned int conf_compl_supported:1; + unsigned int deleted:2; + unsigned int local:1; + unsigned int logout_on_delete:1; + unsigned int keep_nport_handle:1; + unsigned int send_els_logo:1; + + unsigned char logout_completed; + int generation; + + struct se_session *se_sess; + struct kref sess_kref; + struct qla_tgt *tgt; + unsigned long expires; + struct list_head del_list_entry; + struct work_struct free_work; + + struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; + uint16_t tgt_id; uint16_t old_tgt_id; diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 98550c9b61619e..b48cce696bac77 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -18,7 +18,7 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) scsi_qla_host_t *vha = s->private; struct qla_hw_data *ha = vha->hw; unsigned long flags; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; struct qla_tgt *tgt= vha->vha_tgt.qla_tgt; seq_printf(s, "%s\n",vha->host_str); @@ -26,7 +26,7 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) seq_printf(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) + list_for_each_entry(sess, &vha->vp_fcports, list) seq_printf(s, "%02x:%02x:%02x %8phC %d\n", sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->port_name, diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 7b6317c8c2e93b..5978b79d4a619a 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3352,12 +3352,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t 
*vha, fc_port_t *fcport) "Unable to allocate fc remote port.\n"); return; } - /* - * Create target mode FC NEXUS in qla_target.c if target mode is - * enabled.. - */ - - qlt_fc_port_added(vha, fcport); spin_lock_irqsave(fcport->vha->host->host_lock, flags); *((fc_port_t **)rport->dd_data) = fcport; @@ -3407,12 +3401,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) reg_port: if (qla_ini_mode_enabled(vha)) qla2x00_reg_remote_port(vha, fcport); - else { - /* - * Create target mode FC NEXUS in qla_target.c - */ - qlt_fc_port_added(vha, fcport); - } } /* diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 58e49a3e1de8bc..834e2219784260 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2248,7 +2248,7 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); if (!sp->fcport->tgt_session || - !sp->fcport->tgt_session->keep_nport_handle) + !sp->fcport->keep_nport_handle) logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 671875a5095146..40b61a327786dc 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -102,7 +102,7 @@ enum fcp_resp_rsp_codes { static void qlt_24xx_atio_pkt(struct scsi_qla_host *ha, struct atio_from_isp *pkt, uint8_t); static void qlt_response_pkt(struct scsi_qla_host *ha, response_t *pkt); -static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, +static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, int fn, void *iocb, int flags); static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); @@ -138,21 +138,6 @@ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) wmb(); } -/* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) */ -static struct qla_tgt_sess *qlt_find_sess_by_port_name( - struct qla_tgt *tgt, - const uint8_t *port_name) -{ - struct qla_tgt_sess *sess; - - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) { - if (!memcmp(sess->port_name, port_name, WWN_SIZE)) - return sess; - } - - return NULL; -} - /* Might release hw lock, then reaquire!! */ static inline int qlt_issue_marker(struct scsi_qla_host *vha, int vha_locked) { @@ -399,16 +384,16 @@ void qlt_response_pkt_all_vps(struct scsi_qla_host *vha, response_t *pkt) * guarantees that ref_count is not modified concurrently. 
* Upon successful return content of iocb is undefined */ -static qlt_plogi_ack_t * +static struct qlt_plogi_ack_t * qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, struct imm_ntfy_from_isp *iocb) { - qlt_plogi_ack_t *pla; + struct qlt_plogi_ack_t *pla; list_for_each_entry(pla, &vha->plogi_ack_list, list) { if (pla->id.b24 == id->b24) { qlt_send_term_imm_notif(vha, &pla->iocb, 1); - pla->iocb = *iocb; + memcpy(&pla->iocb, iocb, sizeof(pla->iocb)); return pla; } } @@ -421,15 +406,17 @@ qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, return NULL; } - pla->iocb = *iocb; + memcpy(&pla->iocb, iocb, sizeof(pla->iocb)); pla->id = *id; list_add_tail(&pla->list, &vha->plogi_ack_list); return pla; } -static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla) +static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, + struct qlt_plogi_ack_t *pla) { + struct imm_ntfy_from_isp *iocb = &pla->iocb; BUG_ON(!pla->ref_count); pla->ref_count--; @@ -438,21 +425,22 @@ static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla) ql_dbg(ql_dbg_async, vha, 0x5089, "Sending PLOGI ACK to wwn %8phC s_id %02x:%02x:%02x loop_id %#04x" - " exch %#x ox_id %#x\n", pla->iocb.u.isp24.port_name, - pla->iocb.u.isp24.port_id[2], pla->iocb.u.isp24.port_id[1], - pla->iocb.u.isp24.port_id[0], - le16_to_cpu(pla->iocb.u.isp24.nport_handle), - pla->iocb.u.isp24.exchange_address, pla->iocb.ox_id); - qlt_send_notify_ack(vha, &pla->iocb, 0, 0, 0, 0, 0, 0); + " exch %#x ox_id %#x\n", iocb->u.isp24.port_name, + iocb->u.isp24.port_id[2], iocb->u.isp24.port_id[1], + iocb->u.isp24.port_id[0], + le16_to_cpu(iocb->u.isp24.nport_handle), + iocb->u.isp24.exchange_address, iocb->ox_id); + qlt_send_notify_ack(vha, iocb, 0, 0, 0, 0, 0, 0); list_del(&pla->list); kmem_cache_free(qla_tgt_plogi_cachep, pla); } static void -qlt_plogi_ack_link(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla, - struct qla_tgt_sess *sess, qlt_plogi_link_t link) +qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, + struct fc_port *sess, enum qlt_plogi_link_t link) { + struct imm_ntfy_from_isp *iocb = &pla->iocb; /* Inc ref_count first because link might already be pointing at pla */ pla->ref_count++; @@ -462,8 +450,8 @@ qlt_plogi_ack_link(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" " s_id %02x:%02x:%02x, ref=%d\n", sess, link, sess->port_name, - pla->iocb.u.isp24.port_name, pla->iocb.u.isp24.port_id[2], - pla->iocb.u.isp24.port_id[1], pla->iocb.u.isp24.port_id[0], + iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], pla->ref_count); sess->plogi_link[link] = pla; @@ -517,7 +505,7 @@ qlt_send_first_logo(struct scsi_qla_host *vha, qlt_port_logo_t *logo) static void qlt_free_session_done(struct work_struct *work) { - struct qla_tgt_sess *sess = container_of(work, struct qla_tgt_sess, + struct fc_port *sess = container_of(work, struct fc_port, free_work); struct qla_tgt *tgt = sess->tgt; struct scsi_qla_host *vha = sess->vha; @@ -551,7 +539,6 @@ static void qlt_free_session_done(struct work_struct *work) fcport.d_id = sess->d_id; memcpy(fcport.port_name, sess->port_name, WWN_SIZE); fcport.vha = vha; - fcport.tgt_session = sess; rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); if (rc != QLA_SUCCESS) @@ -587,22 +574,22 @@ static void qlt_free_session_done(struct work_struct *work) } 
spin_lock_irqsave(&ha->hardware_lock, flags); - { - qlt_plogi_ack_t *own = + struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; - qlt_plogi_ack_t *con = + struct qlt_plogi_ack_t *con = sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]; + struct imm_ntfy_from_isp *iocb; if (con) { + iocb = &con->iocb; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf099, - "se_sess %p / sess %p port %8phC is gone," - " %s (ref=%d), releasing PLOGI for %8phC (ref=%d)\n", - sess->se_sess, sess, sess->port_name, - own ? "releasing own PLOGI" : - "no own PLOGI pending", - own ? own->ref_count : -1, - con->iocb.u.isp24.port_name, con->ref_count); + "se_sess %p / sess %p port %8phC is gone," + " %s (ref=%d), releasing PLOGI for %8phC (ref=%d)\n", + sess->se_sess, sess, sess->port_name, + own ? "releasing own PLOGI" : "no own PLOGI pending", + own ? own->ref_count : -1, + iocb->u.isp24.port_name, con->ref_count); qlt_plogi_ack_unref(vha, con); } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf09a, @@ -616,15 +603,15 @@ static void qlt_free_session_done(struct work_struct *work) if (own) qlt_plogi_ack_unref(vha, own); } - - list_del(&sess->sess_list_entry); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess->se_sess = NULL; + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p finished\n", sess); - kfree(sess); /* * We need to protect against race, when tgt is freed before or * inside wake_up() @@ -635,37 +622,30 @@ static void qlt_free_session_done(struct work_struct *work) } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_release_session(struct kref *kref) +void qlt_unreg_sess(struct fc_port *sess) { - struct qla_tgt_sess *sess = - container_of(kref, struct qla_tgt_sess, sess_kref); struct scsi_qla_host *vha = sess->vha; + ql_dbg(ql_dbg_disc, sess->vha, 0xffff, + "%s sess %p for deletion %8phC\n", + __func__, sess, sess->port_name); + if (sess->se_sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - if (!list_empty(&sess->del_list_entry)) - list_del_init(&sess->del_list_entry); + qla2x00_mark_device_lost(vha, sess, 1, 1); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); } - -void qlt_put_sess(struct qla_tgt_sess *sess) -{ - if (!sess) - return; - - assert_spin_locked(&sess->vha->hw->tgt.sess_lock); - kref_put(&sess->sess_kref, qlt_release_session); -} -EXPORT_SYMBOL(qlt_put_sess); +EXPORT_SYMBOL(qlt_unreg_sess); static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; uint16_t loop_id; int res = 0; struct imm_ntfy_from_isp *n = (struct imm_ntfy_from_isp *)iocb; @@ -678,31 +658,6 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) spin_lock_irqsave(&ha->tgt.sess_lock, flags); qlt_clear_tgt_db(vha->vha_tgt.qla_tgt); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -#if 0 /* FIXME: do we need to choose a session here? 
*/ - if (!list_empty(&ha->tgt.qla_tgt->sess_list)) { - sess = list_entry(ha->tgt.qla_tgt->sess_list.next, - typeof(*sess), sess_list_entry); - switch (mcmd) { - case QLA_TGT_NEXUS_LOSS_SESS: - mcmd = QLA_TGT_NEXUS_LOSS; - break; - case QLA_TGT_ABORT_ALL_SESS: - mcmd = QLA_TGT_ABORT_ALL; - break; - case QLA_TGT_NEXUS_LOSS: - case QLA_TGT_ABORT_ALL: - break; - default: - ql_dbg(ql_dbg_tgt, vha, 0xe046, - "qla_target(%d): Not allowed " - "command %x in %s", vha->vp_idx, - mcmd, __func__); - sess = NULL; - break; - } - } else - sess = NULL; -#endif } else { spin_lock_irqsave(&ha->tgt.sess_lock, flags); sess = ha->tgt.tgt_ops->find_sess_by_loop_id(vha, loop_id); @@ -725,7 +680,7 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, +static void qlt_schedule_sess_for_deletion(struct fc_port *sess, bool immediate) { struct qla_tgt *tgt = sess->tgt; @@ -771,10 +726,13 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, /* ha->tgt.sess_lock supposed to be held on entry */ static void qlt_clear_tgt_db(struct qla_tgt *tgt) { - struct qla_tgt_sess *sess; + struct fc_port *sess; + scsi_qla_host_t *vha = tgt->vha; - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) - qlt_schedule_sess_for_deletion(sess, true); + list_for_each_entry(sess, &vha->vp_fcports, list) { + if (sess->se_sess) + qlt_schedule_sess_for_deletion(sess, true); + } /* At this point tgt could be already dead */ } @@ -829,7 +787,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_undelete_sess(struct qla_tgt_sess *sess) +static void qlt_undelete_sess(struct fc_port *sess) { BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); @@ -843,7 +801,7 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) sess_del_work); struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; unsigned long flags, elapsed; spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -861,7 +819,7 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) sess); if (sess->se_sess) ha->tgt.tgt_ops->shutdown_sess(sess); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); } else { schedule_delayed_work(&tgt->sess_del_work, sess->expires - elapsed); @@ -875,19 +833,18 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) * Adds an extra ref to allow to drop hw lock after adding sess to the list. * Caller must put it. 
*/ -static struct qla_tgt_sess *qlt_create_sess( +static struct fc_port *qlt_create_sess( struct scsi_qla_host *vha, fc_port_t *fcport, bool local) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; unsigned long flags; /* Check to avoid double sessions */ spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &vha->vha_tgt.qla_tgt->sess_list, - sess_list_entry) { + list_for_each_entry(sess, &vha->vp_fcports, list) { if (!memcmp(sess->port_name, fcport->port_name, WWN_SIZE)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, "Double sess %p found (s_id %x:%x:%x, " @@ -965,7 +922,7 @@ static struct qla_tgt_sess *qlt_create_sess( memcpy(sess->port_name, fcport->port_name, sizeof(sess->port_name)); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_add_tail(&sess->sess_list_entry, &vha->vha_tgt.qla_tgt->sess_list); + list_add_tail(&sess->list, &vha->vp_fcports); vha->vha_tgt.qla_tgt->sess_count++; qlt_do_generation_tick(vha, &sess->generation); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); @@ -988,7 +945,7 @@ static struct qla_tgt_sess *qlt_create_sess( return NULL; } else { /* - * Take an extra reference to ->sess_kref here to handle qla_tgt_sess + * Take an extra reference to ->sess_kref here to handle fc_port * access across ->tgt.sess_lock reaquire. */ kref_get(&sess->sess_kref); @@ -997,73 +954,6 @@ static struct qla_tgt_sess *qlt_create_sess( return sess; } -/* - * Called from qla2x00_reg_remote_port() - */ -void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) -{ - struct qla_hw_data *ha = vha->hw; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; - unsigned long flags; - - if (!vha->hw->tgt.tgt_ops) - return; - - if (!tgt || (fcport->port_type != FCT_INITIATOR)) - return; - - if (qla_ini_mode_enabled(vha)) - return; - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - if (tgt->tgt_stop) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return; - } - sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); - if (!sess) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - - mutex_lock(&vha->vha_tgt.tgt_mutex); - sess = qlt_create_sess(vha, fcport, false); - mutex_unlock(&vha->vha_tgt.tgt_mutex); - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - } else if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - /* Point of no return */ - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return; - } else { - kref_get(&sess->sess_kref); - - if (sess->deleted) { - qlt_undelete_sess(sess); - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04c, - "qla_target(%u): %ssession for port %8phC " - "(loop ID %d) reappeared\n", vha->vp_idx, - sess->local ? 
"local " : "", sess->port_name, - sess->loop_id); - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf007, - "Reappeared sess %p\n", sess); - } - ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, - (fcport->flags & FCF_CONF_COMP_SUPPORTED)); - } - - if (sess && sess->local) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04d, - "qla_target(%u): local session for " - "port %8phC (loop ID %d) became global\n", vha->vp_idx, - fcport->port_name, sess->loop_id); - sess->local = 0; - } - qlt_put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -} - /* * max_gen - specifies maximum session generation * at which this deletion requestion is still valid @@ -1072,7 +962,7 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess = fcport; unsigned long flags; if (!vha->hw->tgt.tgt_ops) @@ -1086,8 +976,7 @@ qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); return; } - sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); - if (!sess) { + if (!sess->se_sess) { spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); return; } @@ -1120,8 +1009,8 @@ static inline int test_tgt_sess_count(struct qla_tgt *tgt) */ spin_lock_irqsave(&ha->hardware_lock, flags); ql_dbg(ql_dbg_tgt, tgt->vha, 0xe002, - "tgt %p, empty(sess_list)=%d sess_count=%d\n", - tgt, list_empty(&tgt->sess_list), tgt->sess_count); + "tgt %p, sess_count=%d\n", + tgt, tgt->sess_count); res = (tgt->sess_count == 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1184,9 +1073,7 @@ int qlt_stop_phase1(struct qla_tgt *tgt) spin_unlock_irqrestore(&tgt->sess_work_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00a, - "Waiting for tgt %p: list_empty(sess_list)=%d " - "sess_count=%d\n", tgt, list_empty(&tgt->sess_list), - tgt->sess_count); + "Waiting for tgt %p: sess_count=%d\n", tgt, tgt->sess_count); wait_event(tgt->waitQ, test_tgt_sess_count(tgt)); @@ -1538,7 +1425,7 @@ static void abort_cmds_for_lun(struct scsi_qla_host *vha, /* ha->hardware_lock supposed to be held on entry */ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, - struct abts_recv_from_24xx *abts, struct qla_tgt_sess *sess) + struct abts_recv_from_24xx *abts, struct fc_port *sess) { struct qla_hw_data *ha = vha->hw; struct se_session *se_sess = sess->se_sess; @@ -1547,8 +1434,9 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, u32 lun = 0; int rc; bool found_lun = false; + unsigned long flags; - spin_lock(&se_sess->sess_cmd_lock); + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); @@ -1558,7 +1446,7 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, break; } } - spin_unlock(&se_sess->sess_cmd_lock); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); /* cmd not in LIO lists, look in qla list */ if (!found_lun) { @@ -1612,7 +1500,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, struct abts_recv_from_24xx *abts) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; uint32_t tag = abts->exchange_addr_to_abort; uint8_t s_id[3]; int rc; @@ -3215,7 +3103,7 @@ EXPORT_SYMBOL(qlt_abort_cmd); void qlt_free_cmd(struct qla_tgt_cmd *cmd) { - struct qla_tgt_sess *sess = cmd->sess; + struct fc_port *sess = cmd->sess; ql_dbg(ql_dbg_tgt, 
cmd->vha, 0xe074, "%s: se_cmd[%p] ox_id %04x\n", @@ -3577,7 +3465,7 @@ static inline int qlt_get_fcp_task_attr(struct scsi_qla_host *vha, return fcp_task_attr; } -static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *, +static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *, uint8_t *); /* * Process context for I/O path into tcm_qla2xxx code @@ -3587,7 +3475,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) scsi_qla_host_t *vha = cmd->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess = cmd->sess; + struct fc_port *sess = cmd->sess; struct atio_from_isp *atio = &cmd->atio; unsigned char *cdb; unsigned long flags; @@ -3637,7 +3525,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) * Drop extra session reference from qla_tgt_handle_cmd_for_atio*( */ spin_lock_irqsave(&ha->tgt.sess_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; @@ -3656,7 +3544,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) spin_unlock_irqrestore(&ha->hardware_lock, flags); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -3674,7 +3562,7 @@ static void qlt_do_work(struct work_struct *work) } static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, - struct qla_tgt_sess *sess, + struct fc_port *sess, struct atio_from_isp *atio) { struct se_session *se_sess = sess->se_sess; @@ -3715,7 +3603,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) struct qla_tgt_sess_op, work); scsi_qla_host_t *vha = op->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct qla_tgt_cmd *cmd; unsigned long flags; uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; @@ -3756,7 +3644,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) if (!cmd) { spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); return; @@ -3783,8 +3671,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, { struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct qla_tgt_cmd *cmd; + unsigned long flags; if (unlikely(tgt->tgt_stop)) { ql_dbg(ql_dbg_io, vha, 0x3061, @@ -3829,7 +3718,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, if (!cmd) { ql_dbg(ql_dbg_io, vha, 0x3062, "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx); - qlt_put_sess(sess); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return -ENOMEM; } @@ -3858,7 +3749,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, } /* ha->hardware_lock supposed to be held on entry */ -static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, +static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, int fn, void *iocb, int flags) { struct scsi_qla_host *vha = sess->vha; @@ -3910,7 +3801,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) struct atio_from_isp *a = (struct atio_from_isp *)iocb; struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess; uint32_t lun, unpacked_lun; int fn; unsigned long 
flags; @@ -3943,7 +3834,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) /* ha->hardware_lock supposed to be held on entry */ static int __qlt_abort_task(struct scsi_qla_host *vha, - struct imm_ntfy_from_isp *iocb, struct qla_tgt_sess *sess) + struct imm_ntfy_from_isp *iocb, struct fc_port *sess) { struct atio_from_isp *a = (struct atio_from_isp *)iocb; struct qla_hw_data *ha = vha->hw; @@ -3987,7 +3878,7 @@ static int qlt_abort_task(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; int loop_id; unsigned long flags; @@ -4017,14 +3908,14 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) " port %8phC loop_id %#04x s_id %02x:%02x:%02x" " LOGO failed: %#x\n", __func__, - fcport->tgt_session->se_sess, + fcport->se_sess, fcport->tgt_session, fcport->port_name, fcport->loop_id, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, rc); } - fcport->tgt_session->logout_completed = 1; + fcport->logout_completed = 1; } } @@ -4035,16 +3926,17 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) * deletion. Returns existing session with matching wwn if present. * Null otherwise. */ -static struct qla_tgt_sess * +static struct fc_port * qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, - port_id_t port_id, uint16_t loop_id, struct qla_tgt_sess **conflict_sess) + port_id_t port_id, uint16_t loop_id, struct fc_port **conflict_sess) { - struct qla_tgt_sess *sess = NULL, *other_sess; + struct fc_port *sess = NULL, *other_sess; uint64_t other_wwn; + scsi_qla_host_t *vha = tgt->vha; *conflict_sess = NULL; - list_for_each_entry(other_sess, &tgt->sess_list, sess_list_entry) { + list_for_each_entry(other_sess, &vha->vp_fcports, list) { other_wwn = wwn_to_u64(other_sess->port_name); @@ -4136,13 +4028,13 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL, *conflict_sess = NULL; + struct fc_port *sess = NULL, *conflict_sess = NULL; uint64_t wwn; port_id_t port_id; uint16_t loop_id; uint16_t wd3_lo; int res = 0; - qlt_plogi_ack_t *pla; + struct qlt_plogi_ack_t *pla; unsigned long flags; wwn = wwn_to_u64(iocb->u.isp24.port_name); @@ -4455,7 +4347,7 @@ static int __qlt_send_busy(struct scsi_qla_host *vha, struct ctio7_to_24xx *ctio24; struct qla_hw_data *ha = vha->hw; request_t *pkt; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags; spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -4516,7 +4408,7 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct se_session *se_sess; struct qla_tgt_cmd *cmd; int tag; @@ -5112,10 +5004,10 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, } /* Must be called under tgt_mutex */ -static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *vha, +static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, uint8_t *s_id) { - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; fc_port_t *fcport = NULL; int rc, global_resets; uint16_t loop_id = 0; @@ -5177,7 +5069,6 @@ static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *vha, mutex_unlock(&vha->vha_tgt.tgt_mutex); - kfree(fcport); return sess; } @@ -5186,7 +5077,7 @@ static void qlt_abort_work(struct 
qla_tgt *tgt, { struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags = 0, flags2 = 0; uint32_t be_s_id; uint8_t s_id[3]; @@ -5230,8 +5121,8 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (rc != 0) goto out_term; spin_unlock_irqrestore(&ha->hardware_lock, flags); - - qlt_put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; @@ -5242,7 +5133,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } @@ -5252,7 +5143,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, struct atio_from_isp *a = &prm->tm_iocb2; struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags; uint8_t *s_id = NULL; /* to hide compiler warnings */ int rc; @@ -5294,13 +5185,14 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (rc != 0) goto out_term; - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - qlt_put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -5377,7 +5269,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->ha = ha; tgt->vha = base_vha; init_waitqueue_head(&tgt->waitQ); - INIT_LIST_HEAD(&tgt->sess_list); INIT_LIST_HEAD(&tgt->del_sess_list); INIT_DELAYED_WORK(&tgt->sess_del_work, (void (*)(struct work_struct *))qlt_del_sess_work_fn); @@ -6233,9 +6124,8 @@ int __init qlt_init(void) } qla_tgt_plogi_cachep = kmem_cache_create("qla_tgt_plogi_cachep", - sizeof(qlt_plogi_ack_t), - __alignof__(qlt_plogi_ack_t), - 0, NULL); + sizeof(struct qlt_plogi_ack_t), __alignof__(struct qlt_plogi_ack_t), + 0, NULL); if (!qla_tgt_plogi_cachep) { ql_log(ql_log_fatal, NULL, 0xe06d, diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 07ccf81f6d22d2..9c35ba15c687e7 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -118,84 +118,6 @@ ? le16_to_cpu((iocb)->u.isp2x.target.extended) \ : (uint16_t)(iocb)->u.isp2x.target.id.standard) -#ifndef IMMED_NOTIFY_TYPE -#define IMMED_NOTIFY_TYPE 0x0D /* Immediate notify entry. */ -/* - * ISP queue - immediate notify entry structure definition. - * This is sent by the ISP to the Target driver. - * This IOCB would have report of events sent by the - * initiator, that needs to be handled by the target - * driver immediately. - */ -struct imm_ntfy_from_isp { - uint8_t entry_type; /* Entry type. */ - uint8_t entry_count; /* Entry count. */ - uint8_t sys_define; /* System defined. */ - uint8_t entry_status; /* Entry Status. */ - union { - struct { - uint32_t sys_define_2; /* System defined. 
*/ - target_id_t target; - uint16_t lun; - uint8_t target_id; - uint8_t reserved_1; - uint16_t status_modifier; - uint16_t status; - uint16_t task_flags; - uint16_t seq_id; - uint16_t srr_rx_id; - uint32_t srr_rel_offs; - uint16_t srr_ui; -#define SRR_IU_DATA_IN 0x1 -#define SRR_IU_DATA_OUT 0x5 -#define SRR_IU_STATUS 0x7 - uint16_t srr_ox_id; - uint8_t reserved_2[28]; - } isp2x; - struct { - uint32_t reserved; - uint16_t nport_handle; - uint16_t reserved_2; - uint16_t flags; -#define NOTIFY24XX_FLAGS_GLOBAL_TPRLO BIT_1 -#define NOTIFY24XX_FLAGS_PUREX_IOCB BIT_0 - uint16_t srr_rx_id; - uint16_t status; - uint8_t status_subcode; - uint8_t fw_handle; - uint32_t exchange_address; - uint32_t srr_rel_offs; - uint16_t srr_ui; - uint16_t srr_ox_id; - union { - struct { - uint8_t node_name[8]; - } plogi; /* PLOGI/ADISC/PDISC */ - struct { - /* PRLI word 3 bit 0-15 */ - uint16_t wd3_lo; - uint8_t resv0[6]; - } prli; - struct { - uint8_t port_id[3]; - uint8_t resv1; - uint16_t nport_handle; - uint16_t resv2; - } req_els; - } u; - uint8_t port_name[8]; - uint8_t resv3[3]; - uint8_t vp_index; - uint32_t reserved_5; - uint8_t port_id[3]; - uint8_t reserved_6; - } isp24; - } u; - uint16_t reserved_7; - uint16_t ox_id; -} __packed; -#endif - #ifndef NOTIFY_ACK_TYPE #define NOTIFY_ACK_TYPE 0x0E /* Notify acknowledge entry. */ /* @@ -731,7 +653,7 @@ struct abts_resp_from_24xx_fw { \********************************************************************/ struct qla_tgt_mgmt_cmd; -struct qla_tgt_sess; +struct fc_port; /* * This structure provides a template of function calls that the @@ -748,17 +670,18 @@ struct qla_tgt_func_tmpl { uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); void (*free_mcmd)(struct qla_tgt_mgmt_cmd *); - void (*free_session)(struct qla_tgt_sess *); + void (*free_session)(struct fc_port *); int (*check_initiator_node_acl)(struct scsi_qla_host *, unsigned char *, - struct qla_tgt_sess *); - void (*update_sess)(struct qla_tgt_sess *, port_id_t, uint16_t, bool); - struct qla_tgt_sess *(*find_sess_by_loop_id)(struct scsi_qla_host *, + struct fc_port *); + void (*update_sess)(struct fc_port *, port_id_t, uint16_t, bool); + struct fc_port *(*find_sess_by_loop_id)(struct scsi_qla_host *, const uint16_t); - struct qla_tgt_sess *(*find_sess_by_s_id)(struct scsi_qla_host *, + struct fc_port *(*find_sess_by_s_id)(struct scsi_qla_host *, const uint8_t *); - void (*clear_nacl_from_fcport_map)(struct qla_tgt_sess *); - void (*shutdown_sess)(struct qla_tgt_sess *); + void (*clear_nacl_from_fcport_map)(struct fc_port *); + void (*put_sess)(struct fc_port *); + void (*shutdown_sess)(struct fc_port *); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); @@ -874,9 +797,6 @@ struct qla_tgt { /* Count of sessions refering qla_tgt. Protected by hardware_lock. */ int sess_count; - /* Protected by hardware_lock. Addition also protected by tgt_mutex. 
*/ - struct list_head sess_list; - /* Protected by hardware_lock */ struct list_head del_sess_list; struct delayed_work sess_del_work; @@ -910,52 +830,6 @@ enum qla_sess_deletion { QLA_SESS_DELETION_IN_PROGRESS = 2, }; -typedef enum { - QLT_PLOGI_LINK_SAME_WWN, - QLT_PLOGI_LINK_CONFLICT, - QLT_PLOGI_LINK_MAX -} qlt_plogi_link_t; - -typedef struct { - struct list_head list; - struct imm_ntfy_from_isp iocb; - port_id_t id; - int ref_count; -} qlt_plogi_ack_t; - -/* - * Equivilant to IT Nexus (Initiator-Target) - */ -struct qla_tgt_sess { - uint16_t loop_id; - port_id_t d_id; - - unsigned int conf_compl_supported:1; - unsigned int deleted:2; - unsigned int local:1; - unsigned int logout_on_delete:1; - unsigned int keep_nport_handle:1; - unsigned int send_els_logo:1; - - unsigned char logout_completed; - - int generation; - - struct se_session *se_sess; - struct kref sess_kref; - struct scsi_qla_host *vha; - struct qla_tgt *tgt; - - struct list_head sess_list_entry; - unsigned long expires; - struct list_head del_list_entry; - - uint8_t port_name[WWN_SIZE]; - struct work_struct free_work; - - qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; -}; - enum trace_flags { TRC_NEW_CMD = BIT_0, TRC_DO_WORK = BIT_1, @@ -981,7 +855,7 @@ enum trace_flags { struct qla_tgt_cmd { struct se_cmd se_cmd; - struct qla_tgt_sess *sess; + struct fc_port *sess; int state; struct work_struct free_work; struct work_struct work; @@ -1046,7 +920,7 @@ struct qla_tgt_sess_work_param { struct qla_tgt_mgmt_cmd { uint16_t tmr_func; uint8_t fc_tm_rsp; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct se_cmd se_cmd; struct work_struct free_work; unsigned int flags; @@ -1097,7 +971,7 @@ extern int qlt_remove_target(struct qla_hw_data *, struct scsi_qla_host *); extern int qlt_lport_register(void *, u64, u64, u64, int (*callback)(struct scsi_qla_host *, void *, u64, u64)); extern void qlt_lport_deregister(struct scsi_qla_host *); -void qlt_put_sess(struct qla_tgt_sess *sess); +extern void qlt_unreg_sess(struct fc_port *); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index c96de1cfbd8508..e37d7ee9547324 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -339,9 +339,26 @@ static void tcm_qla2xxx_release_cmd(struct se_cmd *se_cmd) qlt_free_cmd(cmd); } +static void tcm_qla2xxx_release_session(struct kref *kref) +{ + struct fc_port *sess = container_of(kref, + struct fc_port, sess_kref); + + qlt_unreg_sess(sess); +} + +static void tcm_qla2xxx_put_sess(struct fc_port *sess) +{ + if (!sess) + return; + + assert_spin_locked(&sess->vha->hw->tgt.sess_lock); + kref_put(&sess->sess_kref, tcm_qla2xxx_release_session); +} + static void tcm_qla2xxx_close_session(struct se_session *se_sess) { - struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; + struct fc_port *sess = se_sess->fabric_sess_ptr; struct scsi_qla_host *vha; unsigned long flags; @@ -350,7 +367,7 @@ static void tcm_qla2xxx_close_session(struct se_session *se_sess) spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); target_sess_cmd_list_set_waiting(se_sess); - qlt_put_sess(sess); + tcm_qla2xxx_put_sess(sess); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); } @@ -441,7 +458,7 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, { struct se_cmd *se_cmd = &cmd->se_cmd; struct 
se_session *se_sess; - struct qla_tgt_sess *sess; + struct fc_port *sess; #ifdef CONFIG_TCM_QLA2XXX_DEBUG struct se_portal_group *se_tpg; struct tcm_qla2xxx_tpg *tpg; @@ -456,7 +473,7 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, sess = cmd->sess; if (!sess) { - pr_err("Unable to locate struct qla_tgt_sess from qla_tgt_cmd\n"); + pr_err("Unable to locate struct fc_port from qla_tgt_cmd\n"); return -EINVAL; } @@ -565,7 +582,7 @@ static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, uint16_t tmr_func, uint32_t tag) { - struct qla_tgt_sess *sess = mcmd->sess; + struct fc_port *sess = mcmd->sess; struct se_cmd *se_cmd = &mcmd->se_cmd; int transl_tmr_func = 0; @@ -746,11 +763,11 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, - struct tcm_qla2xxx_nacl *, struct qla_tgt_sess *); + struct tcm_qla2xxx_nacl *, struct fc_port *); /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct fc_port *sess) { struct se_node_acl *se_nacl = sess->se_sess->se_node_acl; struct se_portal_group *se_tpg = se_nacl->se_tpg; @@ -789,7 +806,7 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) tcm_qla2xxx_clear_sess_lookup(lport, nacl, sess); } -static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_shutdown_sess(struct fc_port *sess) { assert_spin_locked(&sess->vha->hw->tgt.sess_lock); target_sess_cmd_list_set_waiting(sess->se_sess); @@ -1174,7 +1191,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( +static struct fc_port *tcm_qla2xxx_find_sess_by_s_id( scsi_qla_host_t *vha, const uint8_t *s_id) { @@ -1202,12 +1219,12 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( se_nacl, se_nacl->initiatorname); nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - if (!nacl->qla_tgt_sess) { - pr_err("Unable to locate struct qla_tgt_sess\n"); + if (!nacl->fc_port) { + pr_err("Unable to locate struct fc_port\n"); return NULL; } - return nacl->qla_tgt_sess; + return nacl->fc_port; } /* @@ -1218,7 +1235,7 @@ static void tcm_qla2xxx_set_sess_by_s_id( struct se_node_acl *new_se_nacl, struct tcm_qla2xxx_nacl *nacl, struct se_session *se_sess, - struct qla_tgt_sess *qla_tgt_sess, + struct fc_port *fc_port, uint8_t *s_id) { u32 key; @@ -1242,22 +1259,22 @@ static void tcm_qla2xxx_set_sess_by_s_id( pr_debug("Wiping nonexisting fc_port entry\n"); } - qla_tgt_sess->se_sess = se_sess; - nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; return; } - if (nacl->qla_tgt_sess) { + if (nacl->fc_port) { if (new_se_nacl == NULL) { - pr_debug("Clearing existing nacl->qla_tgt_sess and fc_port entry\n"); + pr_debug("Clearing existing nacl->fc_port and fc_port entry\n"); btree_remove32(&lport->lport_fcport_map, key); - nacl->qla_tgt_sess = NULL; + nacl->fc_port = NULL; return; } - pr_debug("Replacing existing nacl->qla_tgt_sess and fc_port entry\n"); + pr_debug("Replacing existing nacl->fc_port and fc_port entry\n"); btree_update32(&lport->lport_fcport_map, key, new_se_nacl); - qla_tgt_sess->se_sess = se_sess; - 
nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; return; } @@ -1267,19 +1284,19 @@ static void tcm_qla2xxx_set_sess_by_s_id( return; } - pr_debug("Replacing existing fc_port entry w/o active nacl->qla_tgt_sess\n"); + pr_debug("Replacing existing fc_port entry w/o active nacl->fc_port\n"); btree_update32(&lport->lport_fcport_map, key, new_se_nacl); - qla_tgt_sess->se_sess = se_sess; - nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; - pr_debug("Setup nacl->qla_tgt_sess %p by s_id for se_nacl: %p, initiatorname: %s\n", - nacl->qla_tgt_sess, new_se_nacl, new_se_nacl->initiatorname); + pr_debug("Setup nacl->fc_port %p by s_id for se_nacl: %p, initiatorname: %s\n", + nacl->fc_port, new_se_nacl, new_se_nacl->initiatorname); } /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_loop_id( +static struct fc_port *tcm_qla2xxx_find_sess_by_loop_id( scsi_qla_host_t *vha, const uint16_t loop_id) { @@ -1307,12 +1324,12 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_loop_id( nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - if (!nacl->qla_tgt_sess) { - pr_err("Unable to locate struct qla_tgt_sess\n"); + if (!nacl->fc_port) { + pr_err("Unable to locate struct fc_port\n"); return NULL; } - return nacl->qla_tgt_sess; + return nacl->fc_port; } /* @@ -1323,7 +1340,7 @@ static void tcm_qla2xxx_set_sess_by_loop_id( struct se_node_acl *new_se_nacl, struct tcm_qla2xxx_nacl *nacl, struct se_session *se_sess, - struct qla_tgt_sess *qla_tgt_sess, + struct fc_port *fc_port, uint16_t loop_id) { struct se_node_acl *saved_nacl; @@ -1338,27 +1355,27 @@ static void tcm_qla2xxx_set_sess_by_loop_id( if (!saved_nacl) { pr_debug("Setting up new fc_loopid->se_nacl to new_se_nacl\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; return; } - if (nacl->qla_tgt_sess) { + if (nacl->fc_port) { if (new_se_nacl == NULL) { - pr_debug("Clearing nacl->qla_tgt_sess and fc_loopid->se_nacl\n"); + pr_debug("Clearing nacl->fc_port and fc_loopid->se_nacl\n"); fc_loopid->se_nacl = NULL; - nacl->qla_tgt_sess = NULL; + nacl->fc_port = NULL; return; } - pr_debug("Replacing existing nacl->qla_tgt_sess and fc_loopid->se_nacl\n"); + pr_debug("Replacing existing nacl->fc_port and fc_loopid->se_nacl\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; return; } @@ -1368,22 +1385,22 @@ static void tcm_qla2xxx_set_sess_by_loop_id( return; } - pr_debug("Replacing existing fc_loopid->se_nacl w/o active nacl->qla_tgt_sess\n"); + pr_debug("Replacing existing fc_loopid->se_nacl w/o active nacl->fc_port\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; - pr_debug("Setup nacl->qla_tgt_sess %p 
by loop_id for se_nacl: %p, initiatorname: %s\n", - nacl->qla_tgt_sess, new_se_nacl, new_se_nacl->initiatorname); + pr_debug("Setup nacl->fc_port %p by loop_id for se_nacl: %p, initiatorname: %s\n", + nacl->fc_port, new_se_nacl, new_se_nacl->initiatorname); } /* * Should always be called with qla_hw_data->tgt.sess_lock held. */ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, - struct tcm_qla2xxx_nacl *nacl, struct qla_tgt_sess *sess) + struct tcm_qla2xxx_nacl *nacl, struct fc_port *sess) { struct se_session *se_sess = sess->se_sess; unsigned char be_sid[3]; @@ -1398,7 +1415,7 @@ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, sess, sess->loop_id); } -static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_free_session(struct fc_port *sess) { struct qla_tgt *tgt = sess->tgt; struct qla_hw_data *ha = tgt->ha; @@ -1410,7 +1427,7 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) se_sess = sess->se_sess; if (!se_sess) { - pr_err("struct qla_tgt_sess->se_sess is NULL\n"); + pr_err("struct fc_port->se_sess is NULL\n"); dump_stack(); return; } @@ -1437,7 +1454,7 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl = se_sess->se_node_acl; struct tcm_qla2xxx_nacl *nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - struct qla_tgt_sess *qlat_sess = p; + struct fc_port *qlat_sess = p; uint16_t loop_id = qlat_sess->loop_id; unsigned long flags; unsigned char be_sid[3]; @@ -1467,7 +1484,7 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, static int tcm_qla2xxx_check_initiator_node_acl( scsi_qla_host_t *vha, unsigned char *fc_wwpn, - struct qla_tgt_sess *qlat_sess) + struct fc_port *qlat_sess) { struct qla_hw_data *ha = vha->hw; struct tcm_qla2xxx_lport *lport; @@ -1511,7 +1528,7 @@ static int tcm_qla2xxx_check_initiator_node_acl( return 0; } -static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, +static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id, uint16_t loop_id, bool conf_compl_supported) { struct qla_tgt *tgt = sess->tgt; @@ -1603,6 +1620,7 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .find_sess_by_s_id = tcm_qla2xxx_find_sess_by_s_id, .find_sess_by_loop_id = tcm_qla2xxx_find_sess_by_loop_id, .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, + .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, }; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index cf8430be183b6b..071035dfa99ae8 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -20,8 +20,8 @@ struct tcm_qla2xxx_nacl { u64 nport_wwnn; /* ASCII formatted WWPN for FC Initiator Nport */ char nport_name[TCM_QLA2XXX_NAMELEN]; - /* Pointer to qla_tgt_sess */ - struct qla_tgt_sess *qla_tgt_sess; + /* Pointer to fc_port */ + struct fc_port *fc_port; /* Pointer to TCM FC nexus */ struct se_session *nport_nexus; };
From 726b85487067d7f5b23495bc33c484b8517c4074 Mon Sep 17 00:00:00 2001
From: Quinn Tran
Date: Thu, 19 Jan 2017 22:28:00 -0800
Subject: [PATCH 0067/1911] qla2xxx: Add framework for async fabric discovery

Currently the code performs a full scan of the fabric for every RSCN. It's an
expensive process in a noisy, large SAN. This patch optimizes the expensive
fabric discovery process by scanning the switch only for the affected port
when an RSCN is received.
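
As a rough sketch only, not part of the patch itself: the capabilities listed
further below all feed one event-driven dispatcher. Each discovery step (an
RSCN, a completed GID_PN or GPN_ID query, a finished session deletion, and so
on) is packaged into a small event descriptor and handed to a single handler.
The names mirror the struct event_arg and enum fcport_mgt_event that this
patch adds to qla_def.h; the field types here are simplified stand-ins, and
the switch body is only a placeholder for the driver's real
qla2x00_fcport_event_handler().

    /* Simplified stand-ins for the definitions this patch adds to qla_def.h. */
    enum fcport_mgt_event {
            FCME_RELOGIN = 1,
            FCME_RSCN,
            FCME_GIDPN_DONE,
            FCME_PLOGI_DONE,        /* initiator side sent LLIOCB */
            FCME_GNL_DONE,
            FCME_GPSC_DONE,
            FCME_GPDB_DONE,
            FCME_GPNID_DONE,
            FCME_DELETE_DONE,
    };

    struct event_arg {
            enum fcport_mgt_event event;
            void *fcport;                   /* fc_port_t * in the driver */
            void *sp;                       /* srb_t * in the driver */
            unsigned short data[2], rc;
            unsigned char port_name[8];     /* WWN_SIZE */
    };

    /* Placeholder dispatcher showing the shape of the real event handler. */
    static void fcport_event_handler_sketch(struct event_arg *ea)
    {
            switch (ea->event) {
            case FCME_RSCN:
                    /* an RSCN names an affected port: query the switch for it */
                    break;
            case FCME_GIDPN_DONE:
            case FCME_GPNID_DONE:
                    /* switch replied: decide whether to login, relogin or delete */
                    break;
            case FCME_GPDB_DONE:
                    /* firmware port database fetched: update the session state */
                    break;
            case FCME_DELETE_DONE:
                    /* session teardown finished: the port may be recreated */
                    break;
            default:
                    break;
            }
    }
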
Currently the initiator mode code makes login/logout decisions without
knowledge of target mode. This causes the driver and firmware to go out of
sync. This framework synchronizes the initiator mode and target mode
personalities when making login/logout decisions.

This patch adds the following capabilities to the driver:

- Send Notification Acknowledgement asynchronously.
- Update session/fcport state asynchronously.
- Create a session or fcport struct asynchronously.
- Send GNL asynchronously. The command will ask the FW to provide a list of
  the FC Port entries the FW knows about.
- Send GPDB asynchronously. The command will ask the FW to provide detailed
  data for an FC Port the FW knows about, or perform ADISC to verify the
  state of the session.
- Send GPNID asynchronously. The command will ask the switch to provide the
  WWPN for the provided NPort ID.
- Send GPSC asynchronously. The command will ask the switch to provide the
  registered port speed for the provided WWPN.
- Send GIDPN asynchronously. The command will ask the switch to provide the
  Nport ID for the provided WWPN.
- In the driver unload path, schedule all sessions for deletion and wait for
  deletion to complete before allowing driver unload to proceed.

Signed-off-by: Quinn Tran
Signed-off-by: Himanshu Madhani
[ bvanassche: fixed spelling in patch description ]
Signed-off-by: Bart Van Assche
Signed-off-by: Nicholas Bellinger
---
 drivers/scsi/qla2xxx/qla_attr.c   |    3 +
 drivers/scsi/qla2xxx/qla_def.h    |  175 +++-
 drivers/scsi/qla2xxx/qla_fw.h     |   31 +
 drivers/scsi/qla2xxx/qla_gbl.h    |   56 +-
 drivers/scsi/qla2xxx/qla_gs.c     |  724 +++++++++++++--
 drivers/scsi/qla2xxx/qla_init.c   | 1396 ++++++++++++++++++++++-------
 drivers/scsi/qla2xxx/qla_inline.h |    7 +-
 drivers/scsi/qla2xxx/qla_iocb.c   |   71 +-
 drivers/scsi/qla2xxx/qla_isr.c    |  260 ++++--
 drivers/scsi/qla2xxx/qla_mbx.c    |   88 --
 drivers/scsi/qla2xxx/qla_os.c     |  271 ++++--
 drivers/scsi/qla2xxx/qla_target.c | 1002 ++++++++++++++-------
 drivers/scsi/qla2xxx/qla_target.h |   17 +-
 13 files changed, 3114 insertions(+), 987 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index f201f40996205c..f610103994afd4 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2163,6 +2163,9 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) clear_bit(vha->vp_idx, ha->vp_idx_map); mutex_unlock(&ha->vport_lock); + dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, + vha->gnl.ldma); + if (vha->qpair->vp_idx == vha->vp_idx) { if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0x7087, diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1bdcc7e010ff5e..79b4e88db3b136 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -55,6 +55,8 @@ #include "qla_settings.h" +#define MODE_DUAL (MODE_TARGET | MODE_INITIATOR) + /* * Data bit definitions */ @@ -251,6 +253,14 @@ #define MAX_CMDSZ 16 /* SCSI maximum CDB size. */ #include "qla_fw.h" + +struct name_list_extended { + struct get_name_list_extended *l; + dma_addr_t ldma; + struct list_head fcports; /* protect by sess_list */ + u32 size; + u8 sent; +}; /* * Timeout timer counts in seconds */ @@ -309,6 +319,17 @@ struct els_logo_payload { uint8_t wwpn[WWN_SIZE]; }; +struct ct_arg { + void *iocb; + u16 nport_handle; + dma_addr_t req_dma; + dma_addr_t rsp_dma; + u32 req_size; + u32 rsp_size; + void *req; + void *rsp; +}; + /* * SRB extensions.
*/ @@ -320,6 +341,7 @@ struct srb_iocb { #define SRB_LOGIN_COND_PLOGI BIT_1 #define SRB_LOGIN_SKIP_PRLI BIT_2 uint16_t data[2]; + u32 iop[2]; } logio; struct { #define ELS_DCMD_TIMEOUT 20 @@ -372,6 +394,16 @@ struct srb_iocb { __le16 comp_status; struct completion comp; } abt; + struct ct_arg ctarg; + struct { + __le16 in_mb[28]; /* fr fw */ + __le16 out_mb[28]; /* to fw */ + void *out, *in; + dma_addr_t out_dma, in_dma; + } mbx; + struct { + struct imm_ntfy_from_isp *ntfy; + } nack; } u; struct timer_list timer; @@ -392,16 +424,24 @@ struct srb_iocb { #define SRB_FXIOCB_BCMD 11 #define SRB_ABT_CMD 12 #define SRB_ELS_DCMD 13 +#define SRB_MB_IOCB 14 +#define SRB_CT_PTHRU_CMD 15 +#define SRB_NACK_PLOGI 16 +#define SRB_NACK_PRLI 17 +#define SRB_NACK_LOGO 18 typedef struct srb { atomic_t ref_count; struct fc_port *fcport; + void *vha; uint32_t handle; uint16_t flags; uint16_t type; char *name; int iocbs; struct qla_qpair *qpair; + u32 gen1; /* scratch */ + u32 gen2; /* scratch */ union { struct srb_iocb iocb_cmd; struct bsg_job *bsg_job; @@ -2101,6 +2141,18 @@ typedef struct { #define FC4_TYPE_OTHER 0x0 #define FC4_TYPE_UNKNOWN 0xff +/* mailbox command 4G & above */ +struct mbx_24xx_entry { + uint8_t entry_type; + uint8_t entry_count; + uint8_t sys_define1; + uint8_t entry_status; + uint32_t handle; + uint16_t mb[28]; +}; + +#define IOCB_SIZE 64 + /* * Fibre channel port type. */ @@ -2113,6 +2165,12 @@ typedef enum { FCT_TARGET } fc_port_type_t; +enum qla_sess_deletion { + QLA_SESS_DELETION_NONE = 0, + QLA_SESS_DELETION_IN_PROGRESS, + QLA_SESS_DELETED, +}; + enum qlt_plogi_link_t { QLT_PLOGI_LINK_SAME_WWN, QLT_PLOGI_LINK_CONFLICT, @@ -2124,6 +2182,48 @@ struct qlt_plogi_ack_t { struct imm_ntfy_from_isp iocb; port_id_t id; int ref_count; + void *fcport; +}; + +struct ct_sns_desc { + struct ct_sns_pkt *ct_sns; + dma_addr_t ct_sns_dma; +}; + +enum discovery_state { + DSC_DELETED, + DSC_GID_PN, + DSC_GNL, + DSC_LOGIN_PEND, + DSC_LOGIN_FAILED, + DSC_GPDB, + DSC_GPSC, + DSC_UPD_FCPORT, + DSC_LOGIN_COMPLETE, + DSC_DELETE_PEND, +}; + +enum login_state { /* FW control Target side */ + DSC_LS_LLIOCB_SENT = 2, + DSC_LS_PLOGI_PEND, + DSC_LS_PLOGI_COMP, + DSC_LS_PRLI_PEND, + DSC_LS_PRLI_COMP, + DSC_LS_PORT_UNAVAIL, + DSC_LS_PRLO_PEND = 9, + DSC_LS_LOGO_PEND, +}; + +enum fcport_mgt_event { + FCME_RELOGIN = 1, + FCME_RSCN, + FCME_GIDPN_DONE, + FCME_PLOGI_DONE, /* Initiator side sent LLIOCB */ + FCME_GNL_DONE, + FCME_GPSC_DONE, + FCME_GPDB_DONE, + FCME_GPNID_DONE, + FCME_DELETE_DONE, }; /* @@ -2143,9 +2243,13 @@ typedef struct fc_port { unsigned int deleted:2; unsigned int local:1; unsigned int logout_on_delete:1; + unsigned int logo_ack_needed:1; unsigned int keep_nport_handle:1; unsigned int send_els_logo:1; + unsigned int login_pause:1; + unsigned int login_succ:1; + struct fc_port *conflict; unsigned char logout_completed; int generation; @@ -2186,8 +2290,30 @@ typedef struct fc_port { unsigned long retry_delay_timestamp; struct qla_tgt_sess *tgt_session; + struct ct_sns_desc ct_desc; + enum discovery_state disc_state; + enum login_state fw_login_state; + u32 login_gen, last_login_gen; + u32 rscn_gen, last_rscn_gen; + u32 chip_reset; + struct list_head gnl_entry; + struct work_struct del_work; + u8 iocb[IOCB_SIZE]; } fc_port_t; +#define QLA_FCPORT_SCAN 1 +#define QLA_FCPORT_FOUND 2 + +struct event_arg { + enum fcport_mgt_event event; + fc_port_t *fcport; + srb_t *sp; + port_id_t id; + u16 data[2], rc; + u8 port_name[WWN_SIZE]; + u32 iop[2]; +}; + #include "qla_mr.h" /* @@ -2265,6 +2391,10 @@ static const 
char * const port_state_str[] = { #define GFT_ID_REQ_SIZE (16 + 4) #define GFT_ID_RSP_SIZE (16 + 32) +#define GID_PN_CMD 0x121 +#define GID_PN_REQ_SIZE (16 + 8) +#define GID_PN_RSP_SIZE (16 + 4) + #define RFT_ID_CMD 0x217 #define RFT_ID_REQ_SIZE (16 + 4 + 32) #define RFT_ID_RSP_SIZE 16 @@ -2590,6 +2720,10 @@ struct ct_sns_req { uint8_t reserved; uint8_t port_name[3]; } gff_id; + + struct { + uint8_t port_name[8]; + } gid_pn; } req; }; @@ -2669,6 +2803,10 @@ struct ct_sns_rsp { struct { uint8_t fc4_features[128]; } gff_id; + struct { + uint8_t reserved; + uint8_t port_id[3]; + } gid_pn; } rsp; }; @@ -2810,11 +2948,11 @@ struct isp_operations { uint16_t (*calc_req_entries) (uint16_t); void (*build_iocbs) (srb_t *, cmd_entry_t *, uint16_t); - void * (*prep_ms_iocb) (struct scsi_qla_host *, uint32_t, uint32_t); - void * (*prep_ms_fdmi_iocb) (struct scsi_qla_host *, uint32_t, + void *(*prep_ms_iocb) (struct scsi_qla_host *, struct ct_arg *); + void *(*prep_ms_fdmi_iocb) (struct scsi_qla_host *, uint32_t, uint32_t); - uint8_t * (*read_nvram) (struct scsi_qla_host *, uint8_t *, + uint8_t *(*read_nvram) (struct scsi_qla_host *, uint8_t *, uint32_t, uint32_t); int (*write_nvram) (struct scsi_qla_host *, uint8_t *, uint32_t, uint32_t); @@ -2876,13 +3014,21 @@ enum qla_work_type { QLA_EVT_AEN, QLA_EVT_IDC_ACK, QLA_EVT_ASYNC_LOGIN, - QLA_EVT_ASYNC_LOGIN_DONE, QLA_EVT_ASYNC_LOGOUT, QLA_EVT_ASYNC_LOGOUT_DONE, QLA_EVT_ASYNC_ADISC, QLA_EVT_ASYNC_ADISC_DONE, QLA_EVT_UEVENT, QLA_EVT_AENFX, + QLA_EVT_GIDPN, + QLA_EVT_GPNID, + QLA_EVT_GPNID_DONE, + QLA_EVT_NEW_SESS, + QLA_EVT_GPDB, + QLA_EVT_GPSC, + QLA_EVT_UPD_FCPORT, + QLA_EVT_GNL, + QLA_EVT_NACK, }; @@ -2918,6 +3064,23 @@ struct qla_work_evt { struct { srb_t *sp; } iosb; + struct { + port_id_t id; + } gpnid; + struct { + port_id_t id; + u8 port_name[8]; + void *pla; + } new_sess; + struct { /*Get PDB, Get Speed, update fcport, gnl, gidpn */ + fc_port_t *fcport; + u8 opt; + } fcport; + struct { + fc_port_t *fcport; + u8 iocb[IOCB_SIZE]; + int type; + } nack; } u; }; @@ -3899,6 +4062,10 @@ typedef struct scsi_qla_host { struct qla8044_reset_template reset_tmplt; struct qla_tgt_counters tgt_counters; uint16_t bbcr; + struct name_list_extended gnl; + /* Count of active session/fcport */ + int fcport_count; + wait_queue_head_t fcport_waitQ; } scsi_qla_host_t; struct qla27xx_image_status { diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 8a2368b32dece2..ee135cf96aee59 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -72,6 +72,37 @@ struct port_database_24xx { uint8_t reserved_3[24]; }; +/* + * MB 75h returns a list of DB entries similar to port_database_24xx(64B). + * However, in this case it returns 1st 40 bytes. + */ +struct get_name_list_extended { + __le16 flags; + u8 current_login_state; + u8 last_login_state; + u8 hard_address[3]; + u8 reserved_1; + u8 port_id[3]; + u8 sequence_id; + __le16 port_timer; + __le16 nport_handle; /* N_PORT handle. 
*/ + __le16 receive_data_size; + __le16 reserved_2; + + /* PRLI SVC Param are Big endian */ + u8 prli_svc_param_word_0[2]; /* Bits 15-0 of word 0 */ + u8 prli_svc_param_word_3[2]; /* Bits 15-0 of word 3 */ + u8 port_name[WWN_SIZE]; + u8 node_name[WWN_SIZE]; +}; + +/* MB 75h: This is the short version of the database */ +struct get_name_list { + u8 port_node_name[WWN_SIZE]; /* B7 most sig, B0 least sig */ + __le16 nport_handle; + u8 reserved; +}; + struct vp_database_24xx { uint16_t vp_status; uint8_t options; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index afa0116a163b12..dd95ff620ae3c4 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -73,6 +73,10 @@ extern void qla2x00_async_logout_done(struct scsi_qla_host *, fc_port_t *, uint16_t *); extern void qla2x00_async_adisc_done(struct scsi_qla_host *, fc_port_t *, uint16_t *); +struct qla_work_evt *qla2x00_alloc_work(struct scsi_qla_host *, + enum qla_work_type); +extern int qla24xx_async_gnl(struct scsi_qla_host *, fc_port_t *); +int qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e); extern void *qla2x00_alloc_iocbs(struct scsi_qla_host *, srb_t *); extern void *qla2x00_alloc_iocbs_ready(struct scsi_qla_host *, srb_t *); extern int qla24xx_update_fcport_fcp_prio(scsi_qla_host_t *, fc_port_t *); @@ -94,6 +98,13 @@ extern uint8_t qla27xx_find_valid_image(struct scsi_qla_host *); extern struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *, int, int); extern int qla2xxx_delete_qpair(struct scsi_qla_host *, struct qla_qpair *); +void qla2x00_fcport_event_handler(scsi_qla_host_t *, struct event_arg *); +int qla24xx_async_gpdb(struct scsi_qla_host *, fc_port_t *, u8); +int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *, + struct imm_ntfy_from_isp *, int); +int qla24xx_post_newsess_work(struct scsi_qla_host *, port_id_t *, u8 *, + void *); +int qla24xx_fcport_handle_login(struct scsi_qla_host *, fc_port_t *); /* * Global Data in qla_os.c source file. @@ -135,8 +146,6 @@ extern int qla2x00_post_aen_work(struct scsi_qla_host *, enum extern int qla2x00_post_idc_ack_work(struct scsi_qla_host *, uint16_t *); extern int qla2x00_post_async_login_work(struct scsi_qla_host *, fc_port_t *, uint16_t *); -extern int qla2x00_post_async_login_done_work(struct scsi_qla_host *, - fc_port_t *, uint16_t *); extern int qla2x00_post_async_logout_work(struct scsi_qla_host *, fc_port_t *, uint16_t *); extern int qla2x00_post_async_logout_done_work(struct scsi_qla_host *, @@ -179,6 +188,10 @@ extern void qla2x00_disable_board_on_pci_error(struct work_struct *); extern void qla2x00_sp_compl(void *, void *, int); extern void qla2xxx_qpair_sp_free_dma(void *, void *); extern void qla2xxx_qpair_sp_compl(void *, void *, int); +extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); +void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, + uint16_t *); +int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); /* * Global Functions in qla_mid.c source file. 
@@ -301,9 +314,6 @@ qla2x00_get_retry_cnt(scsi_qla_host_t *, uint8_t *, uint8_t *, uint16_t *); extern int qla2x00_init_firmware(scsi_qla_host_t *, uint16_t); -extern int -qla2x00_get_node_name_list(scsi_qla_host_t *, void **, int *); - extern int qla2x00_get_port_database(scsi_qla_host_t *, fc_port_t *, uint8_t); @@ -483,6 +493,9 @@ qla2x00_process_completed_request(struct scsi_qla_host *, struct req_que *, uint32_t); extern irqreturn_t qla2xxx_msix_rsp_q(int irq, void *dev_id); +fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t); +fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8); +fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8); /* * Global Function Prototypes in qla_sup.c source file. @@ -574,8 +587,8 @@ extern void qla2xxx_dump_post_process(scsi_qla_host_t *, int); /* * Global Function Prototypes in qla_gs.c source file. */ -extern void *qla2x00_prep_ms_iocb(scsi_qla_host_t *, uint32_t, uint32_t); -extern void *qla24xx_prep_ms_iocb(scsi_qla_host_t *, uint32_t, uint32_t); +extern void *qla2x00_prep_ms_iocb(scsi_qla_host_t *, struct ct_arg *); +extern void *qla24xx_prep_ms_iocb(scsi_qla_host_t *, struct ct_arg *); extern int qla2x00_ga_nxt(scsi_qla_host_t *, fc_port_t *); extern int qla2x00_gid_pt(scsi_qla_host_t *, sw_info_t *); extern int qla2x00_gpn_id(scsi_qla_host_t *, sw_info_t *); @@ -591,6 +604,23 @@ extern int qla2x00_fdmi_register(scsi_qla_host_t *); extern int qla2x00_gfpn_id(scsi_qla_host_t *, sw_info_t *); extern int qla2x00_gpsc(scsi_qla_host_t *, sw_info_t *); extern void qla2x00_get_sym_node_name(scsi_qla_host_t *, uint8_t *, size_t); +extern int qla2x00_chk_ms_status(scsi_qla_host_t *, ms_iocb_entry_t *, + struct ct_sns_rsp *, const char *); +extern void qla2x00_async_iocb_timeout(void *data); +extern int qla24xx_async_gidpn(scsi_qla_host_t *, fc_port_t *); +int qla24xx_post_gidpn_work(struct scsi_qla_host *, fc_port_t *); +void qla24xx_handle_gidpn_event(scsi_qla_host_t *, struct event_arg *); + +extern void qla2x00_free_fcport(fc_port_t *); + +extern int qla24xx_post_gpnid_work(struct scsi_qla_host *, port_id_t *); +extern int qla24xx_async_gpnid(scsi_qla_host_t *, port_id_t *); +void qla24xx_async_gpnid_done(scsi_qla_host_t *, srb_t*); +void qla24xx_handle_gpnid_event(scsi_qla_host_t *, struct event_arg *); + +int qla24xx_post_gpsc_work(struct scsi_qla_host *, fc_port_t *); +int qla24xx_async_gpsc(scsi_qla_host_t *, fc_port_t *); +int qla2x00_mgmt_svr_login(scsi_qla_host_t *); /* * Global Function Prototypes in qla_attr.c source file. 
@@ -803,4 +833,16 @@ extern int qla_get_exchoffld_status(scsi_qla_host_t *, uint16_t *, uint16_t *); extern int qla_set_exchoffld_mem_cfg(scsi_qla_host_t *, dma_addr_t); extern void qlt_handle_abts_recv(struct scsi_qla_host *, response_t *); +int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *, + struct imm_ntfy_from_isp *, int); +void qla24xx_do_nack_work(struct scsi_qla_host *, struct qla_work_evt *); +void qlt_plogi_ack_link(struct scsi_qla_host *, struct qlt_plogi_ack_t *, + struct fc_port *, enum qlt_plogi_link_t); +void qlt_plogi_ack_unref(struct scsi_qla_host *, struct qlt_plogi_ack_t *); +extern void qlt_schedule_sess_for_deletion(struct fc_port *, bool); +extern void qlt_schedule_sess_for_deletion_lock(struct fc_port *); +extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, + uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); +void qla24xx_delete_sess_fn(struct work_struct *); + #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 94e8a8592f6904..8f7054dc742b4b 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -24,12 +24,12 @@ static int qla2x00_sns_rnn_id(scsi_qla_host_t *); * Returns a pointer to the @ha's ms_iocb. */ void * -qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) +qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, struct ct_arg *arg) { struct qla_hw_data *ha = vha->hw; ms_iocb_entry_t *ms_pkt; - ms_pkt = ha->ms_iocb; + ms_pkt = (ms_iocb_entry_t *)arg->iocb; memset(ms_pkt, 0, sizeof(ms_iocb_entry_t)); ms_pkt->entry_type = MS_IOCB_TYPE; @@ -39,15 +39,15 @@ qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) ms_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); ms_pkt->cmd_dsd_count = cpu_to_le16(1); ms_pkt->total_dsd_count = cpu_to_le16(2); - ms_pkt->rsp_bytecount = cpu_to_le32(rsp_size); - ms_pkt->req_bytecount = cpu_to_le32(req_size); + ms_pkt->rsp_bytecount = cpu_to_le32(arg->rsp_size); + ms_pkt->req_bytecount = cpu_to_le32(arg->req_size); - ms_pkt->dseg_req_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ms_pkt->dseg_req_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ms_pkt->dseg_req_address[0] = cpu_to_le32(LSD(arg->req_dma)); + ms_pkt->dseg_req_address[1] = cpu_to_le32(MSD(arg->req_dma)); ms_pkt->dseg_req_length = ms_pkt->req_bytecount; - ms_pkt->dseg_rsp_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ms_pkt->dseg_rsp_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ms_pkt->dseg_rsp_address[0] = cpu_to_le32(LSD(arg->rsp_dma)); + ms_pkt->dseg_rsp_address[1] = cpu_to_le32(MSD(arg->rsp_dma)); ms_pkt->dseg_rsp_length = ms_pkt->rsp_bytecount; vha->qla_stats.control_requests++; @@ -64,29 +64,29 @@ qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) * Returns a pointer to the @ha's ms_iocb. 
*/ void * -qla24xx_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) +qla24xx_prep_ms_iocb(scsi_qla_host_t *vha, struct ct_arg *arg) { struct qla_hw_data *ha = vha->hw; struct ct_entry_24xx *ct_pkt; - ct_pkt = (struct ct_entry_24xx *)ha->ms_iocb; + ct_pkt = (struct ct_entry_24xx *)arg->iocb; memset(ct_pkt, 0, sizeof(struct ct_entry_24xx)); ct_pkt->entry_type = CT_IOCB_TYPE; ct_pkt->entry_count = 1; - ct_pkt->nport_handle = cpu_to_le16(NPH_SNS); + ct_pkt->nport_handle = cpu_to_le16(arg->nport_handle); ct_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); ct_pkt->cmd_dsd_count = cpu_to_le16(1); ct_pkt->rsp_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_byte_count = cpu_to_le32(rsp_size); - ct_pkt->cmd_byte_count = cpu_to_le32(req_size); + ct_pkt->rsp_byte_count = cpu_to_le32(arg->rsp_size); + ct_pkt->cmd_byte_count = cpu_to_le32(arg->req_size); - ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(arg->req_dma)); + ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(arg->req_dma)); ct_pkt->dseg_0_len = ct_pkt->cmd_byte_count; - ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(arg->rsp_dma)); + ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(arg->rsp_dma)); ct_pkt->dseg_1_len = ct_pkt->rsp_byte_count; ct_pkt->vp_index = vha->vp_idx; @@ -117,7 +117,7 @@ qla2x00_prep_ct_req(struct ct_sns_pkt *p, uint16_t cmd, uint16_t rsp_size) return &p->p.req; } -static int +int qla2x00_chk_ms_status(scsi_qla_host_t *vha, ms_iocb_entry_t *ms_pkt, struct ct_sns_rsp *ct_rsp, const char *routine) { @@ -183,14 +183,21 @@ qla2x00_ga_nxt(scsi_qla_host_t *vha, fc_port_t *fcport) struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_ga_nxt(vha, fcport); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GA_NXT_REQ_SIZE; + arg.rsp_size = GA_NXT_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue GA_NXT */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GA_NXT_REQ_SIZE, - GA_NXT_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GA_NXT_CMD, @@ -269,16 +276,24 @@ qla2x00_gid_pt(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_gid_pt_data *gid_data; struct qla_hw_data *ha = vha->hw; uint16_t gid_pt_rsp_size; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gid_pt(vha, list); gid_data = NULL; gid_pt_rsp_size = qla2x00_gid_pt_rsp_size(vha); + + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GID_PT_REQ_SIZE; + arg.rsp_size = gid_pt_rsp_size; + arg.nport_handle = NPH_SNS; + /* Issue GID_PT */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GID_PT_REQ_SIZE, - gid_pt_rsp_size); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GID_PT_CMD, gid_pt_rsp_size); @@ -344,15 +359,22 @@ qla2x00_gpn_id(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gpn_id(vha, list); + arg.iocb = ha->ms_iocb; + 
arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GPN_ID_REQ_SIZE; + arg.rsp_size = GPN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GPN_ID_REQ_SIZE, - GPN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GPN_ID_CMD, @@ -406,15 +428,22 @@ qla2x00_gnn_id(scsi_qla_host_t *vha, sw_info_t *list) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gnn_id(vha, list); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GNN_ID_REQ_SIZE; + arg.rsp_size = GNN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GNN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GNN_ID_REQ_SIZE, - GNN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GNN_ID_CMD, @@ -473,14 +502,21 @@ qla2x00_rft_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_rft_id(vha); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RFT_ID_REQ_SIZE; + arg.rsp_size = RFT_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RFT_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RFT_ID_REQ_SIZE, - RFT_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RFT_ID_CMD, @@ -526,6 +562,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) { ql_dbg(ql_dbg_disc, vha, 0x2046, @@ -533,10 +570,16 @@ qla2x00_rff_id(scsi_qla_host_t *vha) return (QLA_SUCCESS); } + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RFF_ID_REQ_SIZE; + arg.rsp_size = RFF_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RFF_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RFF_ID_REQ_SIZE, - RFF_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RFF_ID_CMD, @@ -584,14 +627,21 @@ qla2x00_rnn_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_rnn_id(vha); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RNN_ID_REQ_SIZE; + arg.rsp_size = RNN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RNN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RNN_ID_REQ_SIZE, - RNN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RNN_ID_CMD, RNN_ID_RSP_SIZE); @@ -651,6 +701,7 @@ qla2x00_rsnn_nn(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) { ql_dbg(ql_dbg_disc, vha, 
0x2050, @@ -658,10 +709,17 @@ qla2x00_rsnn_nn(scsi_qla_host_t *vha) return (QLA_SUCCESS); } + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = 0; + arg.rsp_size = RSNN_NN_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RSNN_NN */ /* Prepare common MS IOCB */ /* Request size adjusted after CT preparation */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, 0, RSNN_NN_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RSNN_NN_CMD, @@ -1103,7 +1161,7 @@ qla2x00_sns_rnn_id(scsi_qla_host_t *vha) * * Returns 0 on success. */ -static int +int qla2x00_mgmt_svr_login(scsi_qla_host_t *vha) { int ret, rval; @@ -2425,15 +2483,22 @@ qla2x00_gfpn_id(scsi_qla_host_t *vha, sw_info_t *list) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (!IS_IIDMA_CAPABLE(ha)) return QLA_FUNCTION_FAILED; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GFPN_ID_REQ_SIZE; + arg.rsp_size = GFPN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GFPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GFPN_ID_REQ_SIZE, - GFPN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GFPN_ID_CMD, @@ -2471,36 +2536,6 @@ qla2x00_gfpn_id(scsi_qla_host_t *vha, sw_info_t *list) return (rval); } -static inline void * -qla24xx_prep_ms_fm_iocb(scsi_qla_host_t *vha, uint32_t req_size, - uint32_t rsp_size) -{ - struct ct_entry_24xx *ct_pkt; - struct qla_hw_data *ha = vha->hw; - ct_pkt = (struct ct_entry_24xx *)ha->ms_iocb; - memset(ct_pkt, 0, sizeof(struct ct_entry_24xx)); - - ct_pkt->entry_type = CT_IOCB_TYPE; - ct_pkt->entry_count = 1; - ct_pkt->nport_handle = cpu_to_le16(vha->mgmt_svr_loop_id); - ct_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); - ct_pkt->cmd_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_byte_count = cpu_to_le32(rsp_size); - ct_pkt->cmd_byte_count = cpu_to_le32(req_size); - - ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_len = ct_pkt->cmd_byte_count; - - ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_len = ct_pkt->rsp_byte_count; - ct_pkt->vp_index = vha->vp_idx; - - return ct_pkt; -} - static inline struct ct_sns_req * qla24xx_prep_ct_fm_req(struct ct_sns_pkt *p, uint16_t cmd, @@ -2530,9 +2565,10 @@ qla2x00_gpsc(scsi_qla_host_t *vha, sw_info_t *list) int rval; uint16_t i; struct qla_hw_data *ha = vha->hw; - ms_iocb_entry_t *ms_pkt; + ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (!IS_IIDMA_CAPABLE(ha)) return QLA_FUNCTION_FAILED; @@ -2543,11 +2579,17 @@ qla2x00_gpsc(scsi_qla_host_t *vha, sw_info_t *list) if (rval) return rval; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GPSC_REQ_SIZE; + arg.rsp_size = GPSC_RSP_SIZE; + arg.nport_handle = vha->mgmt_svr_loop_id; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GFPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = qla24xx_prep_ms_fm_iocb(vha, GPSC_REQ_SIZE, - GPSC_RSP_SIZE); + ms_pkt = qla24xx_prep_ms_iocb(vha, &arg); /* Prepare CT request 
*/ ct_req = qla24xx_prep_ct_fm_req(ha->ct_sns, GPSC_CMD, @@ -2641,6 +2683,7 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; uint8_t fcp_scsi_features = 0; + struct ct_arg arg; for (i = 0; i < ha->max_fibre_devices; i++) { /* Set default FC4 Type as UNKNOWN so the default is to @@ -2651,9 +2694,15 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) if (!IS_FWI2_CAPABLE(ha)) continue; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GFF_ID_REQ_SIZE; + arg.rsp_size = GFF_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GFF_ID_REQ_SIZE, - GFF_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GFF_ID_CMD, @@ -2692,3 +2741,536 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) break; } } + +/* GID_PN completion processing. */ +void qla24xx_handle_gidpn_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC login state %d \n", + __func__, fcport->port_name, fcport->fw_login_state); + + if (ea->sp->gen2 != fcport->login_gen) { + /* PLOGI/PRLI/LOGO came in while cmd was out.*/ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC generation changed rscn %d|%d login %d|%d \n", + __func__, fcport->port_name, fcport->last_rscn_gen, + fcport->rscn_gen, fcport->last_login_gen, fcport->login_gen); + return; + } + + if (!ea->rc) { + if (ea->sp->gen1 == fcport->rscn_gen) { + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + + if (fcport->d_id.b24 == ea->id.b24) { + /* cable plugged into the same place */ + switch (vha->host->active_mode) { + case MODE_TARGET: + /* NOOP. 
let the other guy login to us.*/ + break; + case MODE_INITIATOR: + case MODE_DUAL: + default: + if (atomic_read(&fcport->state) == + FCS_ONLINE) + break; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gnl\n", + __func__, __LINE__, fcport->port_name); + qla24xx_post_gnl_work(vha, fcport); + break; + } + } else { /* fcport->d_id.b24 != ea->id.b24 */ + fcport->d_id.b24 = ea->id.b24; + if (fcport->deleted == QLA_SESS_DELETED) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + } + } + } else { /* ea->sp->gen1 != fcport->rscn_gen */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + /* rscn came in while cmd was out */ + qla24xx_post_gidpn_work(vha, fcport); + } + } else { /* ea->rc */ + /* cable pulled */ + if (ea->sp->gen1 == fcport->rscn_gen) { + if (ea->sp->gen2 == fcport->login_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", __func__, + __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC login\n", __func__, __LINE__, + fcport->port_name); + qla24xx_fcport_handle_login(vha, fcport); + } + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn\n", __func__, __LINE__, + fcport->port_name); + qla24xx_post_gidpn_work(vha, fcport); + } + } +} /* gidpn_event */ + +static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + fc_port_t *fcport = sp->fcport; + u8 *id = fcport->ct_desc.ct_sns->p.rsp.rsp.gid_pn.port_id; + struct event_arg ea; + + fcport->flags &= ~FCF_ASYNC_SENT; + + memset(&ea, 0, sizeof(ea)); + ea.fcport = fcport; + ea.id.b.domain = id[0]; + ea.id.b.area = id[1]; + ea.id.b.al_pa = id[2]; + ea.sp = sp; + ea.rc = res; + ea.event = FCME_GIDPN_DONE; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC ID %3phC \n", + sp->name, res, fcport->port_name, id); + + qla2x00_fcport_event_handler(vha, &ea); + + sp->free(vha, sp); +} + +int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GID_PN; + fcport->scan_state = QLA_FCPORT_SCAN; + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gidpn"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + /* CT_IU preamble */ + ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GID_PN_CMD, + GID_PN_RSP_SIZE); + + /* GIDPN req */ + memcpy(ct_req->req.gid_pn.port_name, fcport->port_name, + WWN_SIZE); + + /* req & rsp use the same buffer */ + sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.req_size = GID_PN_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GID_PN_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla2x00_async_gidpn_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + 
+ ql_dbg(ql_dbg_disc, vha, 0x206f, + "Async-%s - %8phC hdl=%x loopid=%x portid %02x%02x%02x.\n", + sp->name, fcport->port_name, + sp->handle, fcport->loop_id, fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa); + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gidpn_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + if ((atomic_read(&vha->loop_state) != LOOP_READY) || + test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + e = qla2x00_alloc_work(vha, QLA_EVT_GIDPN); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +int qla24xx_post_gpsc_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPSC); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct qla_hw_data *ha = vha->hw; + struct srb *sp = (struct srb *)s; + fc_port_t *fcport = sp->fcport; + struct ct_sns_rsp *ct_rsp; + struct event_arg ea; + + ct_rsp = &fcport->ct_desc.ct_sns->p.rsp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC \n", + sp->name, res, fcport->port_name); + + fcport->flags &= ~FCF_ASYNC_SENT; + + if (res == (DID_ERROR << 16)) { + /* entry status error */ + goto done; + } else if (res) { + if ((ct_rsp->header.reason_code == + CT_REASON_INVALID_COMMAND_CODE) || + (ct_rsp->header.reason_code == + CT_REASON_COMMAND_UNSUPPORTED)) { + ql_dbg(ql_dbg_disc, vha, 0x205a, + "GPSC command unsupported, disabling " + "query.\n"); + ha->flags.gpsc_supported = 0; + res = QLA_SUCCESS; + } + } else { + switch (be16_to_cpu(ct_rsp->rsp.gpsc.speed)) { + case BIT_15: + fcport->fp_speed = PORT_SPEED_1GB; + break; + case BIT_14: + fcport->fp_speed = PORT_SPEED_2GB; + break; + case BIT_13: + fcport->fp_speed = PORT_SPEED_4GB; + break; + case BIT_12: + fcport->fp_speed = PORT_SPEED_10GB; + break; + case BIT_11: + fcport->fp_speed = PORT_SPEED_8GB; + break; + case BIT_10: + fcport->fp_speed = PORT_SPEED_16GB; + break; + case BIT_8: + fcport->fp_speed = PORT_SPEED_32GB; + break; + } + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s OUT WWPN %8phC speeds=%04x speed=%04x.\n", + sp->name, + fcport->fabric_port_name, + be16_to_cpu(ct_rsp->rsp.gpsc.speeds), + be16_to_cpu(ct_rsp->rsp.gpsc.speed)); + } +done: + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_GPSC_DONE; + ea.rc = res; + ea.fcport = fcport; + qla2x00_fcport_event_handler(vha, &ea); + + sp->free(vha, sp); +} + +int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gpsc"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + /* CT_IU preamble */ + ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD, + GPSC_RSP_SIZE); + + /* GPSC req */ + memcpy(ct_req->req.gpsc.port_name, fcport->port_name, + WWN_SIZE); + + sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma; + 
sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.req_size = GPSC_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla24xx_async_gpsc_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n", + sp->name, fcport->port_name, sp->handle, + fcport->loop_id, fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa); + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id) +{ + struct qla_work_evt *e; + + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPNID); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.gpnid.id = *id; + return qla2x00_post_work(vha, e); +} + +void qla24xx_async_gpnid_done(scsi_qla_host_t *vha, srb_t *sp) +{ + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); +} + +void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + fc_port_t *fcport; + unsigned long flags; + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport = qla2x00_find_fcport_by_wwpn(vha, ea->port_name, 1); + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + if (fcport) { + /* cable moved. just plugged in */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + + fcport->rscn_gen++; + fcport->d_id = ea->id; + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + + qlt_schedule_sess_for_deletion_lock(fcport); + } else { + /* create new fcport */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post new sess\n", + __func__, __LINE__, ea->port_name); + + qla24xx_post_newsess_work(vha, &ea->id, ea->port_name, NULL); + } +} + +static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + struct ct_sns_req *ct_req = + (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; + struct ct_sns_rsp *ct_rsp = + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp; + struct event_arg ea; + struct qla_work_evt *e; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x ID %3phC. %8phC\n", + sp->name, res, ct_req->req.port_id.port_id, + ct_rsp->rsp.gpn_id.port_name); + + memset(&ea, 0, sizeof(ea)); + memcpy(ea.port_name, ct_rsp->rsp.gpn_id.port_name, WWN_SIZE); + ea.sp = sp; + ea.id.b.domain = ct_req->req.port_id.port_id[0]; + ea.id.b.area = ct_req->req.port_id.port_id[1]; + ea.id.b.al_pa = ct_req->req.port_id.port_id[2]; + ea.rc = res; + ea.event = FCME_GPNID_DONE; + + qla2x00_fcport_event_handler(vha, &ea); + + e = qla2x00_alloc_work(vha, QLA_EVT_GPNID_DONE); + if (!e) { + /* please ignore kernel warning. otherwise, we have mem leak. 
*/ + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); + return; + } + + e->u.iosb.sp = sp; + qla2x00_post_work(vha, e); +} + +/* Get WWPN with Nport ID. */ +int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + struct ct_sns_pkt *ct_sns; + + if (!vha->flags.online) + goto done; + + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gpnid"; + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.req) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + goto done_free_sp; + } + + sp->u.iocb_cmd.u.ctarg.rsp = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.rsp_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.rsp) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + goto done_free_sp; + } + + ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.rsp; + memset(ct_sns, 0, sizeof(*ct_sns)); + + ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.req; + /* CT_IU preamble */ + ct_req = qla2x00_prep_ct_req(ct_sns, GPN_ID_CMD, GPN_ID_RSP_SIZE); + + /* GPN_ID req */ + ct_req->req.port_id.port_id[0] = id->b.domain; + ct_req->req.port_id.port_id[1] = id->b.area; + ct_req->req.port_id.port_id[2] = id->b.al_pa; + + sp->u.iocb_cmd.u.ctarg.req_size = GPN_ID_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla2x00_async_gpnid_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s hdl=%x ID %3phC.\n", sp->name, + sp->handle, ct_req->req.port_id.port_id); + return rval; + +done_free_sp: + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); +done: + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5978b79d4a619a..8b7c0468a0e0c6 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -30,15 +30,15 @@ static int qla2x00_configure_hba(scsi_qla_host_t *); static int qla2x00_configure_loop(scsi_qla_host_t *); static int qla2x00_configure_local_loop(scsi_qla_host_t *); static int qla2x00_configure_fabric(scsi_qla_host_t *); -static int qla2x00_find_all_fabric_devs(scsi_qla_host_t *, struct list_head *); -static int qla2x00_fabric_dev_login(scsi_qla_host_t *, fc_port_t *, - uint16_t *); - +static int 
qla2x00_find_all_fabric_devs(scsi_qla_host_t *); static int qla2x00_restart_isp(scsi_qla_host_t *); static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *); static int qla84xx_init_chip(scsi_qla_host_t *); static int qla25xx_init_queues(struct qla_hw_data *); +static int qla24xx_post_gpdb_work(struct scsi_qla_host *, fc_port_t *, u8); +static void qla24xx_handle_plogi_done_event(struct scsi_qla_host *, + struct event_arg *); /* SRB Extensions ---------------------------------------------------------- */ @@ -47,8 +47,8 @@ qla2x00_sp_timeout(unsigned long __data) { srb_t *sp = (srb_t *)__data; struct srb_iocb *iocb; - fc_port_t *fcport = sp->fcport; - struct qla_hw_data *ha = fcport->vha->hw; + scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + struct qla_hw_data *ha = vha->hw; struct req_que *req; unsigned long flags; @@ -57,7 +57,7 @@ qla2x00_sp_timeout(unsigned long __data) req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - sp->free(fcport->vha, sp); + sp->free(vha, sp); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -94,29 +94,44 @@ qla2x00_get_async_timeout(struct scsi_qla_host *vha) return tmo; } -static void +void qla2x00_async_iocb_timeout(void *data) { srb_t *sp = (srb_t *)data; fc_port_t *fcport = sp->fcport; + struct srb_iocb *lio = &sp->u.iocb_cmd; + struct event_arg ea; ql_dbg(ql_dbg_disc, fcport->vha, 0x2071, - "Async-%s timeout - hdl=%x portid=%02x%02x%02x.\n", - sp->name, sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); + "Async-%s timeout - hdl=%x portid=%06x %8phC.\n", + sp->name, sp->handle, fcport->d_id.b24, fcport->port_name); fcport->flags &= ~FCF_ASYNC_SENT; - if (sp->type == SRB_LOGIN_CMD) { - struct srb_iocb *lio = &sp->u.iocb_cmd; - qla2x00_post_async_logout_work(fcport->vha, fcport, NULL); + + switch (sp->type) { + case SRB_LOGIN_CMD: /* Retry as needed. */ lio->u.logio.data[0] = MBS_COMMAND_ERROR; lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ? 
QLA_LOGIO_LOGIN_RETRIED : 0; - qla2x00_post_async_login_done_work(fcport->vha, fcport, - lio->u.logio.data); - } else if (sp->type == SRB_LOGOUT_CMD) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_PLOGI_DONE; + ea.fcport = sp->fcport; + ea.data[0] = lio->u.logio.data[0]; + ea.data[1] = lio->u.logio.data[1]; + ea.sp = sp; + qla24xx_handle_plogi_done_event(fcport->vha, &ea); + break; + case SRB_LOGOUT_CMD: qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); + break; + case SRB_CT_PTHRU_CMD: + case SRB_MB_IOCB: + case SRB_NACK_PLOGI: + case SRB_NACK_PRLI: + case SRB_NACK_LOGO: + sp->done(sp->vha, sp, QLA_FUNCTION_TIMEOUT); + break; } } @@ -126,10 +141,25 @@ qla2x00_async_login_sp_done(void *data, void *ptr, int res) srb_t *sp = (srb_t *)ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + struct event_arg ea; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC res %d \n", + __func__, sp->fcport->port_name, res); + + sp->fcport->flags &= ~FCF_ASYNC_SENT; + if (!test_bit(UNLOADING, &vha->dpc_flags)) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_PLOGI_DONE; + ea.fcport = sp->fcport; + ea.data[0] = lio->u.logio.data[0]; + ea.data[1] = lio->u.logio.data[1]; + ea.iop[0] = lio->u.logio.iop[0]; + ea.iop[1] = lio->u.logio.iop[1]; + ea.sp = sp; + qla2x00_fcport_event_handler(vha, &ea); + } - if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_login_done_work(sp->fcport->vha, sp->fcport, - lio->u.logio.data); sp->free(sp->fcport->vha, sp); } @@ -139,13 +169,23 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, { srb_t *sp; struct srb_iocb *lio; - int rval; + int rval = QLA_FUNCTION_FAILED; + + if (!vha->flags.online) + goto done; + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + goto done; - rval = QLA_FUNCTION_FAILED; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; + fcport->flags |= FCF_ASYNC_SENT; + fcport->logout_completed = 0; + sp->type = SRB_LOGIN_CMD; sp->name = "login"; qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); @@ -165,8 +205,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, } ql_dbg(ql_dbg_disc, vha, 0x2072, - "Async-login - hdl=%x, loopid=%x portid=%02x%02x%02x " - "retries=%d.\n", sp->handle, fcport->loop_id, + "Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x " + "retries=%d.\n", fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, fcport->login_retry); return rval; @@ -174,6 +214,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -184,6 +225,7 @@ qla2x00_async_logout_sp_done(void *data, void *ptr, int res) struct srb_iocb *lio = &sp->u.iocb_cmd; struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + sp->fcport->flags &= ~FCF_ASYNC_SENT; if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_logout_done_work(sp->fcport->vha, sp->fcport, lio->u.logio.data); @@ -198,6 +240,7 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) int rval; rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_ASYNC_SENT; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -214,14 +257,16 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) goto done_free_sp; ql_dbg(ql_dbg_disc, vha, 0x2070, - "Async-logout - hdl=%x 
loop-id=%x portid=%02x%02x%02x.\n", + "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC.\n", sp->handle, fcport->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa); + fcport->d_id.b.area, fcport->d_id.b.al_pa, + fcport->port_name); return rval; done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -247,6 +292,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, int rval; rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_ASYNC_SENT; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -273,9 +319,817 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport, *conflict_fcport; + struct get_name_list_extended *e; + u16 i, n, found = 0, loop_id; + port_id_t id; + u64 wwn; + u8 opt = 0; + + fcport = ea->fcport; + + if (ea->rc) { /* rval */ + if (fcport->login_retry == 0) { + fcport->login_retry = vha->hw->login_retry_count; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "GNL failed Port login retry %8phN, retry cnt=%d.\n", + fcport->port_name, fcport->login_retry); + } + return; + } + + if (fcport->last_rscn_gen != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC rscn gen changed rscn %d|%d \n", + __func__, fcport->port_name, + fcport->last_rscn_gen, fcport->rscn_gen); + qla24xx_post_gidpn_work(vha, fcport); + return; + } else if (fcport->last_login_gen != fcport->login_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC login gen changed login %d|%d \n", + __func__, fcport->port_name, + fcport->last_login_gen, fcport->login_gen); + return; + } + + n = ea->data[0] / sizeof(struct get_name_list_extended); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC n %d %02x%02x%02x lid %d \n", + __func__, __LINE__, fcport->port_name, n, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, fcport->loop_id); + + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + wwn = wwn_to_u64(e->port_name); + + if (memcmp((u8 *)&wwn, fcport->port_name, WWN_SIZE)) + continue; + + found = 1; + id.b.domain = e->port_id[2]; + id.b.area = e->port_id[1]; + id.b.al_pa = e->port_id[0]; + id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(e->nport_handle); + loop_id = (loop_id & 0x7fff); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s found %8phC CLS [%d|%d] ID[%02x%02x%02x|%02x%02x%02x] lid[%d|%d]\n", + __func__, fcport->port_name, + e->current_login_state, fcport->fw_login_state, + id.b.domain, id.b.area, id.b.al_pa, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, loop_id, fcport->loop_id); + + if ((id.b24 != fcport->d_id.b24) || + ((fcport->loop_id != FC_NO_LOOP_ID) && + (fcport->loop_id != loop_id))) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion(fcport, 1); + return; + } + + fcport->loop_id = loop_id; + + wwn = wwn_to_u64(fcport->port_name); + qlt_find_sess_invalidate_other(vha, wwn, + id, loop_id, &conflict_fcport); + + if (conflict_fcport) { + /* + * Another share fcport share the same loop_id & + * nport id. Conflict fcport needs to finish + * cleanup before this fcport can proceed to login. 
+ */ + conflict_fcport->conflict = fcport; + fcport->login_pause = 1; + } + + switch (e->current_login_state) { + case DSC_LS_PRLI_COMP: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, fcport->port_name); + opt = PDO_FORCE_ADISC; + qla24xx_post_gpdb_work(vha, fcport, opt); + break; + + case DSC_LS_PORT_UNAVAIL: + default: + if (fcport->loop_id == FC_NO_LOOP_ID) { + qla2x00_find_new_loop_id(vha, fcport); + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + } + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC \n", + __func__, __LINE__, fcport->port_name); + qla24xx_fcport_handle_login(vha, fcport); + break; + } + } + + if (!found) { + /* fw has no record of this port */ + if (fcport->loop_id == FC_NO_LOOP_ID) { + qla2x00_find_new_loop_id(vha, fcport); + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + } else { + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + id.b.domain = e->port_id[0]; + id.b.area = e->port_id[1]; + id.b.al_pa = e->port_id[2]; + id.b.rsvd_1 = 0; + loop_id = le16_to_cpu(e->nport_handle); + + if (fcport->d_id.b24 == id.b24) { + conflict_fcport = + qla2x00_find_fcport_by_wwpn(vha, + e->port_name, 0); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, + conflict_fcport->port_name); + qlt_schedule_sess_for_deletion + (conflict_fcport, 1); + } + + if (fcport->loop_id == loop_id) { + /* FW already picked this loop id for another fcport */ + qla2x00_find_new_loop_id(vha, fcport); + } + } + } + qla24xx_fcport_handle_login(vha, fcport); + } +} /* gnl_event */ + +static void +qla24xx_async_gnl_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + unsigned long flags; + struct fc_port *fcport = NULL, *tf; + u16 i, n = 0, loop_id; + struct event_arg ea; + struct get_name_list_extended *e; + u64 wwn; + struct list_head h; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x mb[1]=%x mb[2]=%x \n", + sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1], + sp->u.iocb_cmd.u.mbx.in_mb[2]); + + memset(&ea, 0, sizeof(ea)); + ea.sp = sp; + ea.rc = res; + ea.event = FCME_GNL_DONE; + + if (sp->u.iocb_cmd.u.mbx.in_mb[1] >= + sizeof(struct get_name_list_extended)) { + n = sp->u.iocb_cmd.u.mbx.in_mb[1] / + sizeof(struct get_name_list_extended); + ea.data[0] = sp->u.iocb_cmd.u.mbx.in_mb[1]; /* amnt xfered */ + } + + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + loop_id = le16_to_cpu(e->nport_handle); + /* mask out reserve bit */ + loop_id = (loop_id & 0x7fff); + set_bit(loop_id, vha->hw->loop_id_map); + wwn = wwn_to_u64(e->port_name); + + ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, + "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n", + __func__, (void *)&wwn, e->port_id[2], e->port_id[1], + e->port_id[0], e->current_login_state, e->last_login_state, + (loop_id & 0x7fff)); + } + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + vha->gnl.sent = 0; + + INIT_LIST_HEAD(&h); + fcport = tf = NULL; + if (!list_empty(&vha->gnl.fcports)) + list_splice_init(&vha->gnl.fcports, &h); + + list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { + list_del_init(&fcport->gnl_entry); + fcport->flags &= ~FCF_ASYNC_SENT; + ea.fcport = fcport; + + qla2x00_fcport_event_handler(vha, &ea); + } + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp->free(vha, sp); +} + +int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + srb_t *sp; + struct srb_iocb *mbx; + int rval = QLA_FUNCTION_FAILED; + unsigned long flags; + u16 *mb; + + if 
(!vha->flags.online) + goto done; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-gnlist WWPN %8phC \n", fcport->port_name); + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GNL; + fcport->last_rscn_gen = fcport->rscn_gen; + fcport->last_login_gen = fcport->login_gen; + + list_add_tail(&fcport->gnl_entry, &vha->gnl.fcports); + if (vha->gnl.sent) { + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + rval = QLA_SUCCESS; + goto done; + } + vha->gnl.sent = 1; + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + sp->type = SRB_MB_IOCB; + sp->name = "gnlist"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + + mb = sp->u.iocb_cmd.u.mbx.out_mb; + mb[0] = MBC_PORT_NODE_NAME_LIST; + mb[1] = BIT_2 | BIT_3; + mb[2] = MSW(vha->gnl.ldma); + mb[3] = LSW(vha->gnl.ldma); + mb[6] = MSW(MSD(vha->gnl.ldma)); + mb[7] = LSW(MSD(vha->gnl.ldma)); + mb[8] = vha->gnl.size; + mb[9] = vha->vp_idx; + + mbx = &sp->u.iocb_cmd; + mbx->timeout = qla2x00_async_iocb_timeout; + + sp->done = qla24xx_async_gnl_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s - OUT WWPN %8phC hndl %x\n", + sp->name, fcport->port_name, sp->handle); + + return rval; + +done_free_sp: + sp->free(fcport->vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gnl_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GNL); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static +void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + struct qla_hw_data *ha = vha->hw; + uint64_t zero = 0; + struct port_database_24xx *pd; + fc_port_t *fcport = sp->fcport; + u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb; + int rval = QLA_SUCCESS; + struct event_arg ea; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC mb[1]=%x mb[2]=%x \n", + sp->name, res, fcport->port_name, mb[1], mb[2]); + + fcport->flags &= ~FCF_ASYNC_SENT; + + if (res) { + rval = res; + goto gpd_error_out; + } + + pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; + + /* Check for logged in state. */ + if (pd->current_login_state != PDS_PRLI_COMPLETE && + pd->last_login_state != PDS_PRLI_COMPLETE) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "Unable to verify login-state (%x/%x) for " + "loop_id %x.\n", pd->current_login_state, + pd->last_login_state, fcport->loop_id); + rval = QLA_FUNCTION_FAILED; + goto gpd_error_out; + } + + if (fcport->loop_id == FC_NO_LOOP_ID || + (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && + memcmp(fcport->port_name, pd->port_name, 8))) { + /* We lost the device mid way. */ + rval = QLA_NOT_LOGGED_IN; + goto gpd_error_out; + } + + /* Names are little-endian. */ + memcpy(fcport->node_name, pd->node_name, WWN_SIZE); + + /* Get port_id of device. */ + fcport->d_id.b.domain = pd->port_id[0]; + fcport->d_id.b.area = pd->port_id[1]; + fcport->d_id.b.al_pa = pd->port_id[2]; + fcport->d_id.b.rsvd_1 = 0; + + /* If not target must be initiator or unknown type. 
*/ + if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) + fcport->port_type = FCT_INITIATOR; + else + fcport->port_type = FCT_TARGET; + + /* Passback COS information. */ + fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? + FC_COS_CLASS2 : FC_COS_CLASS3; + + if (pd->prli_svc_param_word_3[0] & BIT_7) { + fcport->flags |= FCF_CONF_COMP_SUPPORTED; + fcport->conf_compl_supported = 1; + } + +gpd_error_out: + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_GPDB_DONE; + ea.rc = rval; + ea.fcport = fcport; + ea.sp = sp; + + qla2x00_fcport_event_handler(vha, &ea); + + dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, + sp->u.iocb_cmd.u.mbx.in_dma); + + sp->free(vha, sp); +} + +static int qla24xx_post_gpdb_work(struct scsi_qla_host *vha, fc_port_t *fcport, + u8 opt) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPDB); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + e->u.fcport.opt = opt; + return qla2x00_post_work(vha, e); +} + +int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +{ + srb_t *sp; + struct srb_iocb *mbx; + int rval = QLA_FUNCTION_FAILED; + u16 *mb; + dma_addr_t pd_dma; + struct port_database_24xx *pd; + struct qla_hw_data *ha = vha->hw; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GPDB; + + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + + pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); + if (pd == NULL) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate port database structure.\n"); + goto done_free_sp; + } + memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE)); + + sp->type = SRB_MB_IOCB; + sp->name = "gpdb"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + mb = sp->u.iocb_cmd.u.mbx.out_mb; + mb[0] = MBC_GET_PORT_DATABASE; + mb[1] = fcport->loop_id; + mb[2] = MSW(pd_dma); + mb[3] = LSW(pd_dma); + mb[6] = MSW(MSD(pd_dma)); + mb[7] = LSW(MSD(pd_dma)); + mb[9] = vha->vp_idx; + mb[10] = opt; + + mbx = &sp->u.iocb_cmd; + mbx->timeout = qla2x00_async_iocb_timeout; + mbx->u.mbx.in = (void *)pd; + mbx->u.mbx.in_dma = pd_dma; + + sp->done = qla24xx_async_gpdb_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hndl %x opt %x\n", + sp->name, fcport->port_name, sp->handle, opt); + + return rval; + +done_free_sp: + if (pd) + dma_pool_free(ha->s_dma_pool, pd, pd_dma); + + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + qla24xx_post_gpdb_work(vha, fcport, opt); return rval; } +static +void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + int rval = ea->rc; + fc_port_t *fcport = ea->fcport; + unsigned long flags; + + fcport->flags &= ~FCF_ASYNC_SENT; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d rval %d\n", __func__, fcport->port_name, + fcport->disc_state, fcport->fw_login_state, rval); + + if (ea->sp->gen2 != fcport->login_gen) { + /* target side must have changed it. 
*/ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC generation changed rscn %d|%d login %d|%d \n", + __func__, fcport->port_name, fcport->last_rscn_gen, + fcport->rscn_gen, fcport->last_login_gen, + fcport->login_gen); + return; + } else if (ea->sp->gen1 != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + qla24xx_post_gidpn_work(vha, fcport); + return; + } + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + return; + } + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + ea->fcport->login_gen++; + ea->fcport->deleted = 0; + ea->fcport->logout_on_delete = 1; + + if (!ea->fcport->login_succ && !IS_SW_RESV_ADDR(ea->fcport->d_id)) { + vha->fcport_count++; + ea->fcport->login_succ = 1; + + if (!IS_IIDMA_CAPABLE(vha->hw) || + !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, + vha->fcport_count); + + qla24xx_post_upd_fcport_work(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, + vha->fcport_count); + + qla24xx_post_gpsc_work(vha, fcport); + } + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); +} /* gpdb event */ + +int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + if (fcport->login_retry == 0) + return 0; + + if (fcport->scan_state != QLA_FCPORT_FOUND) + return 0; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d|%d retry %d lid %d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->login_pause, fcport->flags, + fcport->conflict, fcport->last_rscn_gen, fcport->rscn_gen, + fcport->last_login_gen, fcport->login_gen, fcport->login_retry, + fcport->loop_id); + + fcport->login_retry--; + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + return 0; + + /* for pure Target Mode. 
Login will not be initiated */ + if (vha->host->active_mode == MODE_TARGET) + return 0; + + if (fcport->flags & FCF_ASYNC_SENT) { + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + return 0; + } + + switch (fcport->disc_state) { + case DSC_DELETED: + if (fcport->loop_id == FC_NO_LOOP_ID) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gnl\n", + __func__, __LINE__, fcport->port_name); + qla24xx_async_gnl(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post login\n", + __func__, __LINE__, fcport->port_name); + fcport->disc_state = DSC_LOGIN_PEND; + qla2x00_post_async_login_work(vha, fcport, NULL); + } + break; + + case DSC_GNL: + if (fcport->login_pause) { + fcport->last_rscn_gen = fcport->rscn_gen; + fcport->last_login_gen = fcport->login_gen; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + break; + } + + if (fcport->flags & FCF_FCP2_DEVICE) { + u8 opt = PDO_FORCE_ADISC; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, fcport->port_name); + + fcport->disc_state = DSC_GPDB; + qla24xx_post_gpdb_work(vha, fcport, opt); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post login \n", + __func__, __LINE__, fcport->port_name); + fcport->disc_state = DSC_LOGIN_PEND; + qla2x00_post_async_login_work(vha, fcport, NULL); + } + + break; + + case DSC_LOGIN_FAILED: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn \n", + __func__, __LINE__, fcport->port_name); + + qla24xx_post_gidpn_work(vha, fcport); + break; + + case DSC_LOGIN_COMPLETE: + /* recheck login state */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb \n", + __func__, __LINE__, fcport->port_name); + + qla24xx_post_gpdb_work(vha, fcport, PDO_FORCE_ADISC); + break; + + default: + break; + } + + return 0; +} + +static +void qla24xx_handle_rscn_event(fc_port_t *fcport, struct event_arg *ea) +{ + fcport->rscn_gen++; + + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phC DS %d LS %d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state); + + if (fcport->flags & FCF_ASYNC_SENT) + return; + + switch (fcport->disc_state) { + case DSC_DELETED: + case DSC_LOGIN_COMPLETE: + qla24xx_post_gidpn_work(fcport->vha, fcport); + break; + + default: + break; + } +} + +int qla24xx_post_newsess_work(struct scsi_qla_host *vha, port_id_t *id, + u8 *port_name, void *pla) +{ + struct qla_work_evt *e; + e = qla2x00_alloc_work(vha, QLA_EVT_NEW_SESS); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.new_sess.id = *id; + e->u.new_sess.pla = pla; + memcpy(e->u.new_sess.port_name, port_name, WWN_SIZE); + + return qla2x00_post_work(vha, e); +} + +static +int qla24xx_handle_delete_done_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + switch (vha->host->active_mode) { + case MODE_INITIATOR: + case MODE_DUAL: + if (fcport->scan_state == QLA_FCPORT_FOUND) + qla24xx_fcport_handle_login(vha, fcport); + break; + + case MODE_TARGET: + default: + /* no-op */ + break; + } + + return 0; +} + +static +void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + if (fcport->scan_state != QLA_FCPORT_FOUND) { + fcport->login_retry++; + return; + } + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d P %d del %d cnfl %p rscn %d|%d login %d|%d fl %x\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->login_pause, + fcport->deleted, fcport->conflict, + 
fcport->last_rscn_gen, fcport->rscn_gen, + fcport->last_login_gen, fcport->login_gen, + fcport->flags); + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + return; + + if (fcport->flags & FCF_ASYNC_SENT) { + fcport->login_retry++; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + return; + } + + if (fcport->disc_state == DSC_DELETE_PEND) { + fcport->login_retry++; + return; + } + + if (fcport->last_rscn_gen != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + + qla24xx_async_gidpn(vha, fcport); + return; + } + + qla24xx_fcport_handle_login(vha, fcport); +} + +void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport; + int rc; + + switch (ea->event) { + case FCME_RELOGIN: + if (test_bit(UNLOADING, &vha->dpc_flags)) + return; + + qla24xx_handle_relogin_event(vha, ea); + break; + case FCME_RSCN: + if (test_bit(UNLOADING, &vha->dpc_flags)) + return; + + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (!fcport) { + /* cable moved */ + rc = qla24xx_post_gpnid_work(vha, &ea->id); + if (rc) { + ql_log(ql_log_warn, vha, 0xffff, + "RSCN GPNID work failed %02x%02x%02x\n", + ea->id.b.domain, ea->id.b.area, + ea->id.b.al_pa); + } + } else { + ea->fcport = fcport; + qla24xx_handle_rscn_event(fcport, ea); + } + break; + case FCME_GIDPN_DONE: + qla24xx_handle_gidpn_event(vha, ea); + break; + case FCME_GNL_DONE: + qla24xx_handle_gnl_done_event(vha, ea); + break; + case FCME_GPSC_DONE: + qla24xx_post_upd_fcport_work(vha, ea->fcport); + break; + case FCME_PLOGI_DONE: /* Initiator side sent LLIOCB */ + qla24xx_handle_plogi_done_event(vha, ea); + break; + case FCME_GPDB_DONE: + qla24xx_handle_gpdb_event(vha, ea); + break; + case FCME_GPNID_DONE: + qla24xx_handle_gpnid_event(vha, ea); + break; + case FCME_DELETE_DONE: + qla24xx_handle_delete_done_event(vha, ea); + break; + default: + BUG_ON(1); + break; + } +} + static void qla2x00_tmf_iocb_timeout(void *data) { @@ -441,59 +1295,65 @@ qla24xx_async_abort_command(srb_t *sp) return qla24xx_async_abort_cmd(sp); } -void -qla2x00_async_login_done(struct scsi_qla_host *vha, fc_port_t *fcport, - uint16_t *data) +static void +qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) { - int rval; + port_id_t cid; /* conflict Nport id */ - switch (data[0]) { + switch (ea->data[0]) { case MBS_COMMAND_COMPLETE: /* * Driver must validate login state - If PRLI not complete, * force a relogin attempt via implicit LOGO, PLOGI, and PRLI * requests. 
*/ - rval = qla2x00_get_port_database(vha, fcport, 0); - if (rval == QLA_NOT_LOGGED_IN) { - fcport->flags &= ~FCF_ASYNC_SENT; - fcport->flags |= FCF_LOGIN_NEEDED; - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - break; - } - - if (rval != QLA_SUCCESS) { - qla2x00_post_async_logout_work(vha, fcport, NULL); - qla2x00_post_async_login_work(vha, fcport, NULL); - break; - } - if (fcport->flags & FCF_FCP2_DEVICE) { - qla2x00_post_async_adisc_work(vha, fcport, data); - break; - } - qla2x00_update_fcport(vha, fcport); + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, ea->fcport->port_name); + ea->fcport->chip_reset = vha->hw->chip_reset; + ea->fcport->logout_on_delete = 1; + qla24xx_post_gpdb_work(vha, ea->fcport, 0); break; case MBS_COMMAND_ERROR: - fcport->flags &= ~FCF_ASYNC_SENT; - if (data[1] & QLA_LOGIO_LOGIN_RETRIED) + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC cmd error %x\n", + __func__, __LINE__, ea->fcport->port_name, ea->data[1]); + + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->disc_state = DSC_LOGIN_FAILED; + if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED) set_bit(RELOGIN_NEEDED, &vha->dpc_flags); else - qla2x00_mark_device_lost(vha, fcport, 1, 0); - break; - case MBS_PORT_ID_USED: - fcport->loop_id = data[1]; - qla2x00_post_async_logout_work(vha, fcport, NULL); - qla2x00_post_async_login_work(vha, fcport, NULL); + qla2x00_mark_device_lost(vha, ea->fcport, 1, 0); break; case MBS_LOOP_ID_USED: - fcport->loop_id++; - rval = qla2x00_find_new_loop_id(vha, fcport); - if (rval != QLA_SUCCESS) { - fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_mark_device_lost(vha, fcport, 1, 0); - break; + /* data[1] = IO PARAM 1 = nport ID */ + cid.b.domain = (ea->iop[1] >> 16) & 0xff; + cid.b.area = (ea->iop[1] >> 8) & 0xff; + cid.b.al_pa = ea->iop[1] & 0xff; + cid.b.rsvd_1 = 0; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC LoopID 0x%x in use post gnl\n", + __func__, __LINE__, ea->fcport->port_name, + ea->fcport->loop_id); + + if (IS_SW_RESV_ADDR(cid)) { + set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); + ea->fcport->loop_id = FC_NO_LOOP_ID; + } else { + qla2x00_clear_loop_id(ea->fcport); } - qla2x00_post_async_login_work(vha, fcport, NULL); + qla24xx_post_gnl_work(vha, ea->fcport); + break; + case MBS_PORT_ID_USED: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC NPortId %02x%02x%02x inuse post gidpn\n", + __func__, __LINE__, ea->fcport->port_name, + ea->fcport->d_id.b.domain, ea->fcport->d_id.b.area, + ea->fcport->d_id.b.al_pa); + + qla2x00_clear_loop_id(ea->fcport); + qla24xx_post_gidpn_work(vha, ea->fcport); break; } return; @@ -503,10 +1363,9 @@ void qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { - /* Don't re-login in target mode */ - if (!fcport->tgt_session) - qla2x00_mark_device_lost(vha, fcport, 1, 0); + qla2x00_mark_device_lost(vha, fcport, 1, 0); qlt_logo_completion_handler(fcport, data[0]); + fcport->login_gen++; return; } @@ -709,7 +1568,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) } } - if (qla_ini_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) rval = qla2x00_init_rings(vha); ha->flags.chip_reset_done = 1; @@ -2968,8 +3828,14 @@ qla2x00_rport_del(void *data) rport = fcport->drport ? fcport->drport: fcport->rport; fcport->drport = NULL; spin_unlock_irqrestore(fcport->vha->host->host_lock, flags); - if (rport) + if (rport) { + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phN. 
rport %p roles %x \n", + __func__, fcport->port_name, rport, + rport->roles); + fc_remote_port_delete(rport); + } } /** @@ -2995,9 +3861,42 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, + GFP_ATOMIC); + fcport->disc_state = DSC_DELETED; + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + fcport->deleted = QLA_SESS_DELETED; + fcport->login_retry = vha->hw->login_retry_count; + fcport->login_retry = 5; + fcport->logout_on_delete = 1; + + if (!fcport->ct_desc.ct_sns) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + kfree(fcport); + fcport = NULL; + } + INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); + INIT_LIST_HEAD(&fcport->gnl_entry); + INIT_LIST_HEAD(&fcport->list); + return fcport; } +void +qla2x00_free_fcport(fc_port_t *fcport) +{ + if (fcport->ct_desc.ct_sns) { + dma_free_coherent(&fcport->vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), fcport->ct_desc.ct_sns, + fcport->ct_desc.ct_sns_dma); + + fcport->ct_desc.ct_sns = NULL; + } + kfree(fcport); +} + /* * qla2x00_configure_loop * Updates Fibre Channel Device Database with what is actually on loop. @@ -3334,6 +4233,7 @@ qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) } } +/* qla2x00_reg_remote_port is reserved for Initiator Mode only.*/ static void qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) { @@ -3364,6 +4264,12 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) rport_ids.roles |= FC_RPORT_ROLE_FCP_INITIATOR; if (fcport->port_type == FCT_TARGET) rport_ids.roles |= FC_RPORT_ROLE_FCP_TARGET; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phN. rport %p is %s mode \n", + __func__, fcport->port_name, rport, + (fcport->port_type == FCT_TARGET) ? 
"tgt" : "ini"); + fc_remote_port_rolechg(rport, rport_ids.roles); } @@ -3387,20 +4293,45 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) { fcport->vha = vha; + if (IS_SW_RESV_ADDR(fcport->d_id)) + return; + + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %8phC \n", + __func__, fcport->port_name); + if (IS_QLAFX00(vha->hw)) { qla2x00_set_fcport_state(fcport, FCS_ONLINE); goto reg_port; } fcport->login_retry = 0; fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT); + fcport->disc_state = DSC_LOGIN_COMPLETE; + fcport->deleted = 0; + fcport->logout_on_delete = 1; qla2x00_set_fcport_state(fcport, FCS_ONLINE); qla2x00_iidma_fcport(vha, fcport); qla24xx_update_fcport_fcp_prio(vha, fcport); reg_port: - if (qla_ini_mode_enabled(vha)) + switch (vha->host->active_mode) { + case MODE_INITIATOR: + qla2x00_reg_remote_port(vha, fcport); + break; + case MODE_TARGET: + if (!vha->vha_tgt.qla_tgt->tgt_stop && + !vha->vha_tgt.qla_tgt->tgt_stopped) + qlt_fc_port_added(vha, fcport); + break; + case MODE_DUAL: qla2x00_reg_remote_port(vha, fcport); + if (!vha->vha_tgt.qla_tgt->tgt_stop && + !vha->vha_tgt.qla_tgt->tgt_stopped) + qlt_fc_port_added(vha, fcport); + break; + default: + break; + } } /* @@ -3418,13 +4349,11 @@ static int qla2x00_configure_fabric(scsi_qla_host_t *vha) { int rval; - fc_port_t *fcport, *fcptemp; - uint16_t next_loopid; + fc_port_t *fcport; uint16_t mb[MAILBOX_REGISTER_COUNT]; uint16_t loop_id; LIST_HEAD(new_fcports); struct qla_hw_data *ha = vha->hw; - struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); int discovery_gen; /* If FL port exists, then SNS is present */ @@ -3443,6 +4372,8 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) vha->device_flags |= SWITCH_FOUND; do { + qla2x00_mgmt_svr_login(vha); + /* FDMI support. */ if (ql2xfdmienable && test_and_clear_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags)) @@ -3489,9 +4420,6 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } } -#define QLA_FCPORT_SCAN 1 -#define QLA_FCPORT_FOUND 2 - list_for_each_entry(fcport, &vha->vp_fcports, list) { fcport->scan_state = QLA_FCPORT_SCAN; } @@ -3504,174 +4432,14 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) * will be newer than discovery_gen. */ qlt_do_generation_tick(vha, &discovery_gen); - rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); + rval = qla2x00_find_all_fabric_devs(vha); if (rval != QLA_SUCCESS) break; - - /* - * Logout all previous fabric devices marked lost, except - * FCP2 devices. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) - continue; - - if (fcport->scan_state == QLA_FCPORT_SCAN) { - if (qla_ini_mode_enabled(base_vha) && - atomic_read(&fcport->state) == FCS_ONLINE) { - qla2x00_mark_device_lost(vha, fcport, - ql2xplogiabsentdevice, 0); - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); - } - } else if (!qla_ini_mode_enabled(base_vha)) { - /* - * In target mode, explicitly kill - * sessions and log out of devices - * that are gone, so that we don't - * end up with an initiator using the - * wrong ACL (if the fabric recycles - * an FC address and we have a stale - * session around) and so that we don't - * report initiators that are no longer - * on the fabric. 
- */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf077, - "port gone, logging out/killing session: " - "%8phC state 0x%x flags 0x%x fc4_type 0x%x " - "scan_state %d\n", - fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - qlt_fc_port_deleted(vha, fcport, - discovery_gen); - } - } - } - - /* Starting free loop ID. */ - next_loopid = ha->min_external_loopid; - - /* - * Scan through our port list and login entries that need to be - * logged in. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (atomic_read(&vha->loop_down_timer) || - test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || - (fcport->flags & FCF_LOGIN_NEEDED) == 0) - continue; - - /* - * If we're not an initiator, skip looking for devices - * and logging in. There's no reason for us to do it, - * and it seems to actively cause problems in target - * mode if we race with the initiator logging into us - * (we might get the "port ID used" status back from - * our login command and log out the initiator, which - * seems to cause havoc). - */ - if (!qla_ini_mode_enabled(base_vha)) { - if (fcport->scan_state == QLA_FCPORT_FOUND) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf078, - "port %8phC state 0x%x flags 0x%x fc4_type 0x%x " - "scan_state %d (initiator mode disabled; skipping " - "login)\n", fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - } - continue; - } - - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id( - base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - } - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, &next_loopid); - } - - /* Exit if out of loop IDs. */ - if (rval != QLA_SUCCESS) { - break; - } - - /* - * Login and add the new devices to our port list. - */ - list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { - if (atomic_read(&vha->loop_down_timer) || - test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - /* - * If we're not an initiator, skip looking for devices - * and logging in. There's no reason for us to do it, - * and it seems to actively cause problems in target - * mode if we race with the initiator logging into us - * (we might get the "port ID used" status back from - * our login command and log out the initiator, which - * seems to cause havoc). - */ - if (qla_ini_mode_enabled(base_vha)) { - /* Find a new loop ID to use. */ - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id(base_vha, - fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, - &next_loopid); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf079, - "new port %8phC state 0x%x flags 0x%x fc4_type " - "0x%x scan_state %d (initiator mode disabled; " - "skipping login)\n", - fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - } - - list_move_tail(&fcport->list, &vha->vp_fcports); - } } while (0); - /* Free all new device structures not processed. 
*/ - list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { - list_del(&fcport->list); - kfree(fcport); - } - - if (rval) { + if (rval) ql_dbg(ql_dbg_disc, vha, 0x2068, "Configure fabric error exit rval=%d.\n", rval); - } return (rval); } @@ -3690,12 +4458,11 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) * Kernel context. */ static int -qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, - struct list_head *new_fcports) +qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) { int rval; uint16_t loop_id; - fc_port_t *fcport, *new_fcport, *fcptemp; + fc_port_t *fcport, *new_fcport; int found; sw_info_t *swl; @@ -3704,6 +4471,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, port_id_t wrap = {}, nxt_d_id; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + unsigned long flags; rval = QLA_SUCCESS; @@ -3724,9 +4492,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, swl = NULL; } else if (qla2x00_gnn_id(vha, swl) != QLA_SUCCESS) { swl = NULL; - } else if (ql2xiidmaenable && - qla2x00_gfpn_id(vha, swl) == QLA_SUCCESS) { - qla2x00_gpsc(vha, swl); + } else if (qla2x00_gfpn_id(vha, swl) != QLA_SUCCESS) { + swl = NULL; } /* If other queries succeeded probe for FC-4 type */ @@ -3788,11 +4555,6 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, ql_log(ql_log_warn, vha, 0x2064, "SNS scan failed -- assuming " "zero-entry result.\n"); - list_for_each_entry_safe(fcport, fcptemp, - new_fcports, list) { - list_del(&fcport->list); - kfree(fcport); - } rval = QLA_SUCCESS; break; } @@ -3835,6 +4597,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, new_fcport->fc4_type != FC4_TYPE_UNKNOWN)) continue; + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + /* Locate matching device in database. */ found = 0; list_for_each_entry(fcport, &vha->vp_fcports, list) { @@ -3857,7 +4621,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, */ if (fcport->d_id.b24 == new_fcport->d_id.b24 && (atomic_read(&fcport->state) == FCS_ONLINE || - !qla_ini_mode_enabled(base_vha))) { + (vha->host->active_mode == MODE_TARGET))) { break; } @@ -3877,7 +4641,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, * Log it out if still logged in and mark it for * relogin later. */ - if (!qla_ini_mode_enabled(base_vha)) { + if (qla_tgt_mode_enabled(base_vha)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf080, "port changed FC ID, %8phC" " old %x:%x:%x (loop_id 0x%04x)-> new %x:%x:%x\n", @@ -3895,25 +4659,19 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, fcport->d_id.b24 = new_fcport->d_id.b24; fcport->flags |= FCF_LOGIN_NEEDED; - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - (fcport->flags & FCF_ASYNC_SENT) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); - } - break; } - if (found) + if (found) { + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); continue; + } /* If device was not in our fcports list, then add it. */ new_fcport->scan_state = QLA_FCPORT_FOUND; - list_add_tail(&new_fcport->list, new_fcports); + list_add_tail(&new_fcport->list, &vha->vp_fcports); + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* Allocate a new replacement fcport. 
*/ nxt_d_id.b24 = new_fcport->d_id.b24; @@ -3927,8 +4685,44 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, new_fcport->d_id.b24 = nxt_d_id.b24; } - kfree(new_fcport); + qla2x00_free_fcport(new_fcport); + + /* + * Logout all previous fabric dev marked lost, except FCP2 devices. + */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || + (fcport->flags & FCF_LOGIN_NEEDED) == 0) + continue; + + if (fcport->scan_state == QLA_FCPORT_SCAN) { + if ((qla_dual_mode_enabled(vha) || + qla_ini_mode_enabled(vha)) && + atomic_read(&fcport->state) == FCS_ONLINE) { + qla2x00_mark_device_lost(vha, fcport, + ql2xplogiabsentdevice, 0); + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) == 0 && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_BROADCAST) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, + fcport->port_name); + + qlt_schedule_sess_for_deletion_lock + (fcport); + continue; + } + } + } + if (fcport->scan_state == QLA_FCPORT_FOUND) + qla24xx_fcport_handle_login(vha, fcport); + } return (rval); } @@ -3980,64 +4774,6 @@ qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev) return (rval); } -/* - * qla2x00_fabric_dev_login - * Login fabric target device and update FC port database. - * - * Input: - * ha: adapter state pointer. - * fcport: port structure list pointer. - * next_loopid: contains value of a new loop ID that can be used - * by the next login attempt. - * - * Returns: - * qla2x00 local function return status code. - * - * Context: - * Kernel context. - */ -static int -qla2x00_fabric_dev_login(scsi_qla_host_t *vha, fc_port_t *fcport, - uint16_t *next_loopid) -{ - int rval; - uint8_t opts; - struct qla_hw_data *ha = vha->hw; - - rval = QLA_SUCCESS; - - if (IS_ALOGIO_CAPABLE(ha)) { - if (fcport->flags & FCF_ASYNC_SENT) - return rval; - fcport->flags |= FCF_ASYNC_SENT; - rval = qla2x00_post_async_login_work(vha, fcport, NULL); - if (!rval) - return rval; - } - - fcport->flags &= ~FCF_ASYNC_SENT; - rval = qla2x00_fabric_login(vha, fcport, next_loopid); - if (rval == QLA_SUCCESS) { - /* Send an ADISC to FCP2 devices.*/ - opts = 0; - if (fcport->flags & FCF_FCP2_DEVICE) - opts |= BIT_1; - rval = qla2x00_get_port_database(vha, fcport, opts); - if (rval != QLA_SUCCESS) { - ha->isp_ops->fabric_logout(vha, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_mark_device_lost(vha, fcport, 1, 0); - } else { - qla2x00_update_fcport(vha, fcport); - } - } else { - /* Retry Login. */ - qla2x00_mark_device_lost(vha, fcport, 1, 0); - } - - return (rval); -} /* * qla2x00_fabric_login @@ -4329,13 +5065,6 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) spin_unlock_irqrestore(&ha->vport_slock, flags); qla2x00_rport_del(fcport); - /* - * Release the target mode FC NEXUS in - * qla_target.c, if target mod is enabled. - */ - qlt_fc_port_deleted(vha, fcport, - base_vha->total_fcport_update_gen); - spin_lock_irqsave(&ha->vport_slock, flags); } } @@ -4718,6 +5447,8 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (!(IS_P3P_TYPE(ha))) ha->isp_ops->reset_chip(vha); + ha->chip_reset++; + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); if (atomic_read(&vha->loop_state) != LOOP_DOWN) { atomic_set(&vha->loop_state, LOOP_DOWN); @@ -4772,8 +5503,6 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) /* Requeue all commands in outstanding command list. 
*/ qla2x00_abort_all_cmds(vha, DID_RESET << 16); } - - ha->chip_reset++; /* memory barrier */ wmb(); } @@ -4969,7 +5698,6 @@ qla2x00_restart_isp(scsi_qla_host_t *vha) if (!status) { /* Issue a marker after FW becomes ready. */ qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL); - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } @@ -5197,7 +5925,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) rval = 1; } - if (!qla_ini_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha)) { /* Don't enable full login after initial LIP */ nv->firmware_options_1 &= cpu_to_le32(~BIT_13); /* Don't enable LIP full login for initiator */ diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 44e404583c86fc..647828b9e6226e 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -166,8 +166,8 @@ qla2x00_set_fcport_state(fc_port_t *fcport, int state) /* Don't print state transitions during initial allocation of fcport */ if (old_state && old_state != state) { ql_dbg(ql_dbg_disc, fcport->vha, 0x207d, - "FCPort state transitioned from %s to %s - " - "portid=%02x%02x%02x.\n", + "FCPort %8phC state transitioned from %s to %s - " + "portid=%02x%02x%02x.\n", fcport->port_name, port_state_str[old_state], port_state_str[state], fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa); @@ -263,6 +263,7 @@ qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) memset(sp, 0, sizeof(*sp)); sp->fcport = fcport; sp->iocbs = 1; + sp->vha = vha; done: if (!sp) QLA_VHA_MARK_NOT_BUSY(vha); @@ -285,7 +286,7 @@ qla2x00_init_timer(srb_t *sp, unsigned long tmo) sp->u.iocb_cmd.timer.function = qla2x00_sp_timeout; add_timer(&sp->u.iocb_cmd.timer); sp->free = qla2x00_sp_free; - if ((IS_QLAFX00(sp->fcport->vha->hw)) && + if ((IS_QLAFX00(((scsi_qla_host_t *)sp->vha)->hw)) && (sp->type == SRB_FXIOCB_DCMD)) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); if (sp->type == SRB_ELS_DCMD) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 834e2219784260..33d3d90c089b46 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2247,7 +2247,7 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); - if (!sp->fcport->tgt_session || + if (!sp->fcport->se_sess || !sp->fcport->keep_nport_handle) logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); @@ -3079,19 +3079,69 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) wmb(); } +static void +qla2x00_mb_iocb(srb_t *sp, struct mbx_24xx_entry *mbx) +{ + int i, sz; + + mbx->entry_type = MBX_IOCB_TYPE; + mbx->handle = sp->handle; + sz = min(ARRAY_SIZE(mbx->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.out_mb)); + + for (i = 0; i < sz; i++) + mbx->mb[i] = cpu_to_le16(sp->u.iocb_cmd.u.mbx.out_mb[i]); +} + +static void +qla2x00_ctpthru_cmd_iocb(srb_t *sp, struct ct_entry_24xx *ct_pkt) +{ + sp->u.iocb_cmd.u.ctarg.iocb = ct_pkt; + qla24xx_prep_ms_iocb(sp->vha, &sp->u.iocb_cmd.u.ctarg); + ct_pkt->handle = sp->handle; +} + +static void qla2x00_send_notify_ack_iocb(srb_t *sp, + struct nack_to_isp *nack) +{ + struct imm_ntfy_from_isp *ntfy = sp->u.iocb_cmd.u.nack.ntfy; + + nack->entry_type = NOTIFY_ACK_TYPE; + nack->entry_count = 1; + nack->ox_id = ntfy->ox_id; + + nack->u.isp24.handle = sp->handle; + nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; + if 
(le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { + nack->u.isp24.flags = ntfy->u.isp24.flags & + cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + } + nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; + nack->u.isp24.status = ntfy->u.isp24.status; + nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; + nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; + nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; + nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; + nack->u.isp24.srr_flags = 0; + nack->u.isp24.srr_reject_code = 0; + nack->u.isp24.srr_reject_code_expl = 0; + nack->u.isp24.vp_index = ntfy->u.isp24.vp_index; +} + int qla2x00_start_sp(srb_t *sp) { int rval; - struct qla_hw_data *ha = sp->fcport->vha->hw; + scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + struct qla_hw_data *ha = vha->hw; void *pkt; unsigned long flags; rval = QLA_FUNCTION_FAILED; spin_lock_irqsave(&ha->hardware_lock, flags); - pkt = qla2x00_alloc_iocbs(sp->fcport->vha, sp); + pkt = qla2x00_alloc_iocbs(vha, sp); if (!pkt) { - ql_log(ql_log_warn, sp->fcport->vha, 0x700c, + ql_log(ql_log_warn, vha, 0x700c, "qla2x00_alloc_iocbs failed.\n"); goto done; } @@ -3139,12 +3189,23 @@ qla2x00_start_sp(srb_t *sp) case SRB_ELS_DCMD: qla24xx_els_logo_iocb(sp, pkt); break; + case SRB_CT_PTHRU_CMD: + qla2x00_ctpthru_cmd_iocb(sp, pkt); + break; + case SRB_MB_IOCB: + qla2x00_mb_iocb(sp, pkt); + break; + case SRB_NACK_PLOGI: + case SRB_NACK_PRLI: + case SRB_NACK_LOGO: + qla2x00_send_notify_ack_iocb(sp, pkt); + break; default: break; } wmb(); - qla2x00_start_iocbs(sp->fcport->vha, ha->req_q_map[0]); + qla2x00_start_iocbs(vha, ha->req_q_map[0]); done: spin_unlock_irqrestore(&ha->hardware_lock, flags); return rval; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 802b50cc89d0ab..bb747d7ebdab30 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -561,14 +561,50 @@ qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry) return ret; } -static inline fc_port_t * +fc_port_t * qla2x00_find_fcport_by_loopid(scsi_qla_host_t *vha, uint16_t loop_id) { - fc_port_t *fcport; + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) + if (f->loop_id == loop_id) + return f; + return NULL; +} + +fc_port_t * +qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *vha, u8 *wwpn, u8 incl_deleted) +{ + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) { + if (memcmp(f->port_name, wwpn, WWN_SIZE) == 0) { + if (incl_deleted) + return f; + else if (f->deleted == 0) + return f; + } + } + return NULL; +} - list_for_each_entry(fcport, &vha->vp_fcports, list) - if (fcport->loop_id == loop_id) - return fcport; +fc_port_t * +qla2x00_find_fcport_by_nportid(scsi_qla_host_t *vha, port_id_t *id, + u8 incl_deleted) +{ + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) { + if (f->d_id.b24 == id->b24) { + if (incl_deleted) + return f; + else if (f->deleted == 0) + return f; + } + } return NULL; } @@ -934,7 +970,11 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) ql_dbg(ql_dbg_async, vha, 0x508a, "Marking port lost loopid=%04x portid=%06x.\n", fcport->loop_id, fcport->d_id.b24); - qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + if (qla_ini_mode_enabled(vha)) { + qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + fcport->logout_on_delete = 0; + 
qlt_schedule_sess_for_deletion_lock(fcport); + } break; global_port_update: @@ -1024,27 +1064,17 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) if (qla2x00_is_a_vp_did(vha, rscn_entry)) break; - /* - * Search for the rport related to this RSCN entry and mark it - * as lost. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (atomic_read(&fcport->state) != FCS_ONLINE) - continue; - if (fcport->d_id.b24 == rscn_entry) { - qla2x00_mark_device_lost(vha, fcport, 0, 0); - break; - } - } - atomic_set(&vha->loop_down_timer, 0); vha->flags.management_server_logged_in = 0; + { + struct event_arg ea; - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - set_bit(RSCN_UPDATE, &vha->dpc_flags); - qla2x00_post_aen_work(vha, FCH_EVT_RSCN, rscn_entry); + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_RSCN; + ea.id.b24 = rscn_entry; + qla2x00_fcport_event_handler(vha, &ea); + } break; - /* case MBA_RIO_RESPONSE: */ case MBA_ZIO_RESPONSE: ql_dbg(ql_dbg_async, vha, 0x5015, @@ -1235,7 +1265,8 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, index = LSW(pkt->handle); if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x5031, - "Invalid command index (%x).\n", index); + "Invalid command index (%x) type %8ph.\n", + index, iocb); if (IS_P3P_TYPE(ha)) set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags); else @@ -1346,6 +1377,49 @@ qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, sp->done(vha, sp, 0); } +static void +qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + struct mbx_24xx_entry *pkt) +{ + const char func[] = "MBX-IOCB2"; + srb_t *sp; + struct srb_iocb *si; + u16 sz, i; + int res; + + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); + if (!sp) + return; + + si = &sp->u.iocb_cmd; + sz = min(ARRAY_SIZE(pkt->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.in_mb)); + + for (i = 0; i < sz; i++) + si->u.mbx.in_mb[i] = le16_to_cpu(pkt->mb[i]); + + res = (si->u.mbx.in_mb[0] & MBS_MASK); + + sp->done(vha, sp, res); +} + +static void +qla24xxx_nack_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + struct nack_to_isp *pkt) +{ + const char func[] = "nack"; + srb_t *sp; + int res = 0; + + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); + if (!sp) + return; + + if (pkt->u.isp2x.status != cpu_to_le16(NOTIFY_ACK_SUCCESS)) + res = QLA_FUNCTION_FAILED; + + sp->done(vha, sp, res); +} + static void qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, sts_entry_t *pkt, int iocb_type) @@ -1356,50 +1430,63 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, struct bsg_job *bsg_job; struct fc_bsg_reply *bsg_reply; uint16_t comp_status; - int res; + int res = 0; sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (!sp) return; - bsg_job = sp->u.bsg_job; - bsg_reply = bsg_job->reply; - - type = "ct pass-through"; - - comp_status = le16_to_cpu(pkt->comp_status); - - /* return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT - * fc payload to the caller - */ - bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; - bsg_job->reply_len = sizeof(struct fc_bsg_reply); - - if (comp_status != CS_COMPLETE) { - if (comp_status == CS_DATA_UNDERRUN) { - res = DID_OK << 16; - bsg_reply->reply_payload_rcv_len = - le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len); - - ql_log(ql_log_warn, vha, 0x5048, - "CT pass-through-%s error " - "comp_status-status=0x%x total_byte = 0x%x.\n", - type, comp_status, - bsg_reply->reply_payload_rcv_len); - } else { - ql_log(ql_log_warn, vha, 0x5049, - "CT 
pass-through-%s error " - "comp_status-status=0x%x.\n", type, comp_status); - res = DID_ERROR << 16; - bsg_reply->reply_payload_rcv_len = 0; - } - ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5035, - (uint8_t *)pkt, sizeof(*pkt)); - } else { - res = DID_OK << 16; - bsg_reply->reply_payload_rcv_len = - bsg_job->reply_payload.payload_len; - bsg_job->reply_len = 0; + switch (sp->type) { + case SRB_CT_CMD: + bsg_job = sp->u.bsg_job; + bsg_reply = bsg_job->reply; + + type = "ct pass-through"; + + comp_status = le16_to_cpu(pkt->comp_status); + + /* + * return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT + * fc payload to the caller + */ + bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; + bsg_job->reply_len = sizeof(struct fc_bsg_reply); + + if (comp_status != CS_COMPLETE) { + if (comp_status == CS_DATA_UNDERRUN) { + res = DID_OK << 16; + bsg_reply->reply_payload_rcv_len = + le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len); + + ql_log(ql_log_warn, vha, 0x5048, + "CT pass-through-%s error comp_status=0x%x total_byte=0x%x.\n", + type, comp_status, + bsg_reply->reply_payload_rcv_len); + } else { + ql_log(ql_log_warn, vha, 0x5049, + "CT pass-through-%s error comp_status=0x%x.\n", + type, comp_status); + res = DID_ERROR << 16; + bsg_reply->reply_payload_rcv_len = 0; + } + ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5035, + (uint8_t *)pkt, sizeof(*pkt)); + } else { + res = DID_OK << 16; + bsg_reply->reply_payload_rcv_len = + bsg_job->reply_payload.payload_len; + bsg_job->reply_len = 0; + } + break; + case SRB_CT_PTHRU_CMD: + /* + * borrowing sts_entry_24xx.comp_status. + * same location as ct_entry_24xx.comp_status + */ + res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, + sp->name); + break; } sp->done(vha, sp, res); @@ -1440,6 +1527,15 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, "Completing %s: (%p) type=%d.\n", type, sp, sp->type); sp->done(vha, sp, 0); return; + case SRB_CT_PTHRU_CMD: + /* borrowing sts_entry_24xx.comp_status. 
+ same location as ct_entry_24xx.comp_status + */ + res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, + sp->name); + sp->done(vha, sp, res); + return; default: ql_dbg(ql_dbg_user, vha, 0x503e, "Unrecognized SRB: (%p) type=%d.\n", sp, sp->type); @@ -1566,6 +1662,8 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, iop[0] = le32_to_cpu(logio->io_parameter[0]); iop[1] = le32_to_cpu(logio->io_parameter[1]); + lio->u.logio.iop[0] = iop[0]; + lio->u.logio.iop[1] = iop[1]; switch (iop[0]) { case LSC_SCODE_PORTID_USED: data[0] = MBS_PORT_ID_USED; @@ -2074,6 +2172,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) int res = 0; uint16_t state_flags = 0; uint16_t retry_delay = 0; + uint8_t no_logout = 0; sts = (sts_entry_t *) pkt; sts24 = (struct sts_entry_24xx *) pkt; @@ -2334,6 +2433,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) break; case CS_PORT_LOGGED_OUT: + no_logout = 1; case CS_PORT_CONFIG_CHG: case CS_PORT_BUSY: case CS_INCOMPLETE: @@ -2356,14 +2456,21 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) break; } - ql_dbg(ql_dbg_io, fcport->vha, 0x3021, - "Port to be marked lost on fcport=%02x%02x%02x, current " - "port state= %s.\n", fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa, - port_state_str[atomic_read(&fcport->state)]); + if (atomic_read(&fcport->state) == FCS_ONLINE) { + ql_dbg(ql_dbg_disc, fcport->vha, 0x3021, + "Port to be marked lost on fcport=%02x%02x%02x, current " + "port state= %s comp_status %x.\n", fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa, + port_state_str[atomic_read(&fcport->state)], + comp_status); + + if (no_logout) + fcport->logout_on_delete = 0; - if (atomic_read(&fcport->state) == FCS_ONLINE) qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + qlt_schedule_sess_for_deletion_lock(fcport); + } + break; case CS_ABORTED: @@ -2627,10 +2734,16 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, } case ABTS_RESP_24XX: case CTIO_TYPE7: - case NOTIFY_ACK_TYPE: case CTIO_CRC2: qlt_response_pkt_all_vps(vha, (response_t *)pkt); break; + case NOTIFY_ACK_TYPE: + if (pkt->handle == QLA_TGT_SKIP_HANDLE) + qlt_response_pkt_all_vps(vha, (response_t *)pkt); + else + qla24xxx_nack_iocb_entry(vha, rsp->req, + (struct nack_to_isp *)pkt); + break; case MARKER_TYPE: /* Do nothing in this case, this check is to prevent it * from falling into default case @@ -2640,6 +2753,10 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, qla24xx_abort_iocb_entry(vha, rsp->req, (struct abort_entry_24xx *)pkt); break; + case MBX_IOCB_TYPE: + qla24xx_mbx_iocb_entry(vha, rsp->req, + (struct mbx_24xx_entry *)pkt); + break; default: /* Type Not Supported. 
*/ ql_dbg(ql_dbg_async, vha, 0x5042, @@ -2656,8 +2773,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (IS_P3P_TYPE(ha)) { struct device_reg_82xx __iomem *reg = &ha->iobase->isp82; WRT_REG_DWORD(®->rsp_q_out[0], rsp->ring_index); - } else + } else { WRT_REG_DWORD(rsp->rsp_q_out, rsp->ring_index); + } } static void diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 67f64db390b0cd..64f04aa2a503af 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1637,94 +1637,6 @@ qla2x00_init_firmware(scsi_qla_host_t *vha, uint16_t size) return rval; } -/* - * qla2x00_get_node_name_list - * Issue get node name list mailbox command, kmalloc() - * and return the resulting list. Caller must kfree() it! - * - * Input: - * ha = adapter state pointer. - * out_data = resulting list - * out_len = length of the resulting list - * - * Returns: - * qla2x00 local function return status code. - * - * Context: - * Kernel context. - */ -int -qla2x00_get_node_name_list(scsi_qla_host_t *vha, void **out_data, int *out_len) -{ - struct qla_hw_data *ha = vha->hw; - struct qla_port_24xx_data *list = NULL; - void *pmap; - mbx_cmd_t mc; - dma_addr_t pmap_dma; - ulong dma_size; - int rval, left; - - left = 1; - while (left > 0) { - dma_size = left * sizeof(*list); - pmap = dma_alloc_coherent(&ha->pdev->dev, dma_size, - &pmap_dma, GFP_KERNEL); - if (!pmap) { - ql_log(ql_log_warn, vha, 0x113f, - "%s(%ld): DMA Alloc failed of %ld\n", - __func__, vha->host_no, dma_size); - rval = QLA_MEMORY_ALLOC_FAILED; - goto out; - } - - mc.mb[0] = MBC_PORT_NODE_NAME_LIST; - mc.mb[1] = BIT_1 | BIT_3; - mc.mb[2] = MSW(pmap_dma); - mc.mb[3] = LSW(pmap_dma); - mc.mb[6] = MSW(MSD(pmap_dma)); - mc.mb[7] = LSW(MSD(pmap_dma)); - mc.mb[8] = dma_size; - mc.out_mb = MBX_0|MBX_1|MBX_2|MBX_3|MBX_6|MBX_7|MBX_8; - mc.in_mb = MBX_0|MBX_1; - mc.tov = 30; - mc.flags = MBX_DMA_IN; - - rval = qla2x00_mailbox_command(vha, &mc); - if (rval != QLA_SUCCESS) { - if ((mc.mb[0] == MBS_COMMAND_ERROR) && - (mc.mb[1] == 0xA)) { - left += le16_to_cpu(mc.mb[2]) / - sizeof(struct qla_port_24xx_data); - goto restart; - } - goto out_free; - } - - left = 0; - - list = kmemdup(pmap, dma_size, GFP_KERNEL); - if (!list) { - ql_log(ql_log_warn, vha, 0x1140, - "%s(%ld): failed to allocate node names list " - "structure.\n", __func__, vha->host_no); - rval = QLA_MEMORY_ALLOC_FAILED; - goto out_free; - } - -restart: - dma_free_coherent(&ha->pdev->dev, dma_size, pmap, pmap_dma); - } - - *out_data = list; - *out_len = dma_size; - -out: - return rval; - -out_free: - dma_free_coherent(&ha->pdev->dev, dma_size, pmap, pmap_dma); - return rval; -} /* * qla2x00_get_port_database diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 62ca1ddb0cc717..9c4699623410d2 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1043,6 +1043,34 @@ qla2x00_wait_for_hba_online(scsi_qla_host_t *vha) return (return_status); } +static inline int test_fcport_count(scsi_qla_host_t *vha) +{ + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + int res; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + ql_dbg(ql_dbg_init, vha, 0xffff, + "tgt %p, fcport_count=%d\n", + vha, vha->fcport_count); + res = (vha->fcport_count == 0); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + return res; +} + +/* + * qla2x00_wait_for_sess_deletion can only be called from remove_one. 
+ * it has dependency on UNLOADING flag to stop device discovery + */ +static void +qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) +{ + qla2x00_mark_all_devices_lost(vha, 0); + + wait_event(vha->fcport_waitQ, test_fcport_count(vha)); +} + /* * qla2x00_wait_for_hba_ready * Wait till the HBA is ready before doing driver unload @@ -2904,6 +2932,18 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto probe_init_failed; + base_vha->gnl.size = (ha->max_loop_id + 1) * + sizeof(struct get_name_list_extended); + base_vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, + base_vha->gnl.size, &base_vha->gnl.ldma, GFP_KERNEL); + INIT_LIST_HEAD(&base_vha->gnl.fcports); + + if (base_vha->gnl.l == NULL) { + ql_log(ql_log_fatal, base_vha, 0xffff, + "Alloc failed for name list.\n"); + goto probe_init_failed; + } + /* Alloc arrays of request and response ring ptrs */ if (!qla2x00_alloc_queues(ha, req, rsp)) { ql_log(ql_log_fatal, base_vha, 0x003d, @@ -3123,7 +3163,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ql_dbg(ql_dbg_init, base_vha, 0x00f2, "Init done and hba is online.\n"); - if (qla_ini_mode_enabled(base_vha)) + if (qla_ini_mode_enabled(base_vha) || + qla_dual_mode_enabled(base_vha)) scsi_scan_host(host); else ql_dbg(ql_dbg_init, base_vha, 0x0122, @@ -3372,21 +3413,26 @@ qla2x00_remove_one(struct pci_dev *pdev) * resources. */ if (!atomic_read(&pdev->enable_cnt)) { + dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, + base_vha->gnl.l, base_vha->gnl.ldma); + scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); return; } - qla2x00_wait_for_hba_ready(base_vha); - /* if UNLOAD flag is already set, then continue unload, + /* + * if UNLOAD flag is already set, then continue unload, * where it was set first. */ if (test_bit(UNLOADING, &base_vha->dpc_flags)) return; set_bit(UNLOADING, &base_vha->dpc_flags); + dma_free_coherent(&ha->pdev->dev, + base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); if (IS_QLAFX00(ha)) qlafx00_driver_shutdown(base_vha, 20); @@ -3535,10 +3581,14 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport, qla2xxx_wake_dpc(base_vha); } else { int now; - if (rport) + if (rport) { + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phN. 
rport %p roles %x \n", + __func__, fcport->port_name, rport, + rport->roles); fc_remote_port_delete(rport); + } qlt_do_generation_tick(vha, &now); - qlt_fc_port_deleted(vha, fcport, now); } } @@ -3581,7 +3631,7 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport, fcport->login_retry = vha->hw->login_retry_count; ql_dbg(ql_dbg_disc, vha, 0x2067, - "Port login retry %8phN, id = 0x%04x retry cnt=%d.\n", + "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n", fcport->port_name, fcport->loop_id, fcport->login_retry); } } @@ -3604,7 +3654,13 @@ qla2x00_mark_all_devices_lost(scsi_qla_host_t *vha, int defer) { fc_port_t *fcport; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Mark all dev lost\n"); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = 0; + qlt_schedule_sess_for_deletion_lock(fcport); + if (vha->vp_idx != 0 && vha->vp_idx != fcport->vha->vp_idx) continue; @@ -4219,6 +4275,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); + init_waitqueue_head(&vha->fcport_waitQ); sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, @@ -4232,7 +4289,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, return vha; } -static struct qla_work_evt * +struct qla_work_evt * qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) { struct qla_work_evt *e; @@ -4254,7 +4311,7 @@ qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) return e; } -static int +int qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e) { unsigned long flags; @@ -4315,7 +4372,6 @@ int qla2x00_post_async_##name##_work( \ } qla2x00_post_async_work(login, QLA_EVT_ASYNC_LOGIN); -qla2x00_post_async_work(login_done, QLA_EVT_ASYNC_LOGIN_DONE); qla2x00_post_async_work(logout, QLA_EVT_ASYNC_LOGOUT); qla2x00_post_async_work(logout_done, QLA_EVT_ASYNC_LOGOUT_DONE); qla2x00_post_async_work(adisc, QLA_EVT_ASYNC_ADISC); @@ -4368,6 +4424,67 @@ qlafx00_post_aenfx_work(struct scsi_qla_host *vha, uint32_t evtcode, return qla2x00_post_work(vha, e); } +int qla24xx_post_upd_fcport_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_UPD_FCPORT); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static +void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) +{ + unsigned long flags; + fc_port_t *fcport = NULL; + struct qlt_plogi_ack_t *pla = + (struct qlt_plogi_ack_t *)e->u.new_sess.pla; + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport = qla2x00_find_fcport_by_wwpn(vha, e->u.new_sess.port_name, 1); + if (fcport) { + fcport->d_id = e->u.new_sess.id; + if (pla) { + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + qlt_plogi_ack_link(vha, pla, fcport, QLT_PLOGI_LINK_SAME_WWN); + /* we took an extra ref_count to prevent PLOGI ACK when + * fcport/sess has not been created. 
+ */ + pla->ref_count--; + } + } else { + fcport = qla2x00_alloc_fcport(vha, GFP_KERNEL); + if (fcport) { + fcport->d_id = e->u.new_sess.id; + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + + memcpy(fcport->port_name, e->u.new_sess.port_name, + WWN_SIZE); + list_add_tail(&fcport->list, &vha->vp_fcports); + + if (pla) { + qlt_plogi_ack_link(vha, pla, fcport, + QLT_PLOGI_LINK_SAME_WWN); + pla->ref_count--; + } + } + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + if (fcport) { + if (pla) + qlt_plogi_ack_unref(vha, pla); + else + qla24xx_async_gnl(vha, fcport); + } +} + void qla2x00_do_work(struct scsi_qla_host *vha) { @@ -4394,10 +4511,6 @@ qla2x00_do_work(struct scsi_qla_host *vha) qla2x00_async_login(vha, e->u.logio.fcport, e->u.logio.data); break; - case QLA_EVT_ASYNC_LOGIN_DONE: - qla2x00_async_login_done(vha, e->u.logio.fcport, - e->u.logio.data); - break; case QLA_EVT_ASYNC_LOGOUT: qla2x00_async_logout(vha, e->u.logio.fcport); break; @@ -4419,6 +4532,34 @@ qla2x00_do_work(struct scsi_qla_host *vha) case QLA_EVT_AENFX: qlafx00_process_aen(vha, e); break; + case QLA_EVT_GIDPN: + qla24xx_async_gidpn(vha, e->u.fcport.fcport); + break; + case QLA_EVT_GPNID: + qla24xx_async_gpnid(vha, &e->u.gpnid.id); + break; + case QLA_EVT_GPNID_DONE: + qla24xx_async_gpnid_done(vha, e->u.iosb.sp); + break; + case QLA_EVT_NEW_SESS: + qla24xx_create_new_sess(vha, e); + break; + case QLA_EVT_GPDB: + qla24xx_async_gpdb(vha, e->u.fcport.fcport, + e->u.fcport.opt); + break; + case QLA_EVT_GPSC: + qla24xx_async_gpsc(vha, e->u.fcport.fcport); + break; + case QLA_EVT_UPD_FCPORT: + qla2x00_update_fcport(vha, e->u.fcport.fcport); + break; + case QLA_EVT_GNL: + qla24xx_async_gnl(vha, e->u.fcport.fcport); + break; + case QLA_EVT_NACK: + qla24xx_do_nack_work(vha, e); + break; } if (e->flags & QLA_EVT_FLAG_FREE) kfree(e); @@ -4435,9 +4576,7 @@ void qla2x00_relogin(struct scsi_qla_host *vha) { fc_port_t *fcport; int status; - uint16_t next_loopid = 0; - struct qla_hw_data *ha = vha->hw; - uint16_t data[2]; + struct event_arg ea; list_for_each_entry(fcport, &vha->vp_fcports, list) { /* @@ -4448,77 +4587,38 @@ void qla2x00_relogin(struct scsi_qla_host *vha) fcport->login_retry && !(fcport->flags & FCF_ASYNC_SENT)) { fcport->login_retry--; if (fcport->flags & FCF_FABRIC_DEVICE) { - if (fcport->flags & FCF_FCP2_DEVICE) - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid = - ha->min_external_loopid; - status = qla2x00_find_new_loop_id( - vha, fcport); - if (status != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - } - - if (IS_ALOGIO_CAPABLE(ha)) { - fcport->flags |= FCF_ASYNC_SENT; - data[0] = 0; - data[1] = QLA_LOGIO_LOGIN_RETRIED; - status = qla2x00_post_async_login_work( - vha, fcport, data); - if (status == QLA_SUCCESS) - continue; - /* Attempt a retry. 
*/ - status = 1; - } else { - status = qla2x00_fabric_login(vha, - fcport, &next_loopid); - if (status == QLA_SUCCESS) { - int status2; - uint8_t opts; - - opts = 0; - if (fcport->flags & - FCF_FCP2_DEVICE) - opts |= BIT_1; - status2 = - qla2x00_get_port_database( - vha, fcport, opts); - if (status2 != QLA_SUCCESS) - status = 1; - } - } - } else + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phC DS %d LS %d\n", __func__, + fcport->port_name, fcport->disc_state, + fcport->fw_login_state); + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_RELOGIN; + ea.fcport = fcport; + qla2x00_fcport_event_handler(vha, &ea); + } else { status = qla2x00_local_device_login(vha, fcport); + if (status == QLA_SUCCESS) { + fcport->old_loop_id = fcport->loop_id; + ql_dbg(ql_dbg_disc, vha, 0x2003, + "Port login OK: logged in ID 0x%x.\n", + fcport->loop_id); + qla2x00_update_fcport(vha, fcport); + } else if (status == 1) { + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + /* retry the login again */ + ql_dbg(ql_dbg_disc, vha, 0x2007, + "Retrying %d login again loop_id 0x%x.\n", + fcport->login_retry, + fcport->loop_id); + } else { + fcport->login_retry = 0; + } - if (status == QLA_SUCCESS) { - fcport->old_loop_id = fcport->loop_id; - - ql_dbg(ql_dbg_disc, vha, 0x2003, - "Port login OK: logged in ID 0x%x.\n", - fcport->loop_id); - - qla2x00_update_fcport(vha, fcport); - - } else if (status == 1) { - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - /* retry the login again */ - ql_dbg(ql_dbg_disc, vha, 0x2007, - "Retrying %d login again loop_id 0x%x.\n", - fcport->login_retry, fcport->loop_id); - } else { - fcport->login_retry = 0; + if (fcport->login_retry == 0 && + status != QLA_SUCCESS) + qla2x00_clear_loop_id(fcport); } - - if (fcport->login_retry == 0 && status != QLA_SUCCESS) - qla2x00_clear_loop_id(fcport); } if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; @@ -5182,7 +5282,8 @@ qla2x00_disable_board_on_pci_error(struct work_struct *work) struct pci_dev *pdev = ha->pdev; scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); - /* if UNLOAD flag is already set, then continue unload, + /* + * if UNLOAD flag is already set, then continue unload, * where it was set first. 
*/ if (test_bit(UNLOADING, &base_vha->dpc_flags)) @@ -5191,6 +5292,8 @@ qla2x00_disable_board_on_pci_error(struct work_struct *work) ql_log(ql_log_warn, base_vha, 0x015b, "Disabling adapter.\n"); + qla2x00_wait_for_sess_deletion(base_vha); + set_bit(UNLOADING, &base_vha->dpc_flags); qla2x00_delete_all_vps(ha, base_vha); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 40b61a327786dc..e4d7d32c82e74c 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -118,6 +118,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, uint16_t srr_flags, uint16_t srr_reject_code, uint8_t srr_explan); static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *imm, int ha_locked); +static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha, + fc_port_t *fcport, bool local); +void qlt_unreg_sess(struct fc_port *sess); /* * Global Variables */ @@ -378,6 +381,247 @@ void qlt_response_pkt_all_vps(struct scsi_qla_host *vha, response_t *pkt) /* * All qlt_plogi_ack_t operations are protected by hardware_lock */ +static int qla24xx_post_nack_work(struct scsi_qla_host *vha, fc_port_t *fcport, + struct imm_ntfy_from_isp *ntfy, int type) +{ + struct qla_work_evt *e; + e = qla2x00_alloc_work(vha, QLA_EVT_NACK); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.nack.fcport = fcport; + e->u.nack.type = type; + memcpy(e->u.nack.iocb, ntfy, sizeof(struct imm_ntfy_from_isp)); + return qla2x00_post_work(vha, e); +} + +static +void qla2x00_async_nack_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + unsigned long flags; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x %8phC type %d\n", + sp->name, res, sp->fcport->port_name, sp->type); + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + sp->fcport->flags &= ~FCF_ASYNC_SENT; + sp->fcport->chip_reset = vha->hw->chip_reset; + + switch (sp->type) { + case SRB_NACK_PLOGI: + sp->fcport->login_gen++; + sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP; + sp->fcport->logout_on_delete = 1; + break; + + case SRB_NACK_PRLI: + sp->fcport->fw_login_state = DSC_LS_PRLI_COMP; + sp->fcport->deleted = 0; + + if (!sp->fcport->login_succ && + !IS_SW_RESV_ADDR(sp->fcport->d_id)) { + sp->fcport->login_succ = 1; + + vha->fcport_count++; + + if (!IS_IIDMA_CAPABLE(vha->hw) || + !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, + sp->fcport->port_name, + vha->fcport_count); + + qla24xx_post_upd_fcport_work(vha, sp->fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, + sp->fcport->port_name, + vha->fcport_count); + + qla24xx_post_gpsc_work(vha, sp->fcport); + } + } + break; + + case SRB_NACK_LOGO: + sp->fcport->login_gen++; + sp->fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + qlt_logo_completion_handler(sp->fcport, MBS_COMMAND_COMPLETE); + break; + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp->free(vha, sp); +} + +int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, + struct imm_ntfy_from_isp *ntfy, int type) +{ + int rval = QLA_FUNCTION_FAILED; + srb_t *sp; + char *c = NULL; + + fcport->flags |= FCF_ASYNC_SENT; + switch (type) { + case SRB_NACK_PLOGI: + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + c = "PLOGI"; + break; + case SRB_NACK_PRLI: + fcport->fw_login_state = DSC_LS_PRLI_PEND; + c = "PRLI"; + 
break; + case SRB_NACK_LOGO: + fcport->fw_login_state = DSC_LS_LOGO_PEND; + c = "LOGO"; + break; + } + + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) + goto done; + + sp->type = type; + sp->name = "nack"; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + + sp->u.iocb_cmd.u.nack.ntfy = ntfy; + + sp->done = qla2x00_async_nack_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hndl %x %s\n", + sp->name, fcport->port_name, sp->handle, c); + + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +void qla24xx_do_nack_work(struct scsi_qla_host *vha, struct qla_work_evt *e) +{ + fc_port_t *t; + unsigned long flags; + + switch (e->u.nack.type) { + case SRB_NACK_PRLI: + mutex_lock(&vha->vha_tgt.tgt_mutex); + t = qlt_create_sess(vha, e->u.nack.fcport, 0); + mutex_unlock(&vha->vha_tgt.tgt_mutex); + if (t) { + ql_log(ql_log_info, vha, 0xffff, + "%s create sess success %p", __func__, t); + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + /* create sess has an extra kref */ + vha->hw->tgt.tgt_ops->put_sess(e->u.nack.fcport); + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + } + break; + } + qla24xx_async_notify_ack(vha, e->u.nack.fcport, + (struct imm_ntfy_from_isp*)e->u.nack.iocb, e->u.nack.type); +} + +void qla24xx_delete_sess_fn(struct work_struct *work) +{ + fc_port_t *fcport = container_of(work, struct fc_port, del_work); + struct qla_hw_data *ha = fcport->vha->hw; + unsigned long flags; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + + if (fcport->se_sess) { + ha->tgt.tgt_ops->shutdown_sess(fcport); + ha->tgt.tgt_ops->put_sess(fcport); + } else { + qlt_unreg_sess(fcport); + } + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); +} + +/* + * Called from qla2x00_reg_remote_port() + */ +void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_hw_data *ha = vha->hw; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct fc_port *sess = fcport; + unsigned long flags; + + if (!vha->hw->tgt.tgt_ops) + return; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (tgt->tgt_stop) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (fcport->disc_state == DSC_DELETE_PEND) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (!sess->se_sess) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + mutex_lock(&vha->vha_tgt.tgt_mutex); + sess = qlt_create_sess(vha, fcport, false); + mutex_unlock(&vha->vha_tgt.tgt_mutex); + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + } else { + if (fcport->fw_login_state == DSC_LS_PRLI_COMP) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get fail sess %8phC \n", + __func__, sess->port_name); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04c, + "qla_target(%u): %ssession for port %8phC " + "(loop ID %d) reappeared\n", vha->vp_idx, + sess->local ? 
"local " : "", sess->port_name, sess->loop_id); + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf007, + "Reappeared sess %p\n", sess); + + ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, + fcport->loop_id, + (fcport->flags & FCF_CONF_COMP_SUPPORTED)); + } + + if (sess && sess->local) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04d, + "qla_target(%u): local session for " + "port %8phC (loop ID %d) became global\n", vha->vp_idx, + fcport->port_name, sess->loop_id); + sess->local = 0; + } + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); +} /* * This is a zero-base ref-counting solution, since hardware_lock @@ -413,30 +657,51 @@ qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, return pla; } -static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, +void qlt_plogi_ack_unref(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla) { struct imm_ntfy_from_isp *iocb = &pla->iocb; + port_id_t port_id; + uint16_t loop_id; + fc_port_t *fcport = pla->fcport; + BUG_ON(!pla->ref_count); pla->ref_count--; if (pla->ref_count) return; - ql_dbg(ql_dbg_async, vha, 0x5089, + ql_dbg(ql_dbg_disc, vha, 0x5089, "Sending PLOGI ACK to wwn %8phC s_id %02x:%02x:%02x loop_id %#04x" " exch %#x ox_id %#x\n", iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], le16_to_cpu(iocb->u.isp24.nport_handle), iocb->u.isp24.exchange_address, iocb->ox_id); - qlt_send_notify_ack(vha, iocb, 0, 0, 0, 0, 0, 0); + + port_id.b.domain = iocb->u.isp24.port_id[2]; + port_id.b.area = iocb->u.isp24.port_id[1]; + port_id.b.al_pa = iocb->u.isp24.port_id[0]; + port_id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + + fcport->loop_id = loop_id; + fcport->d_id = port_id; + qla24xx_post_nack_work(vha, fcport, iocb, SRB_NACK_PLOGI); + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->plogi_link[QLT_PLOGI_LINK_SAME_WWN] == pla) + fcport->plogi_link[QLT_PLOGI_LINK_SAME_WWN] = NULL; + if (fcport->plogi_link[QLT_PLOGI_LINK_CONFLICT] == pla) + fcport->plogi_link[QLT_PLOGI_LINK_CONFLICT] = NULL; + } list_del(&pla->list); kmem_cache_free(qla_tgt_plogi_cachep, pla); } -static void +void qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, struct fc_port *sess, enum qlt_plogi_link_t link) { @@ -444,15 +709,19 @@ qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, /* Inc ref_count first because link might already be pointing at pla */ pla->ref_count++; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, + "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" + " s_id %02x:%02x:%02x, ref=%d pla %p link %d\n", + sess, link, sess->port_name, + iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], + pla->ref_count, pla, link); + if (sess->plogi_link[link]) qlt_plogi_ack_unref(vha, sess->plogi_link[link]); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, - "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" - " s_id %02x:%02x:%02x, ref=%d\n", sess, link, sess->port_name, - iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], - iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], - pla->ref_count); + if (link == QLT_PLOGI_LINK_SAME_WWN) + pla->fcport = sess; sess->plogi_link[link] = pla; } @@ -512,7 +781,7 @@ static void qlt_free_session_done(struct work_struct *work) struct qla_hw_data *ha = vha->hw; unsigned long flags; bool logout_started = false; - fc_port_t fcport; + struct event_arg ea; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, "%s: se_sess %p / sess %p from 
port %8phC loop_id %#04x" @@ -522,8 +791,8 @@ static void qlt_free_session_done(struct work_struct *work) sess->logout_on_delete, sess->keep_nport_handle, sess->send_els_logo); - BUG_ON(!tgt); + if (!IS_SW_RESV_ADDR(sess->d_id)) { if (sess->send_els_logo) { qlt_port_logo_t logo; logo.id = sess->d_id; @@ -533,14 +802,7 @@ static void qlt_free_session_done(struct work_struct *work) if (sess->logout_on_delete) { int rc; - - memset(&fcport, 0, sizeof(fcport)); - fcport.loop_id = sess->loop_id; - fcport.d_id = sess->d_id; - memcpy(fcport.port_name, sess->port_name, WWN_SIZE); - fcport.vha = vha; - - rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); + rc = qla2x00_post_async_logout_work(vha, sess, NULL); if (rc != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0xf085, "Schedule logo failed sess %p rc %d\n", @@ -548,6 +810,7 @@ static void qlt_free_session_done(struct work_struct *work) else logout_started = true; } + } /* * Release the target session for FC Nexus from fabric module code. @@ -568,12 +831,38 @@ static void qlt_free_session_done(struct work_struct *work) msleep(100); } - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf087, + ql_dbg(ql_dbg_disc, vha, 0xf087, "%s: sess %p logout completed\n", __func__, sess); } - spin_lock_irqsave(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (sess->se_sess) { + sess->se_sess = NULL; + if (tgt && !IS_SW_RESV_ADDR(sess->d_id)) + tgt->sess_count--; + } + + sess->disc_state = DSC_DELETED; + sess->fw_login_state = DSC_LS_PORT_UNAVAIL; + sess->deleted = QLA_SESS_DELETED; + sess->login_retry = vha->hw->login_retry_count; + + if (sess->login_succ && !IS_SW_RESV_ADDR(sess->d_id)) { + vha->fcport_count--; + sess->login_succ = 0; + } + + if (sess->chip_reset != sess->vha->hw->chip_reset) + qla2x00_clear_loop_id(sess); + + if (sess->conflict) { + sess->conflict->login_pause = 0; + sess->conflict = NULL; + if (!test_bit(UNLOADING, &vha->dpc_flags)) + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + } + { struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; @@ -591,6 +880,7 @@ static void qlt_free_session_done(struct work_struct *work) own ? own->ref_count : -1, iocb->u.isp24.port_name, con->ref_count); qlt_plogi_ack_unref(vha, con); + sess->plogi_link[QLT_PLOGI_LINK_CONFLICT] = NULL; } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf09a, "se_sess %p / sess %p port %8phC is gone, %s (ref=%d)\n", @@ -600,25 +890,30 @@ static void qlt_free_session_done(struct work_struct *work) own ? 
own->ref_count : -1); } - if (own) + if (own) { + sess->fw_login_state = DSC_LS_PLOGI_PEND; qlt_plogi_ack_unref(vha, own); + sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] = NULL; + } } - spin_unlock_irqrestore(&ha->hardware_lock, flags); - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - sess->se_sess = NULL; spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, - "Unregistration of sess %p finished\n", sess); + "Unregistration of sess %p %8phC finished fcp_cnt %d\n", + sess, sess->port_name, vha->fcport_count); - /* - * We need to protect against race, when tgt is freed before or - * inside wake_up() - */ - tgt->sess_count--; - if (tgt->sess_count == 0) + if (tgt && (tgt->sess_count == 0)) wake_up_all(&tgt->waitQ); + + if (vha->fcport_count == 0) + wake_up_all(&vha->fcport_waitQ); + + if (!tgt || !tgt->tgt_stop) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_DELETE_DONE; + ea.fcport = sess; + qla2x00_fcport_event_handler(vha, &ea); + } } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -636,6 +931,9 @@ void qlt_unreg_sess(struct fc_port *sess) qla2x00_mark_device_lost(vha, sess, 1, 1); sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + sess->disc_state = DSC_DELETE_PEND; + sess->last_rscn_gen = sess->rscn_gen; + sess->last_login_gen = sess->login_gen; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); @@ -679,48 +977,57 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) return qlt_issue_task_mgmt(sess, 0, mcmd, iocb, QLA24XX_MGMT_SEND_NACK); } +static void qla24xx_chk_fcp_state(struct fc_port *sess) +{ + if (sess->chip_reset != sess->vha->hw->chip_reset) { + sess->logout_on_delete = 0; + sess->logo_ack_needed = 0; + sess->fw_login_state = DSC_LS_PORT_UNAVAIL; + sess->scan_state = 0; + } +} + /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_schedule_sess_for_deletion(struct fc_port *sess, +void qlt_schedule_sess_for_deletion(struct fc_port *sess, bool immediate) { struct qla_tgt *tgt = sess->tgt; - uint32_t dev_loss_tmo = tgt->ha->port_down_retry_count + 5; - if (sess->deleted) { - /* Upgrade to unconditional deletion in case it was temporary */ - if (immediate && sess->deleted == QLA_SESS_DELETION_PENDING) - list_del(&sess->del_list_entry); - else + if (sess->disc_state == DSC_DELETE_PEND) + return; + + if (sess->disc_state == DSC_DELETED) { + if (tgt && tgt->tgt_stop && (tgt->sess_count == 0)) + wake_up_all(&tgt->waitQ); + if (sess->vha->fcport_count == 0) + wake_up_all(&sess->vha->fcport_waitQ); + + if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] && + !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) return; } - ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, - "Scheduling sess %p for deletion\n", sess); + sess->disc_state = DSC_DELETE_PEND; - if (immediate) { - dev_loss_tmo = 0; - sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - list_add(&sess->del_list_entry, &tgt->del_sess_list); - } else { - sess->deleted = QLA_SESS_DELETION_PENDING; - list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); - } + if (sess->deleted == QLA_SESS_DELETED) + sess->logout_on_delete = 0; + + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + qla24xx_chk_fcp_state(sess); - sess->expires = jiffies + dev_loss_tmo * HZ; + ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, + "Scheduling sess %p for deletion\n", sess); - ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, - "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" - " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", - 
sess->vha->vp_idx, sess->port_name, sess->loop_id, - sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, - dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, - sess->generation); + schedule_work(&sess->del_work); +} - if (immediate) - mod_delayed_work(system_wq, &tgt->sess_del_work, 0); - else - schedule_delayed_work(&tgt->sess_del_work, - sess->expires - jiffies); +void qlt_schedule_sess_for_deletion_lock(struct fc_port *sess) +{ + unsigned long flags; + struct qla_hw_data *ha = sess->vha->hw; + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + qlt_schedule_sess_for_deletion(sess, 1); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -731,7 +1038,7 @@ static void qlt_clear_tgt_db(struct qla_tgt *tgt) list_for_each_entry(sess, &vha->vp_fcports, list) { if (sess->se_sess) - qlt_schedule_sess_for_deletion(sess, true); + qlt_schedule_sess_for_deletion(sess, 1); } /* At this point tgt could be already dead */ @@ -786,49 +1093,6 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, return res; } -/* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_undelete_sess(struct fc_port *sess) -{ - BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); - - list_del_init(&sess->del_list_entry); - sess->deleted = 0; -} - -static void qlt_del_sess_work_fn(struct delayed_work *work) -{ - struct qla_tgt *tgt = container_of(work, struct qla_tgt, - sess_del_work); - struct scsi_qla_host *vha = tgt->vha; - struct qla_hw_data *ha = vha->hw; - struct fc_port *sess; - unsigned long flags, elapsed; - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - while (!list_empty(&tgt->del_sess_list)) { - sess = list_entry(tgt->del_sess_list.next, typeof(*sess), - del_list_entry); - elapsed = jiffies; - if (time_after_eq(elapsed, sess->expires)) { - /* No turning back */ - list_del_init(&sess->del_list_entry); - sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, - "Timeout: sess %p about to be deleted\n", - sess); - if (sess->se_sess) - ha->tgt.tgt_ops->shutdown_sess(sess); - ha->tgt.tgt_ops->put_sess(sess); - } else { - schedule_delayed_work(&tgt->sess_del_work, - sess->expires - elapsed); - break; - } - } - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -} - /* * Adds an extra ref to allow to drop hw lock after adding sess to the list. * Caller must put it. 
@@ -839,93 +1103,65 @@ static struct fc_port *qlt_create_sess( bool local) { struct qla_hw_data *ha = vha->hw; - struct fc_port *sess; + struct fc_port *sess = fcport; unsigned long flags; - /* Check to avoid double sessions */ - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &vha->vp_fcports, list) { - if (!memcmp(sess->port_name, fcport->port_name, WWN_SIZE)) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, - "Double sess %p found (s_id %x:%x:%x, " - "loop_id %d), updating to d_id %x:%x:%x, " - "loop_id %d", sess, sess->d_id.b.domain, - sess->d_id.b.al_pa, sess->d_id.b.area, - sess->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.al_pa, fcport->d_id.b.area, - fcport->loop_id); - - /* Cannot undelete at this point */ - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, - flags); - return NULL; - } - - if (sess->deleted) - qlt_undelete_sess(sess); - - if (!sess->se_sess) { - if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, - &sess->port_name[0], sess) < 0) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return NULL; - } - } - - kref_get(&sess->sess_kref); - ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, - (fcport->flags & FCF_CONF_COMP_SUPPORTED)); - - if (sess->local && !local) - sess->local = 0; - - qlt_do_generation_tick(vha, &sess->generation); - - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + if (vha->vha_tgt.qla_tgt->tgt_stop) + return NULL; - return sess; + if (fcport->se_sess) { + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get_unless_zero failed for %8phC\n", + __func__, sess->port_name); + return NULL; } - } - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04a, - "qla_target(%u): session allocation failed, all commands " - "from port %8phC will be refused", vha->vp_idx, - fcport->port_name); - - return NULL; + return fcport; } sess->tgt = vha->vha_tgt.qla_tgt; - sess->vha = vha; - sess->d_id = fcport->d_id; - sess->loop_id = fcport->loop_id; sess->local = local; - kref_init(&sess->sess_kref); - INIT_LIST_HEAD(&sess->del_list_entry); - /* Under normal circumstances we want to logout from firmware when + /* + * Under normal circumstances we want to logout from firmware when * session eventually ends and release corresponding nport handle. * In the exception cases (e.g. when new PLOGI is waiting) corresponding - * code will adjust these flags as necessary. */ + * code will adjust these flags as necessary. + */ sess->logout_on_delete = 1; sess->keep_nport_handle = 0; + sess->logout_completed = 0; - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, - "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n", - sess, vha->vha_tgt.qla_tgt); + if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, + &fcport->port_name[0], sess) < 0) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "(%d) %8phC check_initiator_node_acl failed\n", + vha->vp_idx, fcport->port_name); + return NULL; + } else { + kref_init(&fcport->sess_kref); + /* + * Take an extra reference to ->sess_kref here to handle + * fc_port access across ->tgt.sess_lock reaquire. 
+ */ + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get_unless_zero failed for %8phC\n", + __func__, sess->port_name); + return NULL; + } - sess->conf_compl_supported = (fcport->flags & FCF_CONF_COMP_SUPPORTED); - BUILD_BUG_ON(sizeof(sess->port_name) != sizeof(fcport->port_name)); - memcpy(sess->port_name, fcport->port_name, sizeof(sess->port_name)); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (!IS_SW_RESV_ADDR(sess->d_id)) + vha->vha_tgt.qla_tgt->sess_count++; - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_add_tail(&sess->list, &vha->vp_fcports); - vha->vha_tgt.qla_tgt->sess_count++; - qlt_do_generation_tick(vha, &sess->generation); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + qlt_do_generation_tick(vha, &sess->generation); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, + "Adding sess %p se_sess %p to tgt %p sess_count %d\n", + sess, sess->se_sess, vha->vha_tgt.qla_tgt, + vha->vha_tgt.qla_tgt->sess_count); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04b, "qla_target(%d): %ssession for wwn %8phC (loop_id %d, " @@ -934,23 +1170,6 @@ static struct fc_port *qlt_create_sess( fcport->loop_id, sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); - /* - * Determine if this fc_port->port_name is allowed to access - * target mode using explict NodeACLs+MappedLUNs, or using - * TPG demo mode. If this is successful a target mode FC nexus - * is created. - */ - if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, - &fcport->port_name[0], sess) < 0) { - return NULL; - } else { - /* - * Take an extra reference to ->sess_kref here to handle fc_port - * access across ->tgt.sess_lock reaquire. - */ - kref_get(&sess->sess_kref); - } - return sess; } @@ -1007,12 +1226,12 @@ static inline int test_tgt_sess_count(struct qla_tgt *tgt) * We need to protect against race, when tgt is freed before or * inside wake_up() */ - spin_lock_irqsave(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); ql_dbg(ql_dbg_tgt, tgt->vha, 0xe002, "tgt %p, sess_count=%d\n", tgt, tgt->sess_count); res = (tgt->sess_count == 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return res; } @@ -1060,8 +1279,6 @@ int qlt_stop_phase1(struct qla_tgt *tgt) mutex_unlock(&vha->vha_tgt.tgt_mutex); mutex_unlock(&qla_tgt_mutex); - flush_delayed_work(&tgt->sess_del_work); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf009, "Waiting for sess works (tgt %p)", tgt); spin_lock_irqsave(&tgt->sess_work_lock, flags); @@ -1205,6 +1422,7 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, nack = (struct nack_to_isp *)pkt; nack->ox_id = ntfy->ox_id; + nack->u.isp24.handle = QLA_TGT_SKIP_HANDLE; nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { nack->u.isp24.flags = ntfy->u.isp24.flags & @@ -1552,7 +1770,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_REJECTED, false); return; } @@ -1650,10 +1868,19 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) return; } - if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) - qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy, - 0, 0, 0, 0, 0, 0); - else { + if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { + if 
(mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_LOGO) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "TM response logo %phC status %#x state %#x", + mcmd->sess->port_name, mcmd->fc_tm_rsp, + mcmd->flags); + qlt_schedule_sess_for_deletion_lock(mcmd->sess); + } else { + qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy, + 0, 0, 0, 0, 0, 0); + } + } else { if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX) qlt_24xx_send_abts_resp(vha, &mcmd->orig_iocb.abts, mcmd->fc_tm_rsp, false); @@ -2470,7 +2697,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, int res; spin_lock_irqsave(&ha->hardware_lock, flags); - if (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (cmd->sess && cmd->sess->deleted) { cmd->state = QLA_TGT_STATE_PROCESSED; if (cmd->sess->logout_completed) /* no need to terminate. FW already freed exchange. */ @@ -2645,7 +2872,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) || - (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS)) { + (cmd->sess && cmd->sess->deleted)) { /* * Either the port is not online or this request was from * previous life, just abort the processing. @@ -3345,7 +3572,11 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, */ cmd->sess->logout_on_delete = 0; cmd->sess->send_els_logo = 1; - qlt_schedule_sess_for_deletion(cmd->sess, true); + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, cmd->sess->port_name); + + qlt_schedule_sess_for_deletion_lock(cmd->sess); } break; } @@ -3649,6 +3880,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) kfree(op); return; } + /* * __qlt_do_work() will call qlt_put_sess() to release * the extra reference taken above by qlt_make_local_sess() @@ -3656,13 +3888,11 @@ static void qlt_create_sess_from_atio(struct work_struct *work) __qlt_do_work(cmd); kfree(op); return; - out_term: spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); - } /* ha->hardware_lock supposed to be held on entry */ @@ -3702,7 +3932,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, /* Another WWN used to have our s_id. Our PLOGI scheduled its * session deletion, but it's still in sess_del_work wq */ - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { ql_dbg(ql_dbg_io, vha, 0x3061, "New command while old session %p is being deleted\n", sess); @@ -3712,7 +3942,13 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt, vha, 0xffff, + "%s: kref_get fail, %8phC oxid %x \n", + __func__, sess->port_name, + be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id)); + return -EFAULT; + } cmd = qlt_get_tag(vha, sess, atio); if (!cmd) { @@ -3729,9 +3965,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, cmd->se_cmd.cpuid = ha->msix_count ? 
ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND; - spin_lock(&vha->cmd_list_lock); + spin_lock_irqsave(&vha->cmd_list_lock, flags); list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list); - spin_unlock(&vha->cmd_list_lock); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); INIT_WORK(&cmd->work, qlt_do_work); if (ha->msix_count) { @@ -3826,7 +4062,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) sizeof(struct atio_from_isp)); } - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) + if (sess->deleted) return -EFAULT; return qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); @@ -3901,22 +4137,20 @@ static int qlt_abort_task(struct scsi_qla_host *vha, void qlt_logo_completion_handler(fc_port_t *fcport, int rc) { - if (fcport->tgt_session) { - if (rc != MBS_COMMAND_COMPLETE) { - ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, - "%s: se_sess %p / sess %p from" - " port %8phC loop_id %#04x s_id %02x:%02x:%02x" - " LOGO failed: %#x\n", - __func__, - fcport->se_sess, - fcport->tgt_session, - fcport->port_name, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa, rc); - } - - fcport->logout_completed = 1; + if (rc != MBS_COMMAND_COMPLETE) { + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, + "%s: se_sess %p / sess %p from" + " port %8phC loop_id %#04x s_id %02x:%02x:%02x" + " LOGO failed: %#x\n", + __func__, + fcport->se_sess, + fcport, + fcport->port_name, fcport->loop_id, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, rc); } + + fcport->logout_completed = 1; } /* @@ -3926,13 +4160,12 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) * deletion. Returns existing session with matching wwn if present. * Null otherwise. */ -static struct fc_port * -qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, +struct fc_port * +qlt_find_sess_invalidate_other(scsi_qla_host_t *vha, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **conflict_sess) { struct fc_port *sess = NULL, *other_sess; uint64_t other_wwn; - scsi_qla_host_t *vha = tgt->vha; *conflict_sess = NULL; @@ -3949,7 +4182,7 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, /* find other sess with nport_id collision */ if (port_id.b24 == other_sess->d_id.b24) { if (loop_id != other_sess->loop_id) { - ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, + ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000c, "Invalidating sess %p loop_id %d wwn %llx.\n", other_sess, other_sess->loop_id, other_wwn); @@ -3965,6 +4198,11 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, * Another wwn used to have our s_id/loop_id * kill the session, but don't free the loop_id */ + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + other_sess->keep_nport_handle = 1; *conflict_sess = other_sess; qlt_schedule_sess_for_deletion(other_sess, @@ -3974,8 +4212,9 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, } /* find other sess with nport handle collision */ - if (loop_id == other_sess->loop_id) { - ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000d, + if ((loop_id == other_sess->loop_id) && + (loop_id != FC_NO_LOOP_ID)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000d, "Invalidating sess %p loop_id %d wwn %llx.\n", other_sess, other_sess->loop_id, other_wwn); @@ -4046,9 +4285,12 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf026, - "qla_target(%d): Port ID: 0x%3phC ELS 
opcode: 0x%02x\n", - vha->vp_idx, iocb->u.isp24.port_id, iocb->u.isp24.status_subcode); + ql_dbg(ql_dbg_disc, vha, 0xf026, + "qla_target(%d): Port ID: %02x:%02x:%02x ELS opcode: 0x%02x lid %d %8phC\n", + vha->vp_idx, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], + iocb->u.isp24.status_subcode, loop_id, + iocb->u.isp24.port_name); /* res = 1 means ack at the end of thread * res = 0 means ack async/later. @@ -4061,12 +4303,12 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (wwn) { spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags); - sess = qlt_find_sess_invalidate_other(tgt, wwn, - port_id, loop_id, &conflict_sess); + sess = qlt_find_sess_invalidate_other(vha, wwn, + port_id, loop_id, &conflict_sess); spin_unlock_irqrestore(&tgt->ha->tgt.sess_lock, flags); } - if (IS_SW_RESV_ADDR(port_id) || (!sess && !conflict_sess)) { + if (IS_SW_RESV_ADDR(port_id)) { res = 1; break; } @@ -4074,42 +4316,66 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, pla = qlt_plogi_ack_find_add(vha, &port_id, iocb); if (!pla) { qlt_send_term_imm_notif(vha, iocb, 1); - - res = 0; break; } res = 0; - if (conflict_sess) + if (conflict_sess) { + conflict_sess->login_gen++; qlt_plogi_ack_link(vha, pla, conflict_sess, - QLT_PLOGI_LINK_CONFLICT); + QLT_PLOGI_LINK_CONFLICT); + } - if (!sess) + if (!sess) { + pla->ref_count++; + qla24xx_post_newsess_work(vha, &port_id, + iocb->u.isp24.port_name, pla); + res = 0; break; + } qlt_plogi_ack_link(vha, pla, sess, QLT_PLOGI_LINK_SAME_WWN); - /* - * Under normal circumstances we want to release nport handle - * during LOGO process to avoid nport handle leaks inside FW. - * The exception is when LOGO is done while another PLOGI with - * the same nport handle is waiting as might be the case here. - * Note: there is always a possibily of a race where session - * deletion has already started for other reasons (e.g. ACL - * removal) and now PLOGI arrives: - * 1. if PLOGI arrived in FW after nport handle has been freed, - * FW must have assigned this PLOGI a new/same handle and we - * can proceed ACK'ing it as usual when session deletion - * completes. - * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT - * bit reached it, the handle has now been released. We'll - * get an error when we ACK this PLOGI. Nothing will be sent - * back to initiator. Initiator should eventually retry - * PLOGI and situation will correct itself. - */ - sess->keep_nport_handle = ((sess->loop_id == loop_id) && - (sess->d_id.b24 == port_id.b24)); - qlt_schedule_sess_for_deletion(sess, true); + sess->fw_login_state = DSC_LS_PLOGI_PEND; + sess->d_id = port_id; + sess->login_gen++; + + switch (sess->disc_state) { + case DSC_DELETED: + qlt_plogi_ack_unref(vha, pla); + break; + + default: + /* + * Under normal circumstances we want to release nport handle + * during LOGO process to avoid nport handle leaks inside FW. + * The exception is when LOGO is done while another PLOGI with + * the same nport handle is waiting as might be the case here. + * Note: there is always a possibily of a race where session + * deletion has already started for other reasons (e.g. ACL + * removal) and now PLOGI arrives: + * 1. if PLOGI arrived in FW after nport handle has been freed, + * FW must have assigned this PLOGI a new/same handle and we + * can proceed ACK'ing it as usual when session deletion + * completes. + * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT + * bit reached it, the handle has now been released. 
We'll + * get an error when we ACK this PLOGI. Nothing will be sent + * back to initiator. Initiator should eventually retry + * PLOGI and situation will correct itself. + */ + sess->keep_nport_handle = ((sess->loop_id == loop_id) && + (sess->d_id.b24 == port_id.b24)); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, sess->port_name); + + + qlt_schedule_sess_for_deletion_lock(sess); + break; + } + break; case ELS_PRLI: @@ -4117,8 +4383,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (wwn) { spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags); - sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id, - loop_id, &conflict_sess); + sess = qlt_find_sess_invalidate_other(vha, wwn, port_id, + loop_id, &conflict_sess); spin_unlock_irqrestore(&tgt->ha->tgt.sess_lock, flags); } @@ -4132,7 +4398,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } if (sess != NULL) { - if (sess->deleted) { + if (sess->fw_login_state == DSC_LS_PLOGI_PEND) { /* * Impatient initiator sent PRLI before last * PLOGI could finish. Will force him to re-try, @@ -4157,10 +4423,15 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess->local = 0; sess->loop_id = loop_id; sess->d_id = port_id; + sess->fw_login_state = DSC_LS_PRLI_PEND; if (wd3_lo & BIT_7) sess->conf_compl_supported = 1; + if ((wd3_lo & BIT_4) == 0) + sess->port_type = FCT_INITIATOR; + else + sess->port_type = FCT_TARGET; } res = 1; /* send notify ack */ @@ -4170,15 +4441,50 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); qla2xxx_wake_dpc(vha); } else { - /* todo: else - create sess here. */ - res = 1; /* send notify ack */ - } + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); + qla24xx_post_nack_work(vha, sess, iocb, + SRB_NACK_PRLI); + res = 0; + } + } break; case ELS_LOGO: case ELS_PRLO: + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess = qla2x00_find_fcport_by_loopid(vha, loop_id); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + if (sess) { + sess->login_gen++; + sess->fw_login_state = DSC_LS_LOGO_PEND; + sess->logout_on_delete = 0; + sess->logo_ack_needed = 1; + memcpy(sess->iocb, iocb, IOCB_SIZE); + } + res = qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS_SESS); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: logo %llx res %d sess %p ", + __func__, wwn, res, sess); + if (res == 0) { + /* cmd went up to ULP. look for qlt_xmit_tm_rsp() + for LOGO_ACK */ + BUG_ON(!sess); + res = 0; + } else { + /* cmd did not go upstair. 
*/ + if (sess) { + qlt_schedule_sess_for_deletion_lock(sess); + res = 0; + } + /* else logo will be ack */ + } break; case ELS_PDISC: case ELS_ADISC: @@ -4189,6 +4495,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, 0, 0, 0, 0, 0, 0); tgt->link_reinit_iocb_pending = 0; } + + sess = qla2x00_find_fcport_by_wwpn(vha, + iocb->u.isp24.port_name, 1); + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "sess %p lid %d|%d DS %d LS %d\n", + sess, sess->loop_id, loop_id, + sess->disc_state, sess->fw_login_state); + } + res = 1; /* send notify ack */ break; } @@ -4966,7 +5282,6 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03f, "Async MB 2: Port Logged Out\n"); break; - default: break; } @@ -4977,8 +5292,10 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, uint16_t loop_id) { - fc_port_t *fcport; + fc_port_t *fcport, *tfcp, *del; int rc; + unsigned long flags; + u8 newfcport = 0; fcport = kzalloc(sizeof(*fcport), GFP_KERNEL); if (!fcport) { @@ -5000,6 +5317,59 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, return NULL; } + del = NULL; + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + tfcp = qla2x00_find_fcport_by_wwpn(vha, fcport->port_name, 1); + + if (tfcp) { + tfcp->d_id = fcport->d_id; + tfcp->port_type = fcport->port_type; + tfcp->supported_classes = fcport->supported_classes; + tfcp->flags |= fcport->flags; + + del = fcport; + fcport = tfcp; + } else { + if (vha->hw->current_topology == ISP_CFG_F) + fcport->flags |= FCF_FABRIC_DEVICE; + + list_add_tail(&fcport->list, &vha->vp_fcports); + if (!IS_SW_RESV_ADDR(fcport->d_id)) + vha->fcport_count++; + fcport->login_gen++; + fcport->disc_state = DSC_LOGIN_COMPLETE; + fcport->login_succ = 1; + newfcport = 1; + } + + fcport->deleted = 0; + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + switch (vha->host->active_mode) { + case MODE_INITIATOR: + case MODE_DUAL: + if (newfcport) { + if (!IS_IIDMA_CAPABLE(vha->hw) || !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, vha->fcport_count); + qla24xx_post_upd_fcport_work(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, vha->fcport_count); + qla24xx_post_gpsc_work(vha, fcport); + } + } + break; + + case MODE_TARGET: + default: + break; + } + if (del) + qla2x00_free_fcport(del); + return fcport; } @@ -5012,6 +5382,17 @@ static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, int rc, global_resets; uint16_t loop_id = 0; + if ((s_id[0] == 0xFF) && (s_id[1] == 0xFC)) { + /* + * This is Domain Controller, so it should be + * OK to drop SCSI commands from it. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf042, + "Unable to find initiator with S_ID %x:%x:%x", + s_id[0], s_id[1], s_id[2]); + return NULL; + } + mutex_lock(&vha->vha_tgt.tgt_mutex); retry: @@ -5022,21 +5403,11 @@ static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, if (rc != 0) { mutex_unlock(&vha->vha_tgt.tgt_mutex); - if ((s_id[0] == 0xFF) && - (s_id[1] == 0xFC)) { - /* - * This is Domain Controller, so it should be - * OK to drop SCSI commands from it. 
- */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf042, - "Unable to find initiator with S_ID %x:%x:%x", - s_id[0], s_id[1], s_id[2]); - } else - ql_log(ql_log_info, vha, 0xf071, - "qla_target(%d): Unable to find " - "initiator with S_ID %x:%x:%x", - vha->vp_idx, s_id[0], s_id[1], - s_id[2]); + ql_log(ql_log_info, vha, 0xf071, + "qla_target(%d): Unable to find " + "initiator with S_ID %x:%x:%x", + vha->vp_idx, s_id[0], s_id[1], + s_id[2]); if (rc == -ENOENT) { qlt_port_logo_t logo; @@ -5104,12 +5475,18 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (!sess) goto out_term2; } else { - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { sess = NULL; goto out_term2; } - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "%s: kref_get fail %8phC \n", + __func__, sess->port_name); + sess = NULL; + goto out_term2; + } } spin_lock_irqsave(&ha->hardware_lock, flags); @@ -5133,7 +5510,8 @@ static void qlt_abort_work(struct qla_tgt *tgt, qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } @@ -5168,12 +5546,18 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { sess = NULL; goto out_term; } - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "%s: kref_get fail %8phC\n", + __func__, sess->port_name); + sess = NULL; + goto out_term; + } } iocb = a; @@ -5191,8 +5575,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -5270,8 +5653,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->vha = base_vha; init_waitqueue_head(&tgt->waitQ); INIT_LIST_HEAD(&tgt->del_sess_list); - INIT_DELAYED_WORK(&tgt->sess_del_work, - (void (*)(struct work_struct *))qlt_del_sess_work_fn); spin_lock_init(&tgt->sess_work_lock); INIT_WORK(&tgt->sess_work, qlt_sess_work_fn); INIT_LIST_HEAD(&tgt->sess_works_list); @@ -5472,6 +5853,7 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) break; case QLA2XXX_INI_MODE_ENABLED: vha->host->active_mode &= ~MODE_TARGET; + vha->host->active_mode |= MODE_INITIATOR; break; default: break; @@ -5594,13 +5976,12 @@ qlt_rff_id(struct scsi_qla_host *vha, struct ct_sns_req *ct_req) * FC-4 Feature bit 0 indicates target functionality to the name server. 
*/ if (qla_tgt_mode_enabled(vha)) { - if (qla_ini_mode_enabled(vha)) - ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; - else - ct_req->req.rff_id.fc4_feature = BIT_0; + ct_req->req.rff_id.fc4_feature = BIT_0; } else if (qla_ini_mode_enabled(vha)) { ct_req->req.rff_id.fc4_feature = BIT_1; - } + } else if (qla_dual_mode_enabled(vha)) + ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; + } /* @@ -5732,7 +6113,7 @@ qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) nv->firmware_options_1 |= cpu_to_le32(BIT_4); /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + if (qla_tgt_mode_enabled(vha)) nv->firmware_options_1 |= cpu_to_le32(BIT_5); /* Disable Full Login after LIP */ @@ -5834,7 +6215,7 @@ qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) nv->firmware_options_1 |= cpu_to_le32(BIT_4); /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + if (qla_tgt_mode_enabled(vha)) nv->firmware_options_1 |= cpu_to_le32(BIT_5); /* Disable Full Login after LIP */ nv->firmware_options_1 &= cpu_to_le32(~BIT_13); @@ -5940,8 +6321,9 @@ qlt_modify_vp_config(struct scsi_qla_host *vha, { if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_5; - /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + + /* Disable ini mode, if requested. */ + if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_4; } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 9c35ba15c687e7..5e56192a487724 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -799,7 +799,6 @@ struct qla_tgt { /* Protected by hardware_lock */ struct list_head del_sess_list; - struct delayed_work sess_del_work; spinlock_t sess_work_lock; struct list_head sess_works_list; @@ -823,13 +822,6 @@ struct qla_tgt_sess_op { bool aborted; }; -enum qla_sess_deletion { - QLA_SESS_DELETION_NONE = 0, - QLA_SESS_DELETION_PENDING = 1, /* hopefully we can get rid of - * this one */ - QLA_SESS_DELETION_IN_PROGRESS = 2, -}; - enum trace_flags { TRC_NEW_CMD = BIT_0, TRC_DO_WORK = BIT_1, @@ -987,12 +979,17 @@ extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) { - return ha->host->active_mode & MODE_TARGET; + return ha->host->active_mode == MODE_TARGET; } static inline bool qla_ini_mode_enabled(struct scsi_qla_host *ha) { - return ha->host->active_mode & MODE_INITIATOR; + return ha->host->active_mode == MODE_INITIATOR; +} + +static inline bool qla_dual_mode_enabled(struct scsi_qla_host *ha) +{ + return (ha->host->active_mode == MODE_DUAL); } static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) From ead038556f646788e22ad7f0398556d10981ca5f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:28:01 -0800 Subject: [PATCH 0068/1911] qla2xxx: Add Dual mode support in the driver Add switch to allow both Initiator Mode & Target mode to operate at the same time. 
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_init.c | 6 +- drivers/scsi/qla2xxx/qla_isr.c | 19 ++++- drivers/scsi/qla2xxx/qla_target.c | 110 ++++++++++++++++++++++++----- drivers/scsi/qla2xxx/qla_target.h | 3 + drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 79b4e88db3b136..f67a1c82502ccd 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3389,6 +3389,7 @@ struct qla_hw_data { #define FLOGI_SP_SUPPORT BIT_13 uint8_t port_no; /* Physical port of adapter */ + uint8_t exch_starvation; /* Timeout timers. */ uint8_t loop_down_abort_time; /* port down timer */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 8b7c0468a0e0c6..4c0a2d8aa96465 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1568,8 +1568,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) } } - if (qla_ini_mode_enabled(vha) || - qla_dual_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha) || qla_dual_mode_enabled(vha)) rval = qla2x00_init_rings(vha); ha->flags.chip_reset_done = 1; @@ -3994,7 +3993,8 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) * Process any ATIO queue entries that came in * while we weren't online. */ - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) { if (IS_QLA27XX(ha) || IS_QLA83XX(ha)) { spin_lock_irqsave(&ha->tgt.atio_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index bb747d7ebdab30..a0a3412030cb64 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1025,7 +1025,8 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) qla2x00_mark_all_devices_lost(vha, 1); - if (vha->vp_idx == 0 && !qla_ini_mode_enabled(vha)) + if (vha->vp_idx == 0 && + (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) set_bit(SCR_PENDING, &vha->dpc_flags); set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); @@ -1637,6 +1638,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, fcport->d_id.b.area, fcport->d_id.b.al_pa, le32_to_cpu(logio->io_parameter[0])); + vha->hw->exch_starvation = 0; data[0] = MBS_COMMAND_COMPLETE; if (sp->type != SRB_LOGIN_CMD) goto logio_done; @@ -1672,6 +1674,21 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, case LSC_SCODE_NPORT_USED: data[0] = MBS_LOOP_ID_USED; break; + case LSC_SCODE_NOXCB: + vha->hw->exch_starvation++; + if (vha->hw->exch_starvation > 5) { + ql_log(ql_log_warn, vha, 0xffff, + "Exchange starvation. Resetting RISC\n"); + + vha->hw->exch_starvation = 0; + + if (IS_P3P_TYPE(vha->hw)) + set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags); + else + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + /* drop through */ default: data[0] = MBS_COMMAND_ERROR; break; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e4d7d32c82e74c..c8f312f8e0ffd5 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -55,8 +55,17 @@ MODULE_PARM_DESC(qlini_mode, "disabled on enabling target mode and then on disabling target mode " "enabled back; " "\"disabled\" - initiator mode will never be enabled; " + "\"dual\" - Initiator Modes will be enabled. 
Target Mode can be activated " + "when ready " "\"enabled\" (default) - initiator mode will always stay enabled."); +static int ql_dm_tgt_ex_pct = 50; +module_param(ql_dm_tgt_ex_pct, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(ql_dm_tgt_ex_pct, + "For Dual Mode (qlini_mode=dual), this parameter determines " + "the percentage of exchanges/cmds FW will allocate resources " + "for Target mode."); + int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; static int temp_sam_status = SAM_STAT_BUSY; @@ -1295,7 +1304,8 @@ int qlt_stop_phase1(struct qla_tgt *tgt) wait_event(tgt->waitQ, test_tgt_sess_count(tgt)); /* Big hammer */ - if (!ha->flags.host_shutting_down && qla_tgt_mode_enabled(vha)) + if (!ha->flags.host_shutting_down && + (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) qlt_disable_vha(vha); /* Wait for sessions to clear out (just in case) */ @@ -5266,6 +5276,32 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); break; + case MBA_REJECTED_FCP_CMD: + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "qla_target(%d): Async event LS_REJECT occurred " + "(m[0]=%x, m[1]=%x, m[2]=%x, m[3]=%x)", vha->vp_idx, + le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), + le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + + if (le16_to_cpu(mailbox[3]) == 1) { + /* exchange starvation. */ + vha->hw->exch_starvation++; + if (vha->hw->exch_starvation > 5) { + ql_log(ql_log_warn, vha, 0xffff, + "Exchange starvation-. Resetting RISC\n"); + + vha->hw->exch_starvation = 0; + if (IS_P3P_TYPE(vha->hw)) + set_bit(FCOE_CTX_RESET_NEEDED, + &vha->dpc_flags); + else + set_bit(ISP_ABORT_NEEDED, + &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + } + break; + case MBA_PORT_UPDATE: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03d, "qla_target(%d): Port update async event %#x " @@ -5275,10 +5311,11 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); login_code = le16_to_cpu(mailbox[2]); - if (login_code == 0x4) + if (login_code == 0x4) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03e, "Async MB 2: Got PLOGI Complete\n"); - else if (login_code == 0x7) + vha->hw->exch_starvation = 0; + } else if (login_code == 0x7) ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03f, "Async MB 2: Port Logged Out\n"); break; @@ -5829,7 +5866,10 @@ static void qlt_set_mode(struct scsi_qla_host *vha) vha->host->active_mode = MODE_TARGET; break; case QLA2XXX_INI_MODE_ENABLED: - vha->host->active_mode |= MODE_TARGET; + vha->host->active_mode = MODE_UNKNOWN; + break; + case QLA2XXX_INI_MODE_DUAL: + vha->host->active_mode = MODE_DUAL; break; default: break; @@ -5852,8 +5892,8 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) vha->host->active_mode = MODE_INITIATOR; break; case QLA2XXX_INI_MODE_ENABLED: - vha->host->active_mode &= ~MODE_TARGET; - vha->host->active_mode |= MODE_INITIATOR; + case QLA2XXX_INI_MODE_DUAL: + vha->host->active_mode = MODE_INITIATOR; break; default: break; @@ -5948,9 +5988,6 @@ static void qlt_disable_vha(struct scsi_qla_host *vha) void qlt_vport_create(struct scsi_qla_host *vha, struct qla_hw_data *ha) { - if (!qla_tgt_mode_enabled(vha)) - return; - vha->vha_tgt.qla_tgt = NULL; mutex_init(&vha->vha_tgt.tgt_mutex); @@ -5981,7 +6018,6 @@ qlt_rff_id(struct scsi_qla_host *vha, struct ct_sns_req *ct_req) ct_req->req.rff_id.fc4_feature = BIT_1; } else if (qla_dual_mode_enabled(vha)) ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; - } /* @@ -6000,7 +6036,7 @@ qlt_init_atio_q_entries(struct scsi_qla_host *vha) uint16_t cnt; struct atio_from_isp 
*pkt = (struct atio_from_isp *)ha->tgt.atio_ring; - if (!qla_tgt_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha)) return; for (cnt = 0; cnt < ha->tgt.atio_q_length; cnt++) { @@ -6093,8 +6129,10 @@ void qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) { struct qla_hw_data *ha = vha->hw; + u32 tmp; + u16 t; - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { if (!ha->tgt.saved_set) { /* We save only once */ ha->tgt.saved_exchange_count = nv->exchange_count; @@ -6107,7 +6145,24 @@ qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) ha->tgt.saved_set = 1; } - nv->exchange_count = cpu_to_le16(0xFFFF); + if (qla_tgt_mode_enabled(vha)) { + nv->exchange_count = cpu_to_le16(0xFFFF); + } else { /* dual */ + if (ql_dm_tgt_ex_pct > 100) { + ql_dm_tgt_ex_pct = 50; + } else if (ql_dm_tgt_ex_pct == 100) { + /* leave some for FW */ + ql_dm_tgt_ex_pct = 95; + } + + tmp = ha->orig_fw_xcb_count * ql_dm_tgt_ex_pct; + tmp = tmp/100; + if (tmp > 0xffff) + tmp = 0xffff; + + t = tmp & 0xffff; + nv->exchange_count = cpu_to_le16(t); + } /* Enable target mode */ nv->firmware_options_1 |= cpu_to_le32(BIT_4); @@ -6192,11 +6247,13 @@ void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) { struct qla_hw_data *ha = vha->hw; + u32 tmp; + u16 t; if (!QLA_TGT_MODE_ENABLED()) return; - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { if (!ha->tgt.saved_set) { /* We save only once */ ha->tgt.saved_exchange_count = nv->exchange_count; @@ -6209,7 +6266,23 @@ qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) ha->tgt.saved_set = 1; } - nv->exchange_count = cpu_to_le16(0xFFFF); + if (qla_tgt_mode_enabled(vha)) { + nv->exchange_count = cpu_to_le16(0xFFFF); + } else { /* dual */ + if (ql_dm_tgt_ex_pct > 100) { + ql_dm_tgt_ex_pct = 50; + } else if (ql_dm_tgt_ex_pct == 100) { + /* leave some for FW */ + ql_dm_tgt_ex_pct = 95; + } + + tmp = ha->orig_fw_xcb_count * ql_dm_tgt_ex_pct; + tmp = tmp/100; + if (tmp > 0xffff) + tmp = 0xffff; + t = tmp & 0xffff; + nv->exchange_count = cpu_to_le16(t); + } /* Enable target mode */ nv->firmware_options_1 |= cpu_to_le32(BIT_4); @@ -6319,10 +6392,11 @@ void qlt_modify_vp_config(struct scsi_qla_host *vha, struct vp_config_entry_24xx *vpmod) { - if (qla_tgt_mode_enabled(vha)) + /* enable target mode. Bit5 = 1 => disable */ + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_5; - /* Disable ini mode, if requested. */ + /* Disable ini mode, if requested. 
bit4 = 1 => disable */ if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_4; } @@ -6477,6 +6551,8 @@ static int __init qlt_parse_ini_mode(void) ql2x_ini_mode = QLA2XXX_INI_MODE_DISABLED; else if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_ENABLED) == 0) ql2x_ini_mode = QLA2XXX_INI_MODE_ENABLED; + else if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_DUAL) == 0) + ql2x_ini_mode = QLA2XXX_INI_MODE_DUAL; else return false; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 5e56192a487724..ac86b379a26977 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -45,10 +45,12 @@ #define QLA2XXX_INI_MODE_STR_EXCLUSIVE "exclusive" #define QLA2XXX_INI_MODE_STR_DISABLED "disabled" #define QLA2XXX_INI_MODE_STR_ENABLED "enabled" +#define QLA2XXX_INI_MODE_STR_DUAL "dual" #define QLA2XXX_INI_MODE_EXCLUSIVE 0 #define QLA2XXX_INI_MODE_DISABLED 1 #define QLA2XXX_INI_MODE_ENABLED 2 +#define QLA2XXX_INI_MODE_DUAL 3 #define QLA2XXX_COMMAND_COUNT_INIT 250 #define QLA2XXX_IMMED_NOTIFY_COUNT_INIT 250 @@ -975,6 +977,7 @@ extern void qlt_update_vp_map(struct scsi_qla_host *, int); * is not set. Right now, ha value is ignored. */ #define QLA_TGT_MODE_ENABLED() (ql2x_ini_mode != QLA2XXX_INI_MODE_ENABLED) + extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index e37d7ee9547324..8b878a2d730403 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1744,7 +1744,7 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, (struct tcm_qla2xxx_lport *)base_vha->vha_tgt.target_lport_ptr; struct fc_vport_identifiers vport_id; - if (!qla_tgt_mode_enabled(base_vha)) { + if (qla_ini_mode_enabled(base_vha)) { pr_err("qla2xxx base_vha not enabled for target mode\n"); return -EPERM; } From 0ca55938497412f9ffdbcb5cb7c4f8d6d28356ee Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 19 Jan 2017 22:28:02 -0800 Subject: [PATCH 0069/1911] qla2xxx: Remove unused reverse_ini_mode With support for dual mode in the driver, this mode becomes dead code. Remove reverse_ini_mode from code. Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_target.c | 10 ---------- drivers/scsi/qla2xxx/qla_target.h | 8 -------- 3 files changed, 19 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index f67a1c82502ccd..4ff30d136924a6 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3237,7 +3237,6 @@ struct qlt_hw_data { /* Protected by hw lock */ uint32_t enable_class_2:1; uint32_t enable_explicit_conf:1; - uint32_t ini_mode_force_reverse:1; uint32_t node_name_set:1; dma_addr_t atio_dma; /* Physical address. 
*/ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c8f312f8e0ffd5..6571869c676a58 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5858,8 +5858,6 @@ EXPORT_SYMBOL(qlt_lport_deregister); /* Must be called under HW lock */ static void qlt_set_mode(struct scsi_qla_host *vha) { - struct qla_hw_data *ha = vha->hw; - switch (ql2x_ini_mode) { case QLA2XXX_INI_MODE_DISABLED: case QLA2XXX_INI_MODE_EXCLUSIVE: @@ -5874,16 +5872,11 @@ static void qlt_set_mode(struct scsi_qla_host *vha) default: break; } - - if (ha->tgt.ini_mode_force_reverse) - qla_reverse_ini_mode(vha); } /* Must be called under HW lock */ static void qlt_clear_mode(struct scsi_qla_host *vha) { - struct qla_hw_data *ha = vha->hw; - switch (ql2x_ini_mode) { case QLA2XXX_INI_MODE_DISABLED: vha->host->active_mode = MODE_UNKNOWN; @@ -5898,9 +5891,6 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) default: break; } - - if (ha->tgt.ini_mode_force_reverse) - qla_reverse_ini_mode(vha); } /* diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index ac86b379a26977..a7f90dcaae37d3 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -995,14 +995,6 @@ static inline bool qla_dual_mode_enabled(struct scsi_qla_host *ha) return (ha->host->active_mode == MODE_DUAL); } -static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) -{ - if (ha->host->active_mode & MODE_INITIATOR) - ha->host->active_mode &= ~MODE_INITIATOR; - else - ha->host->active_mode |= MODE_INITIATOR; -} - static inline uint32_t sid_to_key(const uint8_t *s_id) { uint32_t key; From 41dc529a4602ac737020f423f84686a81de38e6d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:28:03 -0800 Subject: [PATCH 0070/1911] qla2xxx: Improve RSCN handling in driver Current code blindly does State Change Registration when the link is up. Move SCR behind fabric scan, so that arbitrated loop scan would not get erroneous error message. Some of the other improvements are as follows - Add session deletion for TPRLO and send acknowledgment for TPRLO. - Enable FW option to move ABTS, RIDA & PUREX from RSPQ to ATIOQ. - Save NPort ID early in link init. - Move ABTS & RIDA to ATIOQ helps in keeping command ordering and link up sequence ordering. - Save Nport ID and update VP map so that SCSI CMD/ATIO won't be dropped. - fcport alloc does the initializes memory to zero. Remove memset to zero since It might corrupt link list. - Turn off Registration for State Change MB in loop mode. 
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 11 +- drivers/scsi/qla2xxx/qla_fw.h | 75 ++++++++-- drivers/scsi/qla2xxx/qla_gbl.h | 2 + drivers/scsi/qla2xxx/qla_gs.c | 4 +- drivers/scsi/qla2xxx/qla_init.c | 150 +++++++++++++++---- drivers/scsi/qla2xxx/qla_isr.c | 6 +- drivers/scsi/qla2xxx/qla_mbx.c | 144 +++++++++++------- drivers/scsi/qla2xxx/qla_os.c | 48 +++--- drivers/scsi/qla2xxx/qla_target.c | 238 +++++++++++++++++++++++++++--- 9 files changed, 533 insertions(+), 145 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4ff30d136924a6..3881790e80da5a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2226,6 +2226,13 @@ enum fcport_mgt_event { FCME_DELETE_DONE, }; +enum rscn_addr_format { + RSCN_PORT_ADDR, + RSCN_AREA_ADDR, + RSCN_DOM_ADDR, + RSCN_FAB_ADDR, +}; + /* * Fibre channel port structure. */ @@ -3956,7 +3963,7 @@ typedef struct scsi_qla_host { #define FCOE_CTX_RESET_NEEDED 18 /* Initiate FCoE context reset */ #define MPI_RESET_NEEDED 19 /* Initiate MPI FW reset */ #define ISP_QUIESCE_NEEDED 20 /* Driver need some quiescence */ -#define SCR_PENDING 21 /* SCR in target mode */ +#define FREE_BIT 21 #define PORT_UPDATE_NEEDED 22 #define FX00_RESET_RECOVERY 23 #define FX00_TARGET_SCAN 24 @@ -4010,7 +4017,9 @@ typedef struct scsi_qla_host { /* list of commands waiting on workqueue */ struct list_head qla_cmd_list; struct list_head qla_sess_op_cmd_list; + struct list_head unknown_atio_list; spinlock_t cmd_list_lock; + struct delayed_work unknown_atio_work; /* Counter to detect races between ELS and RSCN events */ atomic_t generation_tick; diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index ee135cf96aee59..1f808928763b4e 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1301,27 +1301,76 @@ struct vp_config_entry_24xx { }; #define VP_RPT_ID_IOCB_TYPE 0x32 /* Report ID Acquisition entry. */ +enum VP_STATUS { + VP_STAT_COMPL, + VP_STAT_FAIL, + VP_STAT_ID_CHG, + VP_STAT_SNS_TO, /* timeout */ + VP_STAT_SNS_RJT, + VP_STAT_SCR_TO, /* timeout */ + VP_STAT_SCR_RJT, +}; + +enum VP_FLAGS { + VP_FLAGS_CON_FLOOP = 1, + VP_FLAGS_CON_P2P = 2, + VP_FLAGS_CON_FABRIC = 3, + VP_FLAGS_NAME_VALID = BIT_5, +}; + struct vp_rpt_id_entry_24xx { uint8_t entry_type; /* Entry type. */ uint8_t entry_count; /* Entry count. */ uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - - uint32_t handle; /* System handle. */ - - uint16_t vp_count; /* Format 0 -- | VP setup | VP acq |. */ - /* Format 1 -- | VP count |. */ - uint16_t vp_idx; /* Format 0 -- Reserved. */ - /* Format 1 -- VP status and index. 
*/ + uint32_t resv1; + uint8_t vp_acquired; + uint8_t vp_setup; + uint8_t vp_idx; /* Format 0=reserved */ + uint8_t vp_status; /* Format 0=reserved */ uint8_t port_id[3]; uint8_t format; - - uint8_t vp_idx_map[16]; - - uint8_t reserved_4[24]; - uint16_t bbcr; - uint8_t reserved_5[6]; + union { + struct { + /* format 0 loop */ + uint8_t vp_idx_map[16]; + uint8_t reserved_4[32]; + } f0; + struct { + /* format 1 fabric */ + uint8_t vpstat1_subcode; /* vp_status=1 subcode */ + uint8_t flags; + uint16_t fip_flags; + uint8_t rsv2[12]; + + uint8_t ls_rjt_vendor; + uint8_t ls_rjt_explanation; + uint8_t ls_rjt_reason; + uint8_t rsv3[5]; + + uint8_t port_name[8]; + uint8_t node_name[8]; + uint16_t bbcr; + uint8_t reserved_5[6]; + } f1; + struct { /* format 2: N2N direct connect */ + uint8_t vpstat1_subcode; + uint8_t flags; + uint16_t rsv6; + uint8_t rsv2[12]; + + uint8_t ls_rjt_vendor; + uint8_t ls_rjt_explanation; + uint8_t ls_rjt_reason; + uint8_t rsv3[5]; + + uint8_t port_name[8]; + uint8_t node_name[8]; + uint32_t remote_nport_id; + uint32_t reserved_5; + } f2; + } u; }; #define VF_EVFP_IOCB_TYPE 0x26 /* Exchange Virtual Fabric Parameters entry. */ diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index dd95ff620ae3c4..e0914575aab33f 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -138,6 +138,7 @@ extern int ql2xmdenable; extern int ql2xexlogins; extern int ql2xexchoffld; extern int ql2xfwholdabts; +extern int ql2xmvasynctoatio; extern int qla2x00_loop_reset(scsi_qla_host_t *); extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int); @@ -844,5 +845,6 @@ extern void qlt_schedule_sess_for_deletion_lock(struct fc_port *); extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); void qla24xx_delete_sess_fn(struct work_struct *); +void qlt_unknown_atio_work_fn(struct work_struct *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 8f7054dc742b4b..d1074fb0fff884 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -2914,8 +2914,10 @@ int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) int qla24xx_post_gidpn_work(struct scsi_qla_host *vha, fc_port_t *fcport) { struct qla_work_evt *e; + int ls; - if ((atomic_read(&vha->loop_state) != LOOP_READY) || + ls = atomic_read(&vha->loop_state); + if (((ls != LOOP_READY) && (ls != LOOP_UP)) || test_bit(UNLOADING, &vha->dpc_flags)) return 0; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4c0a2d8aa96465..68430934fa8d61 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1071,10 +1071,10 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, qla24xx_fcport_handle_login(vha, fcport); } -void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, - struct event_arg *ea) +void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, struct event_arg *ea) { - fc_port_t *fcport; + fc_port_t *fcport, *f, *tf; + uint32_t id = 0, mask, rid; int rc; switch (ea->event) { @@ -1087,20 +1087,55 @@ void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, case FCME_RSCN: if (test_bit(UNLOADING, &vha->dpc_flags)) return; + switch (ea->id.b.rsvd_1) { + case RSCN_PORT_ADDR: + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (!fcport) { + /* cable moved */ + rc = qla24xx_post_gpnid_work(vha, &ea->id); + if (rc) { + ql_log(ql_log_warn, vha, 0xffff, + "RSCN 
GPNID work failed %02x%02x%02x\n", + ea->id.b.domain, ea->id.b.area, + ea->id.b.al_pa); + } + } else { + ea->fcport = fcport; + qla24xx_handle_rscn_event(fcport, ea); + } + break; + case RSCN_AREA_ADDR: + case RSCN_DOM_ADDR: + if (ea->id.b.rsvd_1 == RSCN_AREA_ADDR) { + mask = 0xffff00; + ql_log(ql_dbg_async, vha, 0xffff, + "RSCN: Area 0x%06x was affected\n", + ea->id.b24); + } else { + mask = 0xff0000; + ql_log(ql_dbg_async, vha, 0xffff, + "RSCN: Domain 0x%06x was affected\n", + ea->id.b24); + } - fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); - if (!fcport) { - /* cable moved */ - rc = qla24xx_post_gpnid_work(vha, &ea->id); - if (rc) { - ql_log(ql_log_warn, vha, 0xffff, - "RSCN GPNID work failed %02x%02x%02x\n", - ea->id.b.domain, ea->id.b.area, - ea->id.b.al_pa); + rid = ea->id.b24 & mask; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, + list) { + id = f->d_id.b24 & mask; + if (rid == id) { + ea->fcport = f; + qla24xx_handle_rscn_event(f, ea); + } } - } else { - ea->fcport = fcport; - qla24xx_handle_rscn_event(fcport, ea); + break; + case RSCN_FAB_ADDR: + default: + ql_log(ql_log_warn, vha, 0xffff, + "RSCN: Fabric was affected. Addr format %d\n", + ea->id.b.rsvd_1); + qla2x00_mark_all_devices_lost(vha, 1); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); } break; case FCME_GIDPN_DONE: @@ -2947,6 +2982,21 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha) __func__, ha->fw_options[2]); } + /* Move PUREX, ABTS RX & RIDA to ATIOQ */ + if (ql2xmvasynctoatio) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) + ha->fw_options[2] |= BIT_11; + else + ha->fw_options[2] &= ~BIT_11; + } + + ql_dbg(ql_dbg_init, vha, 0xffff, + "%s, add FW options 1-3 = 0x%04x 0x%04x 0x%04x mode %x\n", + __func__, ha->fw_options[1], ha->fw_options[2], + ha->fw_options[3], vha->host->active_mode); + qla2x00_set_fw_options(vha, ha->fw_options); + /* Update Serial Link options. */ if ((le16_to_cpu(ha->fw_seriallink_options24[0]) & BIT_0) == 0) return; @@ -3953,10 +4003,11 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) } else if (ha->current_topology == ISP_CFG_N) { clear_bit(RSCN_UPDATE, &flags); - + } else if (ha->current_topology == ISP_CFG_NL) { + clear_bit(RSCN_UPDATE, &flags); + set_bit(LOCAL_LOOP_UPDATE, &flags); } else if (!vha->flags.online || (test_bit(ABORT_ISP_ACTIVE, &flags))) { - set_bit(RSCN_UPDATE, &flags); set_bit(LOCAL_LOOP_UPDATE, &flags); } @@ -4058,6 +4109,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) uint16_t loop_id; uint8_t domain, area, al_pa; struct qla_hw_data *ha = vha->hw; + unsigned long flags; found_devs = 0; new_fcport = NULL; @@ -4098,7 +4150,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) "Marking port lost loop_id=0x%04x.\n", fcport->loop_id); - qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST); + qla2x00_mark_device_lost(vha, fcport, 0, 0); } } @@ -4129,13 +4181,14 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) if (loop_id > LAST_LOCAL_LOOP_ID) continue; - memset(new_fcport, 0, sizeof(fc_port_t)); + memset(new_fcport->port_name, 0, WWN_SIZE); /* Fill in member data. 
*/ new_fcport->d_id.b.domain = domain; new_fcport->d_id.b.area = area; new_fcport->d_id.b.al_pa = al_pa; new_fcport->loop_id = loop_id; + rval2 = qla2x00_get_port_database(vha, new_fcport, 0); if (rval2 != QLA_SUCCESS) { ql_dbg(ql_dbg_disc, vha, 0x201a, @@ -4148,6 +4201,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) continue; } + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); /* Check for matching device in port list. */ found = 0; fcport = NULL; @@ -4163,6 +4217,12 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); + if (!fcport->login_succ) { + vha->fcport_count++; + fcport->login_succ = 1; + fcport->disc_state = DSC_LOGIN_COMPLETE; + } + found++; break; } @@ -4173,16 +4233,28 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) /* Allocate a new replacement fcport. */ fcport = new_fcport; + if (!fcport->login_succ) { + vha->fcport_count++; + fcport->login_succ = 1; + fcport->disc_state = DSC_LOGIN_COMPLETE; + } + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + new_fcport = qla2x00_alloc_fcport(vha, GFP_KERNEL); + if (new_fcport == NULL) { ql_log(ql_log_warn, vha, 0x201c, "Failed to allocate memory for fcport.\n"); rval = QLA_MEMORY_ALLOC_FAILED; goto cleanup_allocation; } + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); new_fcport->flags &= ~FCF_FABRIC_DEVICE; } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* Base iIDMA settings on HBA port speed. */ fcport->fp_speed = ha->link_data_rate; @@ -4371,6 +4443,16 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } vha->device_flags |= SWITCH_FOUND; + + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { + rval = qla2x00_send_change_request(vha, 0x3, 0); + if (rval != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0x121, + "Failed to enable receiving of RSCN requests: 0x%x.\n", + rval); + } + + do { qla2x00_mgmt_svr_login(vha); @@ -6116,6 +6198,7 @@ uint8_t qla27xx_find_valid_image(struct scsi_qla_host *vha) for (chksum = 0; cnt--; wptr++) chksum += le32_to_cpu(*wptr); + if (chksum) { ql_dbg(ql_dbg_init, vha, 0x018c, "Checksum validation failed for primary image (0x%x)\n", @@ -7128,6 +7211,10 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) vha->flags.process_response_queue = 1; } + /* enable RIDA Format2 */ + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) + icb->firmware_options_3 |= BIT_0; + if (rval) { ql_log(ql_log_warn, vha, 0x0076, "NVRAM configuration failed.\n"); @@ -7252,13 +7339,26 @@ qla81xx_update_fw_options(scsi_qla_host_t *vha) __func__, ha->fw_options[2]); } - if (!ql2xetsenable) - goto out; + /* Move PUREX, ABTS RX & RIDA to ATIOQ */ + if (ql2xmvasynctoatio) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) + ha->fw_options[2] |= BIT_11; + else + ha->fw_options[2] &= ~BIT_11; + } + + if (ql2xetsenable) { + /* Enable ETS Burst. */ + memset(ha->fw_options, 0, sizeof(ha->fw_options)); + ha->fw_options[2] |= BIT_9; + } + + ql_dbg(ql_dbg_init, vha, 0xffff, + "%s, add FW options 1-3 = 0x%04x 0x%04x 0x%04x mode %x\n", + __func__, ha->fw_options[1], ha->fw_options[2], + ha->fw_options[3], vha->host->active_mode); - /* Enable ETS Burst. 
*/ - memset(ha->fw_options, 0, sizeof(ha->fw_options)); - ha->fw_options[2] |= BIT_9; -out: qla2x00_set_fw_options(vha, ha->fw_options); } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index a0a3412030cb64..d9577db7ddbba5 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1025,10 +1025,6 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) qla2x00_mark_all_devices_lost(vha, 1); - if (vha->vp_idx == 0 && - (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) - set_bit(SCR_PENDING, &vha->dpc_flags); - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(VP_CONFIG_OK, &vha->vp_flags); @@ -1073,7 +1069,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) memset(&ea, 0, sizeof(ea)); ea.event = FCME_RSCN; ea.id.b24 = rscn_entry; + ea.id.b.rsvd_1 = rscn_entry >> 24; qla2x00_fcport_event_handler(vha, &ea); + qla2x00_post_aen_work(vha, FCH_EVT_RSCN, rscn_entry); } break; /* case MBA_RIO_RESPONSE: */ diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 64f04aa2a503af..35079f41741799 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3599,10 +3599,8 @@ void qla24xx_report_id_acquisition(scsi_qla_host_t *vha, struct vp_rpt_id_entry_24xx *rptid_entry) { - uint8_t vp_idx; - uint16_t stat = le16_to_cpu(rptid_entry->vp_idx); struct qla_hw_data *ha = vha->hw; - scsi_qla_host_t *vp; + scsi_qla_host_t *vp = NULL; unsigned long flags; int found; @@ -3613,80 +3611,124 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, return; if (rptid_entry->format == 0) { + /* loop */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, "Format 0 : Number of VPs setup %d, number of " - "VPs acquired %d.\n", - MSB(le16_to_cpu(rptid_entry->vp_count)), - LSB(le16_to_cpu(rptid_entry->vp_count))); + "VPs acquired %d.\n", rptid_entry->vp_setup, + rptid_entry->vp_acquired); ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8, "Primary port id %02x%02x%02x.\n", rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); + + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area = rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else if (rptid_entry->format == 1) { - vp_idx = LSB(stat); + /* fabric */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9, "Format 1: VP[%d] enabled - status %d - with " - "port id %02x%02x%02x.\n", vp_idx, MSB(stat), + "port id %02x%02x%02x.\n", rptid_entry->vp_idx, + rptid_entry->vp_status, rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); /* buffer to buffer credit flag */ - vha->flags.bbcr_enable = (rptid_entry->bbcr & 0xf) != 0; - - /* FA-WWN is only for physical port */ - if (!vp_idx) { - void *wwpn = ha->init_cb->port_name; + vha->flags.bbcr_enable = (rptid_entry->u.f1.bbcr & 0xf) != 0; + + if (rptid_entry->vp_idx == 0) { + if (rptid_entry->vp_status == VP_STAT_COMPL) { + /* FA-WWN is only for physical port */ + if (qla_ini_mode_enabled(vha) && + ha->flags.fawwpn_enabled && + (rptid_entry->u.f1.flags & + VP_FLAGS_NAME_VALID)) { + memcpy(vha->port_name, + rptid_entry->u.f1.port_name, + WWN_SIZE); + } - if (!MSB(stat)) { - if (rptid_entry->vp_idx_map[1] & BIT_6) - wwpn = rptid_entry->reserved_4 + 8; + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area 
= rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); } - memcpy(vha->port_name, wwpn, WWN_SIZE); + fc_host_port_name(vha->host) = wwn_to_u64(vha->port_name); - ql_dbg(ql_dbg_mbx, vha, 0x1018, - "FA-WWN portname %016llx (%x)\n", - fc_host_port_name(vha->host), MSB(stat)); - } - - vp = vha; - if (vp_idx == 0) - goto reg_needed; - if (MSB(stat) != 0 && MSB(stat) != 2) { - ql_dbg(ql_dbg_mbx, vha, 0x10ba, - "Could not acquire ID for VP[%d].\n", vp_idx); - return; - } + if (qla_ini_mode_enabled(vha)) + ql_dbg(ql_dbg_mbx, vha, 0x1018, + "FA-WWN portname %016llx (%x)\n", + fc_host_port_name(vha->host), + rptid_entry->vp_status); - found = 0; - spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry(vp, &ha->vp_list, list) { - if (vp_idx == vp->vp_idx) { - found = 1; - break; + set_bit(REGISTER_FC4_NEEDED, &vha->dpc_flags); + set_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags); + } else { + if (rptid_entry->vp_status != VP_STAT_COMPL && + rptid_entry->vp_status != VP_STAT_ID_CHG) { + ql_dbg(ql_dbg_mbx, vha, 0x10ba, + "Could not acquire ID for VP[%d].\n", + rptid_entry->vp_idx); + return; } - } - spin_unlock_irqrestore(&ha->vport_slock, flags); - if (!found) - return; + found = 0; + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + if (rptid_entry->vp_idx == vp->vp_idx) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); - vp->d_id.b.domain = rptid_entry->port_id[2]; - vp->d_id.b.area = rptid_entry->port_id[1]; - vp->d_id.b.al_pa = rptid_entry->port_id[0]; + if (!found) + return; - /* - * Cannot configure here as we are still sitting on the - * response queue. Handle it in dpc context. - */ - set_bit(VP_IDX_ACQUIRED, &vp->vp_flags); + vp->d_id.b.domain = rptid_entry->port_id[2]; + vp->d_id.b.area = rptid_entry->port_id[1]; + vp->d_id.b.al_pa = rptid_entry->port_id[0]; + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vp, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); -reg_needed: - set_bit(REGISTER_FC4_NEEDED, &vp->dpc_flags); - set_bit(REGISTER_FDMI_NEEDED, &vp->dpc_flags); + /* + * Cannot configure here as we are still sitting on the + * response queue. Handle it in dpc context. + */ + set_bit(VP_IDX_ACQUIRED, &vp->vp_flags); + set_bit(REGISTER_FC4_NEEDED, &vp->dpc_flags); + set_bit(REGISTER_FDMI_NEEDED, &vp->dpc_flags); + } set_bit(VP_DPC_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + } else if (rptid_entry->format == 2) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "RIDA: format 2/N2N Primary port id %02x%02x%02x.\n", + rptid_entry->port_id[2], rptid_entry->port_id[1], + rptid_entry->port_id[0]); + + ql_dbg(ql_dbg_async, vha, 0xffff, + "N2N: Remote WWPN %8phC.\n", + rptid_entry->u.f2.port_name); + + /* N2N. direct connect */ + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area = rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9c4699623410d2..bdaa238ca0ab07 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -237,6 +237,13 @@ MODULE_PARM_DESC(ql2xfwholdabts, "0 (Default) Do not set fw option. 
" "1 - Set fw option to hold ABTS."); +int ql2xmvasynctoatio = 1; +module_param(ql2xmvasynctoatio, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(ql2xmvasynctoatio, + "Move PUREX, ABTS RX and RIDA IOCBs to ATIOQ" + "0 (Default). Do not move IOCBs" + "1 - Move IOCBs."); + /* * SCSI host template entry points */ @@ -2932,18 +2939,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto probe_init_failed; - base_vha->gnl.size = (ha->max_loop_id + 1) * - sizeof(struct get_name_list_extended); - base_vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, - base_vha->gnl.size, &base_vha->gnl.ldma, GFP_KERNEL); - INIT_LIST_HEAD(&base_vha->gnl.fcports); - - if (base_vha->gnl.l == NULL) { - ql_log(ql_log_fatal, base_vha, 0xffff, - "Alloc failed for name list.\n"); - goto probe_init_failed; - } - /* Alloc arrays of request and response ring ptrs */ if (!qla2x00_alloc_queues(ha, req, rsp)) { ql_log(ql_log_fatal, base_vha, 0x003d, @@ -4250,10 +4245,10 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, struct scsi_qla_host *vha = NULL; host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t)); - if (host == NULL) { + if (!host) { ql_log_pci(ql_log_fatal, ha->pdev, 0x0107, "Failed to allocate host from the scsi layer, aborting.\n"); - goto fail; + return NULL; } /* Clear our data area */ @@ -4272,11 +4267,23 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->logo_list); INIT_LIST_HEAD(&vha->plogi_ack_list); INIT_LIST_HEAD(&vha->qp_list); + INIT_LIST_HEAD(&vha->gnl.fcports); spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); + vha->gnl.size = + sizeof(struct get_name_list_extended[ha->max_loop_id+1]); + vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, + vha->gnl.size, &vha->gnl.ldma, GFP_KERNEL); + if (!vha->gnl.l) { + ql_log(ql_log_fatal, vha, 0xffff, + "Alloc failed for name list.\n"); + scsi_remove_host(vha->host); + return NULL; + } + sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, "Allocated the host=%p hw=%p vha=%p dev_name=%s", @@ -4284,9 +4291,6 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, dev_name(&(ha->pdev->dev))); return vha; - -fail: - return vha; } struct qla_work_evt * @@ -5512,16 +5516,6 @@ qla2x00_do_dpc(void *data) qla2x00_update_fcports(base_vha); } - if (test_bit(SCR_PENDING, &base_vha->dpc_flags)) { - int ret; - ret = qla2x00_send_change_request(base_vha, 0x3, 0); - if (ret != QLA_SUCCESS) - ql_log(ql_log_warn, base_vha, 0x121, - "Failed to enable receiving of RSCN " - "requests: 0x%x.\n", ret); - clear_bit(SCR_PENDING, &base_vha->dpc_flags); - } - if (IS_QLAFX00(ha)) goto loop_resync_check; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 6571869c676a58..1cd3734292f945 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -224,6 +224,105 @@ static inline void qlt_decr_num_pend_cmds(struct scsi_qla_host *vha) spin_unlock_irqrestore(&vha->hw->tgt.q_full_lock, flags); } + +static void qlt_queue_unknown_atio(scsi_qla_host_t *vha, + struct atio_from_isp *atio, uint8_t ha_locked) +{ + struct qla_tgt_sess_op *u; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + unsigned long flags; + + if (tgt->tgt_stop) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "qla_target(%d): dropping unknown ATIO_TYPE7, " + "because tgt is being stopped", vha->vp_idx); + goto out_term; + } + + u = kzalloc(sizeof(*u), 
GFP_ATOMIC); + if (u == NULL) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Alloc of struct unknown_atio (size %zd) failed", sizeof(*u)); + /* It should be harmless and on the next retry should work well */ + goto out_term; + } + + u->vha = vha; + memcpy(&u->atio, atio, sizeof(*atio)); + INIT_LIST_HEAD(&u->cmd_list); + + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_add_tail(&u->cmd_list, &vha->unknown_atio_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + + schedule_delayed_work(&vha->unknown_atio_work, 1); + +out: + return; + +out_term: + qlt_send_term_exchange(vha, NULL, atio, ha_locked, 0); + goto out; +} + +static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha, + uint8_t ha_locked) +{ + struct qla_tgt_sess_op *u, *t; + scsi_qla_host_t *host; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + unsigned long flags; + uint8_t queued = 0; + + list_for_each_entry_safe(u, t, &vha->unknown_atio_list, cmd_list) { + if (u->aborted) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Freeing unknown %s %p, because of Abort", + "ATIO_TYPE7", u); + qlt_send_term_exchange(vha, NULL, &u->atio, + ha_locked, 0); + goto abort; + } + + host = qlt_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id); + if (host != NULL) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Requeuing unknown ATIO_TYPE7 %p", u); + qlt_24xx_atio_pkt(host, &u->atio, ha_locked); + } else if (tgt->tgt_stop) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Freeing unknown %s %p, because tgt is being stopped", + "ATIO_TYPE7", u); + qlt_send_term_exchange(vha, NULL, &u->atio, + ha_locked, 0); + } else { + ql_dbg(ql_dbg_async, vha, 0xffff, + "u %p, vha %p, host %p, sched again..", u, + vha, host); + if (!queued) { + queued = 1; + schedule_delayed_work(&vha->unknown_atio_work, + 1); + } + continue; + } + +abort: + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&u->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + kfree(u); + } +} + +void qlt_unknown_atio_work_fn(struct work_struct *work) +{ + struct scsi_qla_host *vha = container_of(to_delayed_work(work), + struct scsi_qla_host, unknown_atio_work); + + qlt_try_to_dequeue_unknown_atios(vha, 0); +} + static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint8_t ha_locked) { @@ -244,8 +343,14 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, atio->u.isp24.fcp_hdr.d_id[0], atio->u.isp24.fcp_hdr.d_id[1], atio->u.isp24.fcp_hdr.d_id[2]); + + + qlt_queue_unknown_atio(vha, atio, ha_locked); break; } + if (unlikely(!list_empty(&vha->unknown_atio_list))) + qlt_try_to_dequeue_unknown_atios(vha, ha_locked); + qlt_24xx_atio_pkt(host, atio, ha_locked); break; } @@ -273,6 +378,31 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, break; } + case VP_RPT_ID_IOCB_TYPE: + qla24xx_report_id_acquisition(vha, + (struct vp_rpt_id_entry_24xx *)atio); + break; + + case ABTS_RECV_24XX: + { + struct abts_recv_from_24xx *entry = + (struct abts_recv_from_24xx *)atio; + struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha, + entry->vp_index); + if (unlikely(!host)) { + ql_dbg(ql_dbg_tgt, vha, 0xffff, + "qla_target(%d): Response pkt (ABTS_RECV_24XX) " + "received, with unknown vp_index %d\n", + vha->vp_idx, entry->vp_index); + break; + } + qlt_response_pkt(host, (response_t *)atio); + break; + + } + + /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */ + default: ql_dbg(ql_dbg_tgt, vha, 0xe040, "qla_target(%d): Received unknown ATIO atio " @@ -791,6 +921,7 @@ static void qlt_free_session_done(struct work_struct 
*work) unsigned long flags; bool logout_started = false; struct event_arg ea; + scsi_qla_host_t *base_vha; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" @@ -802,23 +933,25 @@ static void qlt_free_session_done(struct work_struct *work) if (!IS_SW_RESV_ADDR(sess->d_id)) { - if (sess->send_els_logo) { - qlt_port_logo_t logo; - logo.id = sess->d_id; - logo.cmd_count = 0; - qlt_send_first_logo(vha, &logo); - } + if (sess->send_els_logo) { + qlt_port_logo_t logo; - if (sess->logout_on_delete) { - int rc; - rc = qla2x00_post_async_logout_work(vha, sess, NULL); - if (rc != QLA_SUCCESS) - ql_log(ql_log_warn, vha, 0xf085, - "Schedule logo failed sess %p rc %d\n", - sess, rc); - else - logout_started = true; - } + logo.id = sess->d_id; + logo.cmd_count = 0; + qlt_send_first_logo(vha, &logo); + } + + if (sess->logout_on_delete) { + int rc; + + rc = qla2x00_post_async_logout_work(vha, sess, NULL); + if (rc != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0xf085, + "Schedule logo failed sess %p rc %d\n", + sess, rc); + else + logout_started = true; + } } /* @@ -841,8 +974,13 @@ static void qlt_free_session_done(struct work_struct *work) } ql_dbg(ql_dbg_disc, vha, 0xf087, - "%s: sess %p logout completed\n", - __func__, sess); + "%s: sess %p logout completed\n",__func__, sess); + } + + if (sess->logo_ack_needed) { + sess->logo_ack_needed = 0; + qla24xx_async_notify_ack(vha, sess, + (struct imm_ntfy_from_isp *)sess->iocb, SRB_NACK_LOGO); } spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -917,6 +1055,10 @@ static void qlt_free_session_done(struct work_struct *work) if (vha->fcport_count == 0) wake_up_all(&vha->fcport_waitQ); + base_vha = pci_get_drvdata(ha->pdev); + if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags)) + return; + if (!tgt || !tgt->tgt_stop) { memset(&ea, 0, sizeof(ea)); ea.event = FCME_DELETE_DONE; @@ -1602,6 +1744,14 @@ static int abort_cmd_for_tag(struct scsi_qla_host *vha, uint32_t tag) } } + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + if (tag == op->atio.u.isp24.exchange_addr) { + op->aborted = true; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { if (tag == cmd->atio.u.isp24.exchange_addr) { cmd->aborted = 1; @@ -1638,6 +1788,18 @@ static void abort_cmds_for_lun(struct scsi_qla_host *vha, if (op_key == key && op_lun == lun) op->aborted = true; } + + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + uint32_t op_key; + u64 op_lun; + + op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + op_lun = scsilun_to_int( + (struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun); + if (op_key == key && op_lun == lun) + op->aborted = true; + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { uint32_t cmd_key; uint32_t cmd_lun; @@ -1880,7 +2042,11 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { if (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == - ELS_LOGO) { + ELS_LOGO || + mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_PRLO || + mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_TPRLO) { ql_dbg(ql_dbg_disc, vha, 0xffff, "TM response logo %phC status %#x state %#x", mcmd->sess->port_name, mcmd->fc_tm_rsp, @@ -4252,11 +4418,21 @@ static int abort_cmds_for_s_id(struct scsi_qla_host *vha, port_id_t *s_id) spin_lock(&vha->cmd_list_lock); list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { 
op->aborted = true; count++; } } + + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { + op->aborted = true; + count++; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { uint32_t cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); if (cmd_key == key) { @@ -4463,6 +4639,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } break; + + case ELS_TPRLO: + if (le16_to_cpu(iocb->u.isp24.flags) & + NOTIFY24XX_FLAGS_GLOBAL_TPRLO) { + loop_id = 0xFFFF; + qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS); + res = 1; + break; + } + /* drop through */ case ELS_LOGO: case ELS_PRLO: spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -4472,7 +4658,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (sess) { sess->login_gen++; sess->fw_login_state = DSC_LS_LOGO_PEND; - sess->logout_on_delete = 0; sess->logo_ack_needed = 1; memcpy(sess->iocb, iocb, IOCB_SIZE); } @@ -4483,12 +4668,14 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, "%s: logo %llx res %d sess %p ", __func__, wwn, res, sess); if (res == 0) { - /* cmd went up to ULP. look for qlt_xmit_tm_rsp() - for LOGO_ACK */ + /* + * cmd went upper layer, look for qlt_xmit_tm_rsp() + * for LOGO_ACK & sess delete + */ BUG_ON(!sess); res = 0; } else { - /* cmd did not go upstair. */ + /* cmd did not go to upper layer. */ if (sess) { qlt_schedule_sess_for_deletion_lock(sess); res = 0; @@ -6407,6 +6594,11 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) mutex_init(&base_vha->vha_tgt.tgt_mutex); mutex_init(&base_vha->vha_tgt.tgt_host_action_mutex); + + INIT_LIST_HEAD(&base_vha->unknown_atio_list); + INIT_DELAYED_WORK(&base_vha->unknown_atio_work, + qlt_unknown_atio_work_fn); + qlt_clear_mode(base_vha); } From 25ff6af10562cfe30dd29452a8fc04ec022d4f18 Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Thu, 19 Jan 2017 22:28:04 -0800 Subject: [PATCH 0071/1911] qla2xxx: Simplify usage of SRB structure in driver This patch simplifies SRB structure usage in driver. - Simplify sp->done() and sp->free() interfaces. - Remove sp->fcport->vha to use vha pointer from sp. - Use sp->vha context in qla2x00_rel_sp(). 
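For readers skimming the diffs that follow, here is a minimal user-space sketch of the callback shape this patch moves to. The struct definitions are simplified stand-ins (not the driver's real srb_t or scsi_qla_host), and the completion value is arbitrary; the point is only that done()/free() now take just the SRB pointer and recover the host from sp->vha instead of receiving it as a separate argument or dereferencing sp->fcport->vha.

/*
 * Illustrative sketch only -- simplified stand-in types, not the
 * qla2xxx definitions.  Shows the new (sp, res) / (sp) callback
 * signatures and the vha lookup through sp->vha.
 */
#include <stdio.h>
#include <stdlib.h>

struct scsi_qla_host { unsigned long host_no; };	/* stand-in */

typedef struct srb {
	struct scsi_qla_host *vha;			/* set at allocation time */
	void (*done)(void *sp, int res);		/* new: no separate vha argument */
	void (*free)(void *sp);				/* new: no separate vha argument */
} srb_t;

static void example_sp_done(void *ptr, int res)
{
	srb_t *sp = ptr;
	struct scsi_qla_host *vha = sp->vha;		/* was: extra arg or sp->fcport->vha */

	printf("host %lu: sp %p completed, res %d\n",
	       vha->host_no, (void *)sp, res);
	sp->free(sp);
}

static void example_sp_free(void *ptr)
{
	free(ptr);
}

int main(void)
{
	struct scsi_qla_host vha = { .host_no = 0 };
	srb_t *sp = calloc(1, sizeof(*sp));

	if (!sp)
		return 1;
	sp->vha = &vha;
	sp->done = example_sp_done;
	sp->free = example_sp_free;
	sp->done(sp, 0);				/* callers no longer pass vha */
	return 0;
}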
Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_bsg.c | 23 +++---- drivers/scsi/qla2xxx/qla_def.h | 7 ++- drivers/scsi/qla2xxx/qla_gbl.h | 14 ++--- drivers/scsi/qla2xxx/qla_gs.c | 32 +++++----- drivers/scsi/qla2xxx/qla_init.c | 100 +++++++++++++++--------------- drivers/scsi/qla2xxx/qla_inline.h | 13 ++-- drivers/scsi/qla2xxx/qla_iocb.c | 96 ++++++++++++++-------------- drivers/scsi/qla2xxx/qla_isr.c | 36 +++++------ drivers/scsi/qla2xxx/qla_mr.c | 48 +++++++------- drivers/scsi/qla2xxx/qla_os.c | 35 +++++------ drivers/scsi/qla2xxx/qla_target.c | 8 +-- 11 files changed, 201 insertions(+), 211 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 1bf8061ff8032f..975e7a1175329d 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -13,28 +13,25 @@ /* BSG support for ELS/CT pass through */ void -qla2x00_bsg_job_done(void *data, void *ptr, int res) +qla2x00_bsg_job_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + srb_t *sp = ptr; struct bsg_job *bsg_job = sp->u.bsg_job; struct fc_bsg_reply *bsg_reply = bsg_job->reply; bsg_reply->result = res; bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); - sp->free(vha, sp); + sp->free(sp); } void -qla2x00_bsg_sp_free(void *data, void *ptr) +qla2x00_bsg_sp_free(void *ptr) { - srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = sp->fcport->vha; + srb_t *sp = ptr; + struct qla_hw_data *ha = sp->vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; struct fc_bsg_request *bsg_request = bsg_job->request; - - struct qla_hw_data *ha = vha->hw; struct qla_mt_iocb_rqst_fx00 *piocb_rqst; if (sp->type == SRB_FXIOCB_BCMD) { @@ -62,7 +59,7 @@ qla2x00_bsg_sp_free(void *data, void *ptr) sp->type == SRB_FXIOCB_BCMD || sp->type == SRB_ELS_CMD_HST) kfree(sp->fcport); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } int @@ -394,7 +391,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); rval = -EIO; goto done_unmap_sg; } @@ -542,7 +539,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x7017, "qla2x00_start_sp failed=%d.\n", rval); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); rval = -EIO; goto done_free_fcport; } @@ -2578,6 +2575,6 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) done: spin_unlock_irqrestore(&ha->hardware_lock, flags); - sp->free(vha, sp); + sp->free(sp); return 0; } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 3881790e80da5a..ac37e91aee209f 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -433,7 +433,7 @@ struct srb_iocb { typedef struct srb { atomic_t ref_count; struct fc_port *fcport; - void *vha; + struct scsi_qla_host *vha; uint32_t handle; uint16_t flags; uint16_t type; @@ -447,8 +447,8 @@ typedef struct srb { struct bsg_job *bsg_job; struct srb_cmd scmd; } u; - void (*done)(void *, void *, int); - void (*free)(void *, void *); + void (*done)(void *, int); + void (*free)(void *); } srb_t; #define GET_CMD_SP(sp) (sp->u.scmd.cmd) @@ -3224,6 +3224,7 @@ struct qla_qpair { struct qla_hw_data *hw; struct work_struct q_work; struct list_head qp_list_elem; /* vha->qp_list */ + struct scsi_qla_host *vha; }; /* Place holder for 
FW buffer parameters */ diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index e0914575aab33f..b3d6441d1d90eb 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -186,9 +186,9 @@ extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); extern void qla2x00_disable_board_on_pci_error(struct work_struct *); -extern void qla2x00_sp_compl(void *, void *, int); -extern void qla2xxx_qpair_sp_free_dma(void *, void *); -extern void qla2xxx_qpair_sp_compl(void *, void *, int); +extern void qla2x00_sp_compl(void *, int); +extern void qla2xxx_qpair_sp_free_dma(void *); +extern void qla2xxx_qpair_sp_compl(void *, int); extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); @@ -215,7 +215,7 @@ extern void qla2x00_do_dpc_all_vps(scsi_qla_host_t *); extern int qla24xx_vport_create_req_sanity_check(struct fc_vport *); extern scsi_qla_host_t * qla24xx_create_vhost(struct fc_vport *); -extern void qla2x00_sp_free_dma(void *, void *); +extern void qla2x00_sp_free_dma(void *); extern char *qla2x00_get_fw_version_str(struct scsi_qla_host *, char *); extern void qla2x00_mark_device_lost(scsi_qla_host_t *, fc_port_t *, int, int); @@ -733,10 +733,10 @@ extern int qla82xx_restart_isp(scsi_qla_host_t *); /* IOCB related functions */ extern int qla82xx_start_scsi(srb_t *); -extern void qla2x00_sp_free(void *, void *); +extern void qla2x00_sp_free(void *); extern void qla2x00_sp_timeout(unsigned long); -extern void qla2x00_bsg_job_done(void *, void *, int); -extern void qla2x00_bsg_sp_free(void *, void *); +extern void qla2x00_bsg_job_done(void *, int); +extern void qla2x00_bsg_sp_free(void *); extern void qla2x00_start_iocbs(struct scsi_qla_host *, struct req_que *); /* Interrupt related */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index d1074fb0fff884..6d500fd0676817 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -2822,10 +2822,10 @@ void qla24xx_handle_gidpn_event(scsi_qla_host_t *vha, struct event_arg *ea) } } /* gidpn_event */ -static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) +static void qla2x00_async_gidpn_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; fc_port_t *fcport = sp->fcport; u8 *id = fcport->ct_desc.ct_sns->p.rsp.rsp.gid_pn.port_id; struct event_arg ea; @@ -2847,7 +2847,7 @@ static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) qla2x00_fcport_event_handler(vha, &ea); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -2905,7 +2905,7 @@ int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -2941,11 +2941,11 @@ int qla24xx_post_gpsc_work(struct scsi_qla_host *vha, fc_port_t *fcport) return qla2x00_post_work(vha, e); } -static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) +static void qla24xx_async_gpsc_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; - struct srb *sp = (struct srb *)s; fc_port_t *fcport = 
sp->fcport; struct ct_sns_rsp *ct_rsp; struct event_arg ea; @@ -3011,7 +3011,7 @@ static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) ea.fcport = fcport; qla2x00_fcport_event_handler(vha, &ea); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -3066,7 +3066,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -3104,7 +3104,7 @@ void qla24xx_async_gpnid_done(scsi_qla_host_t *vha, srb_t *sp) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); } void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) @@ -3138,10 +3138,10 @@ void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) } } -static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) +static void qla2x00_async_gpnid_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; struct ct_sns_req *ct_req = (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; struct ct_sns_rsp *ct_rsp = @@ -3183,7 +3183,7 @@ static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); return; } @@ -3272,7 +3272,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 68430934fa8d61..f65431480833be 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -47,29 +47,27 @@ qla2x00_sp_timeout(unsigned long __data) { srb_t *sp = (srb_t *)__data; struct srb_iocb *iocb; - scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; - struct qla_hw_data *ha = vha->hw; + scsi_qla_host_t *vha = sp->vha; struct req_que *req; unsigned long flags; - spin_lock_irqsave(&ha->hardware_lock, flags); - req = ha->req_q_map[0]; + spin_lock_irqsave(&vha->hw->hardware_lock, flags); + req = vha->hw->req_q_map[0]; req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - sp->free(vha, sp); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + sp->free(sp); + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); } void -qla2x00_sp_free(void *data, void *ptr) +qla2x00_sp_free(void *ptr) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *iocb = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; del_timer(&iocb->timer); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } /* Asynchronous Login/Logout Routines -------------------------------------- */ @@ -97,7 +95,7 @@ qla2x00_get_async_timeout(struct scsi_qla_host *vha) void qla2x00_async_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; struct event_arg ea; @@ -130,22 +128,21 @@ qla2x00_async_iocb_timeout(void *data) case SRB_NACK_PLOGI: case SRB_NACK_PRLI: case SRB_NACK_LOGO: - sp->done(sp->vha, sp, QLA_FUNCTION_TIMEOUT); + sp->done(sp, QLA_FUNCTION_TIMEOUT); break; } } static void -qla2x00_async_login_sp_done(void *data, void *ptr, int res) +qla2x00_async_login_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct scsi_qla_host *vha = sp->vha; struct srb_iocb *lio = 
&sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; struct event_arg ea; ql_dbg(ql_dbg_disc, vha, 0xffff, - "%s %8phC res %d \n", - __func__, sp->fcport->port_name, res); + "%s %8phC res %d \n", __func__, sp->fcport->port_name, res); sp->fcport->flags &= ~FCF_ASYNC_SENT; if (!test_bit(UNLOADING, &vha->dpc_flags)) { @@ -160,7 +157,7 @@ qla2x00_async_login_sp_done(void *data, void *ptr, int res) qla2x00_fcport_event_handler(vha, &ea); } - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -212,24 +209,23 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; } static void -qla2x00_async_logout_sp_done(void *data, void *ptr, int res) +qla2x00_async_logout_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; sp->fcport->flags &= ~FCF_ASYNC_SENT; - if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_logout_done_work(sp->fcport->vha, sp->fcport, + if (!test_bit(UNLOADING, &sp->vha->dpc_flags)) + qla2x00_post_async_logout_done_work(sp->vha, sp->fcport, lio->u.logio.data); - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -264,23 +260,23 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; } static void -qla2x00_async_adisc_sp_done(void *data, void *ptr, int res) +qla2x00_async_adisc_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct scsi_qla_host *vha = sp->vha; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_adisc_done_work(sp->fcport->vha, sp->fcport, + qla2x00_post_async_adisc_done_work(sp->vha, sp->fcport, lio->u.logio.data); - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -317,7 +313,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -479,10 +475,10 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, } /* gnl_event */ static void -qla24xx_async_gnl_sp_done(void *v, void *s, int res) +qla24xx_async_gnl_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; unsigned long flags; struct fc_port *fcport = NULL, *tf; u16 i, n = 0, loop_id; @@ -541,7 +537,7 @@ qla24xx_async_gnl_sp_done(void *v, void *s, int res) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) @@ -609,7 +605,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -628,10 +624,10 @@ int qla24xx_post_gnl_work(struct scsi_qla_host *vha, fc_port_t *fcport) } static -void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) +void qla24xx_async_gpdb_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct 
scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; uint64_t zero = 0; struct port_database_24xx *pd; @@ -708,7 +704,7 @@ void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, sp->u.iocb_cmd.u.mbx.in_dma); - sp->free(vha, sp); + sp->free(sp); } static int qla24xx_post_gpdb_work(struct scsi_qla_host *vha, fc_port_t *fcport, @@ -790,7 +786,7 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) if (pd) dma_pool_free(ha->s_dma_pool, pd, pd_dma); - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; qla24xx_post_gpdb_work(vha, fcport, opt); @@ -1168,7 +1164,7 @@ void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, struct event_arg *ea) static void qla2x00_tmf_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *tmf = &sp->u.iocb_cmd; tmf->u.tmf.comp_status = CS_TIMEOUT; @@ -1176,10 +1172,11 @@ qla2x00_tmf_iocb_timeout(void *data) } static void -qla2x00_tmf_sp_done(void *data, void *ptr, int res) +qla2x00_tmf_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *tmf = &sp->u.iocb_cmd; + complete(&tmf->u.tmf.comp); } @@ -1237,7 +1234,7 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, } done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -1245,7 +1242,7 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, static void qla24xx_abort_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = CS_TIMEOUT; @@ -1253,9 +1250,9 @@ qla24xx_abort_iocb_timeout(void *data) } static void -qla24xx_abort_sp_done(void *data, void *ptr, int res) +qla24xx_abort_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *abt = &sp->u.iocb_cmd; complete(&abt->u.abt.comp); @@ -1264,7 +1261,7 @@ qla24xx_abort_sp_done(void *data, void *ptr, int res) static int qla24xx_async_abort_cmd(srb_t *cmd_sp) { - scsi_qla_host_t *vha = cmd_sp->fcport->vha; + scsi_qla_host_t *vha = cmd_sp->vha; fc_port_t *fcport = cmd_sp->fcport; struct srb_iocb *abt_iocb; srb_t *sp; @@ -1297,7 +1294,7 @@ qla24xx_async_abort_cmd(srb_t *cmd_sp) QLA_SUCCESS : QLA_FUNCTION_FAILED; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -7564,6 +7561,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, int v memset(qpair, 0, sizeof(struct qla_qpair)); qpair->hw = vha->hw; + qpair->vha = vha; /* Assign available que pair id */ mutex_lock(&ha->mq_lock); diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 647828b9e6226e..66df6cec59da40 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -232,6 +232,7 @@ qla2xxx_get_qpair_sp(struct qla_qpair *qpair, fc_port_t *fcport, gfp_t flag) memset(sp, 0, sizeof(*sp)); sp->fcport = fcport; sp->iocbs = 1; + sp->vha = qpair->vha; done: if (!sp) QLA_QPAIR_MARK_NOT_BUSY(qpair); @@ -249,14 +250,13 @@ static inline srb_t * qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) { srb_t *sp = NULL; - struct qla_hw_data *ha = vha->hw; uint8_t bail; QLA_VHA_MARK_BUSY(vha, bail); if (unlikely(bail)) return NULL; - sp = mempool_alloc(ha->srb_mempool, flag); + sp = mempool_alloc(vha->hw->srb_mempool, flag); if (!sp) goto done; @@ -271,10 +271,10 @@ qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) } 
static inline void -qla2x00_rel_sp(scsi_qla_host_t *vha, srb_t *sp) +qla2x00_rel_sp(srb_t *sp) { - mempool_free(sp, vha->hw->srb_mempool); - QLA_VHA_MARK_NOT_BUSY(vha); + QLA_VHA_MARK_NOT_BUSY(sp->vha); + mempool_free(sp, sp->vha->hw->srb_mempool); } static inline void @@ -286,8 +286,7 @@ qla2x00_init_timer(srb_t *sp, unsigned long tmo) sp->u.iocb_cmd.timer.function = qla2x00_sp_timeout; add_timer(&sp->u.iocb_cmd.timer); sp->free = qla2x00_sp_free; - if ((IS_QLAFX00(((scsi_qla_host_t *)sp->vha)->hw)) && - (sp->type == SRB_FXIOCB_DCMD)) + if (IS_QLAFX00(sp->vha->hw) && (sp->type == SRB_FXIOCB_DCMD)) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); if (sp->type == SRB_ELS_DCMD) init_completion(&sp->u.iocb_cmd.u.els_logo.comp); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 33d3d90c089b46..535079280288fb 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -23,7 +23,7 @@ qla2x00_get_cmd_direction(srb_t *sp) { uint16_t cflags; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; cflags = 0; @@ -210,7 +210,7 @@ void qla2x00_build_scsi_iocbs_32(srb_t *sp, cmd_entry_t *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp)); /* Three DSDs are available in the Command Type 2 IOCB */ @@ -267,7 +267,7 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp)); /* Two DSDs are available in the Command Type 3 IOCB */ @@ -324,7 +324,7 @@ qla2x00_start_scsi(srb_t *sp) struct rsp_que *rsp; /* Setup device pointers. */ - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; reg = &ha->iobase->isp; cmd = GET_CMD_SP(sp); @@ -601,7 +601,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt, return 0; } - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; /* Set transfer direction */ @@ -716,7 +716,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; /* Set transfer direction */ if (cmd->sc_data_direction == DMA_TO_DEVICE) { @@ -1108,7 +1108,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, if (sp) { cmd = GET_CMD_SP(sp); sgl = scsi_prot_sglist(cmd); - vha = sp->fcport->vha; + vha = sp->vha; } else if (tc) { vha = tc->vha; sgl = tc->prot_sg; @@ -1215,7 +1215,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, /* Update entry type to indicate Command Type CRC_2 IOCB */ *((uint32_t *)(&cmd_pkt->entry_type)) = cpu_to_le32(COMMAND_TYPE_CRC_2); - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; /* No data transfer */ @@ -1225,7 +1225,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, return QLA_SUCCESS; } - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; /* Set transfer direction */ if (cmd->sc_data_direction == DMA_TO_DEVICE) { @@ -1415,7 +1415,7 @@ qla24xx_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; /* Setup device pointers. 
*/ @@ -1492,7 +1492,7 @@ qla24xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; int_to_scsilun(cmd->device->lun, &cmd_pkt->lun); host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun)); @@ -1564,7 +1564,7 @@ qla24xx_dif_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct cmd_type_crc_2 *cmd_pkt; uint32_t status = 0; @@ -2214,13 +2214,13 @@ qla24xx_login_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; logio->port_id[2] = sp->fcport->d_id.b.domain; - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; struct srb_iocb *lio = &sp->u.iocb_cmd; uint16_t opts; @@ -2238,7 +2238,7 @@ qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb2 = cpu_to_le16(sp->fcport->d_id.b.domain); mbx->mb3 = cpu_to_le16(sp->fcport->d_id.b.area << 8 | sp->fcport->d_id.b.al_pa); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); } static void @@ -2254,13 +2254,13 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; logio->port_id[2] = sp->fcport->d_id.b.domain; - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_logout_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; mbx->entry_type = MBX_IOCB_TYPE; SET_TARGET_ID(ha, mbx->loop_id, sp->fcport->loop_id); @@ -2271,7 +2271,7 @@ qla2x00_logout_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb2 = cpu_to_le16(sp->fcport->d_id.b.domain); mbx->mb3 = cpu_to_le16(sp->fcport->d_id.b.area << 8 | sp->fcport->d_id.b.al_pa); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); /* Implicit: mbx->mbx10 = 0. 
*/ } @@ -2281,13 +2281,13 @@ qla24xx_adisc_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_ADISC); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_adisc_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; mbx->entry_type = MBX_IOCB_TYPE; SET_TARGET_ID(ha, mbx->loop_id, sp->fcport->loop_id); @@ -2302,7 +2302,7 @@ qla2x00_adisc_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb3 = cpu_to_le16(LSW(ha->async_pd_dma)); mbx->mb6 = cpu_to_le16(MSW(MSD(ha->async_pd_dma))); mbx->mb7 = cpu_to_le16(LSW(MSD(ha->async_pd_dma))); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); } static void @@ -2338,32 +2338,30 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) } static void -qla2x00_els_dcmd_sp_free(void *ptr, void *data) +qla2x00_els_dcmd_sp_free(void *data) { - struct scsi_qla_host *vha = (scsi_qla_host_t *)ptr; - struct qla_hw_data *ha = vha->hw; - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *elsio = &sp->u.iocb_cmd; kfree(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) - dma_free_coherent(&ha->pdev->dev, DMA_POOL_SIZE, + dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, elsio->u.els_logo.els_logo_pyld, elsio->u.els_logo.els_logo_pyld_dma); del_timer(&elsio->timer); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } static void qla2x00_els_dcmd_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; - struct srb_iocb *lio = &sp->u.iocb_cmd; + srb_t *sp = data; fc_port_t *fcport = sp->fcport; - struct scsi_qla_host *vha = fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; + struct srb_iocb *lio = &sp->u.iocb_cmd; unsigned long flags = 0; ql_dbg(ql_dbg_io, vha, 0x3069, @@ -2386,12 +2384,12 @@ qla2x00_els_dcmd_iocb_timeout(void *data) } static void -qla2x00_els_dcmd_sp_done(void *data, void *ptr, int res) +qla2x00_els_dcmd_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = fcport->vha; + struct scsi_qla_host *vha = sp->vha; ql_dbg(ql_dbg_io, vha, 0x3072, "%s hdl=%x, portid=%02x%02x%02x done\n", @@ -2449,7 +2447,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, GFP_KERNEL); if (!elsio->u.els_logo.els_logo_pyld) { - sp->free(vha, sp); + sp->free(sp); return QLA_FUNCTION_FAILED; } @@ -2468,7 +2466,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(vha, sp); + sp->free(sp); return QLA_FUNCTION_FAILED; } @@ -2479,14 +2477,14 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, wait_for_completion(&elsio->u.els_logo.comp); - sp->free(vha, sp); + sp->free(sp); return rval; } static void qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) { - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct srb_iocb *elsio = &sp->u.iocb_cmd; els_iocb->entry_type = ELS_IOCB_TYPE; @@ -2518,7 +2516,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->rx_address[1] = 0; els_iocb->rx_len = 0; - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2534,7 +2532,7 @@ qla24xx_els_iocb(srb_t *sp, 
struct els_entry_24xx *els_iocb) els_iocb->handle = sp->handle; els_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); els_iocb->tx_dsd_count = cpu_to_le16(bsg_job->request_payload.sg_cnt); - els_iocb->vp_index = sp->fcport->vha->vp_idx; + els_iocb->vp_index = sp->vha->vp_idx; els_iocb->sof_type = EST_SOFI3; els_iocb->rx_dsd_count = cpu_to_le16(bsg_job->reply_payload.sg_cnt); @@ -2565,7 +2563,7 @@ qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->rx_len = cpu_to_le32(sg_dma_len (bsg_job->reply_payload.sg_list)); - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2576,7 +2574,7 @@ qla2x00_ct_iocb(srb_t *sp, ms_iocb_entry_t *ct_iocb) struct scatterlist *sg; int index; uint16_t tot_dsds; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; int loop_iterartion = 0; @@ -2642,7 +2640,7 @@ qla2x00_ct_iocb(srb_t *sp, ms_iocb_entry_t *ct_iocb) } ct_iocb->entry_count = entry_count; - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2653,7 +2651,7 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb) struct scatterlist *sg; int index; uint16_t tot_dsds; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; int loop_iterartion = 0; @@ -2665,7 +2663,7 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb) ct_iocb->handle = sp->handle; ct_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); - ct_iocb->vp_index = sp->fcport->vha->vp_idx; + ct_iocb->vp_index = sp->vha->vp_idx; ct_iocb->comp_status = cpu_to_le16(0); ct_iocb->cmd_dsd_count = @@ -2739,7 +2737,7 @@ qla82xx_start_scsi(srb_t *sp) uint32_t *fcp_dl; uint8_t additional_cdb_len; struct ct6_dsd *ctx; - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct req_que *req = NULL; struct rsp_que *rsp = NULL; @@ -2901,7 +2899,7 @@ qla82xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; /* Build IOCB segments */ if (qla24xx_build_scsi_type_6_iocbs(sp, cmd_pkt, tot_dsds)) @@ -2974,7 +2972,7 @@ qla82xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; int_to_scsilun(cmd->device->lun, &cmd_pkt->lun); host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, @@ -3060,7 +3058,7 @@ static void qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) { struct srb_iocb *aio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; memset(abt_iocb, 0, sizeof(struct abort_entry_24xx)); @@ -3132,7 +3130,7 @@ int qla2x00_start_sp(srb_t *sp) { int rval; - scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; void *pkt; unsigned long flags; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index d9577db7ddbba5..914cc5a66b3419 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ 
-1241,7 +1241,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, req->outstanding_cmds[index] = NULL; /* Save ISP completion status */ - sp->done(vha, sp, DID_OK << 16); + sp->done(sp, DID_OK << 16); } else { ql_log(ql_log_warn, vha, 0x3016, "Invalid SCSI SRB.\n"); @@ -1373,7 +1373,7 @@ qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, le16_to_cpu(mbx->mb7)); logio_done: - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -1398,7 +1398,7 @@ qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, res = (si->u.mbx.in_mb[0] & MBS_MASK); - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1416,7 +1416,7 @@ qla24xxx_nack_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, if (pkt->u.isp2x.status != cpu_to_le16(NOTIFY_ACK_SUCCESS)) res = QLA_FUNCTION_FAILED; - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1488,7 +1488,7 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, break; } - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1524,7 +1524,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, type = "Driver ELS logo"; ql_dbg(ql_dbg_user, vha, 0x5047, "Completing %s: (%p) type=%d.\n", type, sp, sp->type); - sp->done(vha, sp, 0); + sp->done(sp, 0); return; case SRB_CT_PTHRU_CMD: /* borrowing sts_entry_24xx.comp_status. @@ -1533,7 +1533,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, sp->name); - sp->done(vha, sp, res); + sp->done(sp, res); return; default: ql_dbg(ql_dbg_user, vha, 0x503e, @@ -1589,7 +1589,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, bsg_job->reply_len = 0; } - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1701,7 +1701,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, le32_to_cpu(logio->io_parameter[1])); logio_done: - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -1751,7 +1751,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5055, (uint8_t *)sts, sizeof(*sts)); - sp->done(vha, sp, 0); + sp->done(sp, 0); } /** @@ -1839,7 +1839,7 @@ static inline void qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, uint32_t sense_len, struct rsp_que *rsp, int res) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cp = GET_CMD_SP(sp); uint32_t track_sense_len; @@ -1867,7 +1867,7 @@ qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, if (sense_len) { ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x301c, "Check condition Sense data, nexus%ld:%d:%llu cmd=%p.\n", - sp->fcport->vha->host_no, cp->device->id, cp->device->lun, + sp->vha->host_no, cp->device->id, cp->device->lun, cp); ql_dump_buffer(ql_dbg_io + ql_dbg_buffer, vha, 0x302b, cp->sense_buffer, sense_len); @@ -1889,7 +1889,7 @@ struct scsi_dif_tuple { static inline int qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cmd = GET_CMD_SP(sp); uint8_t *ap = &sts24->data[12]; uint8_t *ep = &sts24->data[20]; @@ -2154,7 +2154,7 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, bsg_job->reply_len = sizeof(struct fc_bsg_reply); /* Always return DID_OK, bsg will send the vendor specific response * in this case only */ 
- sp->done(vha, sp, (DID_OK << 6)); + sp->done(sp, DID_OK << 6); } @@ -2527,7 +2527,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) resid_len, fw_resid_len, sp, cp); if (rsp->status_srb == NULL) - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2584,7 +2584,7 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(vha, sp, cp->result); + sp->done(sp, cp->result); } } @@ -2620,7 +2620,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(vha, sp, res); + sp->done(sp, res); return; } fatal: @@ -2678,7 +2678,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle); - sp->done(vha, sp, 0); + sp->done(sp, 0); } /** diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index d38bc6452abd4c..945b6320e7d76e 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -1789,16 +1789,16 @@ qlafx00_update_host_attr(scsi_qla_host_t *vha, struct port_info_data *pinfo) static void qla2x00_fxdisc_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *lio = &sp->u.iocb_cmd; complete(&lio->u.fxiocb.fxiocb_comp); } static void -qla2x00_fxdisc_sp_done(void *data, void *ptr, int res) +qla2x00_fxdisc_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; complete(&lio->u.fxiocb.fxiocb_comp); @@ -1999,7 +1999,7 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len, fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle); done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -2127,7 +2127,7 @@ static inline void qlafx00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, uint32_t sense_len, struct rsp_que *rsp, int res) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cp = GET_CMD_SP(sp); uint32_t track_sense_len; @@ -2162,7 +2162,7 @@ qlafx00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, if (sense_len) { ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x3039, "Check condition Sense data, nexus%ld:%d:%llu cmd=%p.\n", - sp->fcport->vha->host_no, cp->device->id, cp->device->lun, + sp->vha->host_no, cp->device->id, cp->device->lun, cp); ql_dump_buffer(ql_dbg_io + ql_dbg_buffer, vha, 0x3049, cp->sense_buffer, sense_len); @@ -2181,7 +2181,7 @@ qlafx00_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, (sstatus & cpu_to_le16((uint16_t)SS_RESPONSE_INFO_LEN_VALID))) cpstatus = cpu_to_le16((uint16_t)CS_INCOMPLETE); tmf->u.tmf.comp_status = cpstatus; - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -2198,7 +2198,7 @@ qlafx00_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = pkt->tgt_id_sts; - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -2264,7 +2264,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req, bsg_reply->reply_payload_rcv_len = bsg_job->reply_payload.payload_len; } - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2537,7 +2537,7 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) par_sense_len, 
rsp_info_len); if (rsp->status_srb == NULL) - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2614,7 +2614,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(vha, sp, cp->result); + sp->done(sp, cp->result); } } @@ -2695,7 +2695,7 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(vha, sp, res); + sp->done(sp, res); return; } @@ -2997,7 +2997,7 @@ qlafx00_build_scsi_iocbs(srb_t *sp, struct cmd_type_7_fx00 *cmd_pkt, cont_a64_entry_t lcont_pkt; cont_a64_entry_t *cont_pkt; - vha = sp->fcport->vha; + vha = sp->vha; req = vha->req; cmd = GET_CMD_SP(sp); @@ -3081,7 +3081,7 @@ qlafx00_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct cmd_type_7_fx00 *cmd_pkt; struct cmd_type_7_fx00 lcmd_pkt; @@ -3205,7 +3205,7 @@ void qlafx00_tm_iocb(srb_t *sp, struct tsk_mgmt_entry_fx00 *ptm_iocb) { struct srb_iocb *fxio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; struct tsk_mgmt_entry_fx00 tm_iocb; struct scsi_lun llun; @@ -3232,7 +3232,7 @@ void qlafx00_abort_iocb(srb_t *sp, struct abort_iocb_entry_fx00 *pabt_iocb) { struct srb_iocb *fxio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; struct abort_iocb_entry_fx00 abt_iocb; @@ -3346,8 +3346,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); cont_pkt = qlafx00_prep_cont_type1_iocb( - sp->fcport->vha->req, - &lcont_pkt); + sp->vha->req, &lcont_pkt); cur_dsd = (__le32 *) lcont_pkt.dseg_0_address; avail_dsds = 5; @@ -3368,7 +3367,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer( ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3042, + sp->vha, 0x3042, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } @@ -3377,7 +3376,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) memcpy_toio((void __iomem *)cont_pkt, &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3043, + sp->vha, 0x3043, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } } @@ -3409,8 +3408,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); cont_pkt = qlafx00_prep_cont_type1_iocb( - sp->fcport->vha->req, - &lcont_pkt); + sp->vha->req, &lcont_pkt); cur_dsd = (__le32 *) lcont_pkt.dseg_0_address; avail_dsds = 5; @@ -3431,7 +3429,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); ql_dump_buffer( ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3045, + sp->vha, 0x3045, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } @@ -3440,7 +3438,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) memcpy_toio((void __iomem *)cont_pkt, &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3046, + sp->vha, 0x3046, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } } @@ -3452,7 +3450,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) } ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3047, + sp->vha, 0x3047, (uint8_t 
*)&fx_iocb, sizeof(struct fxdisc_entry_fx00)); memcpy_toio((void __iomem *)pfxiocb, &fx_iocb, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bdaa238ca0ab07..191aa94dc2f139 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -613,11 +613,11 @@ qla24xx_fw_version_str(struct scsi_qla_host *vha, char *str, size_t size) } void -qla2x00_sp_free_dma(void *vha, void *ptr) +qla2x00_sp_free_dma(void *ptr) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct qla_hw_data *ha = sp->vha->hw; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct qla_hw_data *ha = sp->fcport->vha->hw; void *ctx = GET_CMD_CTX_SP(sp); if (sp->flags & SRB_DMA_VALID) { @@ -656,20 +656,19 @@ qla2x00_sp_free_dma(void *vha, void *ptr) } CMD_SP(cmd) = NULL; - qla2x00_rel_sp(sp->fcport->vha, sp); + qla2x00_rel_sp(sp); } void -qla2x00_sp_compl(void *data, void *ptr, int res) +qla2x00_sp_compl(void *ptr, int res) { - struct qla_hw_data *ha = (struct qla_hw_data *)data; - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); cmd->result = res; if (atomic_read(&sp->ref_count) == 0) { - ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3015, + ql_dbg(ql_dbg_io, sp->vha, 0x3015, "SP reference-count to ZERO -- sp=%p cmd=%p.\n", sp, GET_CMD_SP(sp)); if (ql2xextended_error_logging & ql_dbg_io) @@ -679,12 +678,12 @@ qla2x00_sp_compl(void *data, void *ptr, int res) if (!atomic_dec_and_test(&sp->ref_count)) return; - qla2x00_sp_free_dma(ha, sp); + qla2x00_sp_free_dma(sp); cmd->scsi_done(cmd); } void -qla2xxx_qpair_sp_free_dma(void *vha, void *ptr) +qla2xxx_qpair_sp_free_dma(void *ptr) { srb_t *sp = (srb_t *)ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); @@ -730,9 +729,9 @@ qla2xxx_qpair_sp_free_dma(void *vha, void *ptr) } void -qla2xxx_qpair_sp_compl(void *data, void *ptr, int res) +qla2xxx_qpair_sp_compl(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); cmd->result = res; @@ -748,7 +747,7 @@ qla2xxx_qpair_sp_compl(void *data, void *ptr, int res) if (!atomic_dec_and_test(&sp->ref_count)) return; - qla2xxx_qpair_sp_free_dma(sp->fcport->vha, sp); + qla2xxx_qpair_sp_free_dma(sp); cmd->scsi_done(cmd); } @@ -869,7 +868,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) return 0; qc24_host_busy_free_sp: - qla2x00_sp_free_dma(ha, sp); + qla2x00_sp_free_dma(sp); qc24_host_busy: return SCSI_MLQUEUE_HOST_BUSY; @@ -958,7 +957,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, return 0; qc24_host_busy_free_sp: - qla2xxx_qpair_sp_free_dma(vha, sp); + qla2xxx_qpair_sp_free_dma(sp); qc24_host_busy: return SCSI_MLQUEUE_HOST_BUSY; @@ -1238,7 +1237,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) } spin_lock_irqsave(&ha->hardware_lock, flags); - sp->done(vha, sp, 0); + sp->done(sp, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); /* Did the command return during mailbox execution? 
*/ @@ -1283,7 +1282,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, continue; if (sp->type != SRB_SCSI_CMD) continue; - if (vha->vp_idx != sp->fcport->vha->vp_idx) + if (vha->vp_idx != sp->vha->vp_idx) continue; match = 0; cmd = GET_CMD_SP(sp); @@ -1663,7 +1662,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) spin_lock_irqsave(&ha->hardware_lock, flags); } req->outstanding_cmds[cnt] = NULL; - sp->done(vha, sp, res); + sp->done(sp, res); } } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 1cd3734292f945..45f5077684f0a5 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -535,10 +535,10 @@ static int qla24xx_post_nack_work(struct scsi_qla_host *vha, fc_port_t *fcport, } static -void qla2x00_async_nack_sp_done(void *v, void *s, int res) +void qla2x00_async_nack_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; struct srb *sp = (struct srb *)s; + struct scsi_qla_host *vha = sp->vha; unsigned long flags; ql_dbg(ql_dbg_disc, vha, 0xffff, @@ -595,7 +595,7 @@ void qla2x00_async_nack_sp_done(void *v, void *s, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, @@ -645,7 +645,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; From 2fdbc65eae3989b566047b2ce21d66f52fe6bf69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:31:13 -0800 Subject: [PATCH 0072/1911] qla2xxx: Avoid using variable-length arrays This patch does not change any functionality but avoids that sparse complains about using variable-length arrays. Signed-off-by: Bart Van Assche Acked-by: Himanshu Madhani Cc: Quinn Tran Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 191aa94dc2f139..4f5965281395f4 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4272,8 +4272,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); - vha->gnl.size = - sizeof(struct get_name_list_extended[ha->max_loop_id+1]); + vha->gnl.size = sizeof(struct get_name_list_extended) * + (ha->max_loop_id + 1); vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, vha->gnl.size, &vha->gnl.ldma, GFP_KERNEL); if (!vha->gnl.l) { From 5cadafb236dffd8aa4772b32bf848af9128faedc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Jan 2017 10:41:52 -0800 Subject: [PATCH 0073/1911] target/cxgbit: Fix endianness annotations This patch does not change any functionality but avoids that sparse complains about endianness. 
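For context, sparse's endianness checking distinguishes CPU-order integers from annotated wire types such as __be64 and __wsum. The sketch below is an illustration only, using hypothetical example_* names rather than code from this patch; it shows the two idioms the annotations rely on: converting big-endian data with be64_to_cpu(), and a __force cast where a differently annotated field is deliberately reused as a plain integer.

	struct example_pagepod {
		__be64 addr;			/* big-endian, as written by the hardware */
	};

	static u64 example_addr(const struct example_pagepod *p)
	{
		return be64_to_cpu(p->addr);	/* explicit conversion keeps sparse quiet */
	}

	static u32 example_credits(const struct sk_buff *skb)
	{
		return (__force u32)skb->csum;	/* deliberate reuse of __wsum storage as a counter */
	}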
Signed-off-by: Bart Van Assche Acked-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h | 2 +- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++++++----- drivers/target/iscsi/cxgbit/cxgbit_main.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h index e995a1a3840a66..a91ad766cef009 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h @@ -59,7 +59,7 @@ struct cxgbi_pagepod_hdr { #define PPOD_PAGES_MAX 4 struct cxgbi_pagepod { struct cxgbi_pagepod_hdr hdr; - u64 addr[PPOD_PAGES_MAX + 1]; + __be64 addr[PPOD_PAGES_MAX + 1]; }; /* ddp tag format diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 2fb1bf1a26c5e6..35aaa36e9af50a 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1726,7 +1726,7 @@ static bool cxgbit_credit_err(const struct cxgbit_sock *csk) } while (skb) { - credit += skb->csum; + credit += (__force u32)skb->csum; skb = cxgbit_skcb_tx_wr_next(skb); } @@ -1753,6 +1753,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); + const u32 csum = (__force u32)p->csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1761,17 +1762,17 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } - if (unlikely(credits < p->csum)) { + if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, credits, csk->wr_cred, csk->wr_una_cred, - p->csum); - p->csum -= credits; + csum); + p->csum = (__force __wsum)(csum - credits); break; } cxgbit_sock_dequeue_wr(csk); - credits -= p->csum; + credits -= csum; kfree_skb(p); } diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 96eedfc49c9428..6531aaac2b9681 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -454,7 +454,7 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, if (unlikely(op != *(u8 *)gl->va)) { pr_info("? FL 0x%p,RSS%#llx,FL %#llx,len %u.\n", gl->va, be64_to_cpu(*rsp), - be64_to_cpu(*(u64 *)gl->va), + get_unaligned_be64(gl->va), gl->tot_len); return 0; } diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8bcb9b71f76432..e183e0bdd3bf85 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -243,7 +243,7 @@ void cxgbit_push_tx_frames(struct cxgbit_sock *csk) } __skb_unlink(skb, &csk->txq); set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - skb->csum = credits_needed + flowclen16; + skb->csum = (__force __wsum)(credits_needed + flowclen16); csk->wr_cred -= credits_needed; csk->wr_una_cred += credits_needed; From bdec5188745c4f03ecbc5b88bc2c3f76acb435ae Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 24 Jan 2017 17:07:02 +0530 Subject: [PATCH 0074/1911] target/cxgbit: Use T6 specific macro to set the force bit For T6 adapters use T6 specific macro to set the force bit. 
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 4 ++++ drivers/target/iscsi/cxgbit/cxgbit_target.c | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index a267173f59972f..21fc2fe1fc10c6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1349,6 +1349,10 @@ struct cpl_tx_data { #define TX_FORCE_S 13 #define TX_FORCE_V(x) ((x) << TX_FORCE_S) +#define T6_TX_FORCE_S 20 +#define T6_TX_FORCE_V(x) ((x) << T6_TX_FORCE_S) +#define T6_TX_FORCE_F T6_TX_FORCE_V(1U) + enum { ULP_TX_MEM_READ = 2, ULP_TX_MEM_WRITE = 3, diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index e183e0bdd3bf85..8ae399dfe84f16 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -162,12 +162,14 @@ cxgbit_tx_data_wr(struct cxgbit_sock *csk, struct sk_buff *skb, u32 dlen, u32 len, u32 credits, u32 compl) { struct fw_ofld_tx_data_wr *req; + const struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; u32 submode = cxgbit_skcb_submode(skb); u32 wr_ulp_mode = 0; u32 hdr_size = sizeof(*req); u32 opcode = FW_OFLD_TX_DATA_WR; u32 immlen = 0; - u32 force = TX_FORCE_V(!submode); + u32 force = is_t5(lldi->adapter_type) ? TX_FORCE_V(!submode) : + T6_TX_FORCE_F; if (cxgbit_skcb_flags(skb) & SKCBF_TX_ISO) { opcode = FW_ISCSI_TX_DATA_WR; From 664a722b4f1bab63fee0193459036d1a95e19e99 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Jan 2017 08:13:34 +0100 Subject: [PATCH 0075/1911] target/tcm_fc: Remove a set-but-not-used variable This was detected by building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_cmd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 9af7842b8178e9..ec372860106f12 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -83,14 +83,12 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller) static void ft_free_cmd(struct ft_cmd *cmd) { struct fc_frame *fp; - struct fc_lport *lport; struct ft_sess *sess; if (!cmd) return; sess = cmd->sess; fp = cmd->req_frame; - lport = fr_dev(fp); if (fr_seq(fp)) fc_seq_release(fr_seq(fp)); fc_frame_free(fp); From 1efaa949396b5d9e8d1e6edef7e97e9ce1a97319 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 12:45:27 +0100 Subject: [PATCH 0076/1911] target/iscsi: Fix indentation in iscsi_target_start_negotiation() This patch avoids that smatch complains about inconsistent indentation in iscsi_target_start_negotiation(). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. 
Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08dad3..5269e9ef031ca0 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1249,16 +1249,16 @@ int iscsi_target_start_negotiation( { int ret; - if (conn->sock) { - struct sock *sk = conn->sock->sk; + if (conn->sock) { + struct sock *sk = conn->sock->sk; - write_lock_bh(&sk->sk_callback_lock); - set_bit(LOGIN_FLAGS_READY, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } - ret = iscsi_target_do_login(conn, login); - if (ret < 0) { + ret = iscsi_target_do_login(conn, login); + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); iscsi_target_restore_sock_callbacks(conn); From 0d5efb8a3c62c598d092bedb579debacd3f746eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 14:37:52 +0100 Subject: [PATCH 0077/1911] target/iscsi: Fix spelling of "perform" Change two occurrences of "preform" into "perform". Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- drivers/target/iscsi/iscsi_target_login.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255dec1..3b770fddcb3bc0 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1545,7 +1545,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, } } /* - * Preform DataSN, DataSequenceInOrder, DataPDUInOrder, and + * Perform DataSN, DataSequenceInOrder, DataPDUInOrder, and * within-command recovery checks before receiving the payload. */ rc = iscsit_check_pre_dataout(cmd, buf); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 450f51deb2a2ae..303cb6574a93da 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -222,7 +222,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) return 0; pr_debug("%s iSCSI Session SID %u is still active for %s," - " preforming session reinstatement.\n", (sessiontype) ? + " performing session reinstatement.\n", (sessiontype) ? "Discovery" : "Normal", sess->sid, sess->sess_ops->InitiatorName); From 53c561dca9fd66330fe63d8a03ef859407e2cd91 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 14:40:24 +0100 Subject: [PATCH 0078/1911] target/iscsi: Fix spelling of "reallegiance" Fix the spelling of this word in a function name, messages and source code comments. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. 
Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 4 ++-- drivers/target/iscsi/iscsi_target_erl2.c | 6 +++--- drivers/target/iscsi/iscsi_target_erl2.h | 2 +- drivers/target/iscsi/iscsi_target_tmr.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3b770fddcb3bc0..e11f8d0f1fa33a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4136,7 +4136,7 @@ int iscsit_close_connection( /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands - * for realligence. + * for reallegiance. * * During normal operation clear the out of order commands (but * do not free the struct iscsi_ooo_cmdsn's) and release all @@ -4144,7 +4144,7 @@ int iscsit_close_connection( */ if (atomic_read(&conn->connection_recovery)) { iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn); - iscsit_prepare_cmds_for_realligance(conn); + iscsit_prepare_cmds_for_reallegiance(conn); } else { iscsit_clear_ooo_cmdsns_for_conn(conn); iscsit_release_commands_from_conn(conn); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index faf9ae014b3044..8df9c90f3db3e9 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -312,7 +312,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) return 0; } -int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) +int iscsit_prepare_cmds_for_reallegiance(struct iscsi_conn *conn) { u32 cmd_count = 0; struct iscsi_cmd *cmd, *cmd_tmp; @@ -347,7 +347,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) && (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) { - pr_debug("Not performing realligence on" + pr_debug("Not performing reallegiance on" " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x," " CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, conn->cid); @@ -382,7 +382,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) cmd_count++; pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x," " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for" - " realligence.\n", cmd->iscsi_opcode, + " reallegiance.\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn, conn->cid); diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h index 7965f1e865061e..634d01e136521c 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.h +++ b/drivers/target/iscsi/iscsi_target_erl2.h @@ -19,7 +19,7 @@ extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *, struct iscsi_session *); extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32); extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *); -extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *); +extern int iscsit_prepare_cmds_for_reallegiance(struct iscsi_conn *); extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *); #endif /*** ISCSI_TARGET_ERL2_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index 3d637055c36f05..cb231c907d5119 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -440,14 +440,14 @@ static int iscsit_task_reassign_complete( break; default: pr_err("Illegal 
iSCSI Opcode 0x%02x during" - " command realligence\n", cmd->iscsi_opcode); + " command reallegiance\n", cmd->iscsi_opcode); return -1; } if (ret != 0) return ret; - pr_debug("Completed connection realligence for Opcode: 0x%02x," + pr_debug("Completed connection reallegiance for Opcode: 0x%02x," " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode, cmd->init_task_tag, conn->cid); From e381fe9e89909ced3d878b8c3c3a9b344c02cbdc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jan 2017 11:32:08 +0100 Subject: [PATCH 0079/1911] target/iscsi: Introduce a helper function for TMF translation This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 48 ++++++++++++++--------------- include/target/target_core_base.h | 1 + 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e11f8d0f1fa33a..d16729fa1f67fa 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1920,6 +1920,28 @@ static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return ret; } +static enum tcm_tmreq_table iscsit_convert_tmf(u8 iscsi_tmf) +{ + switch (iscsi_tmf) { + case ISCSI_TM_FUNC_ABORT_TASK: + return TMR_ABORT_TASK; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + return TMR_ABORT_TASK_SET; + case ISCSI_TM_FUNC_CLEAR_ACA: + return TMR_CLEAR_ACA; + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + return TMR_CLEAR_TASK_SET; + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + return TMR_LUN_RESET; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + return TMR_TARGET_WARM_RESET; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + return TMR_TARGET_COLD_RESET; + default: + return TMR_UNKNOWN; + } +} + int iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, unsigned char *buf) @@ -1985,30 +2007,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; - - switch (function) { - case ISCSI_TM_FUNC_ABORT_TASK: - tcm_function = TMR_ABORT_TASK; - break; - case ISCSI_TM_FUNC_ABORT_TASK_SET: - tcm_function = TMR_ABORT_TASK_SET; - break; - case ISCSI_TM_FUNC_CLEAR_ACA: - tcm_function = TMR_CLEAR_ACA; - break; - case ISCSI_TM_FUNC_CLEAR_TASK_SET: - tcm_function = TMR_CLEAR_TASK_SET; - break; - case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: - tcm_function = TMR_LUN_RESET; - break; - case ISCSI_TM_FUNC_TARGET_WARM_RESET: - tcm_function = TMR_TARGET_WARM_RESET; - break; - case ISCSI_TM_FUNC_TARGET_COLD_RESET: - tcm_function = TMR_TARGET_COLD_RESET; - break; - default: + tcm_function = iscsit_convert_tmf(function); + if (tcm_function == TMR_UNKNOWN) { pr_err("Unknown iSCSI TMR Function:" " 0x%02x\n", function); return iscsit_add_reject_cmd(cmd, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index da854fb4530f1f..92d5628775c2f8 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -197,6 +197,7 @@ enum tcm_tmreq_table { TMR_LUN_RESET = 5, TMR_TARGET_WARM_RESET = 6, TMR_TARGET_COLD_RESET = 7, + TMR_UNKNOWN = 0xff, }; /* fabric independent task management response values */ From 59b6986dbfcdab96a971f9663221849de79a7556 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jan 2017 12:39:57 +0100 Subject: [PATCH 0080/1911] target/iscsi: Fix iSCSI task reassignment handling Allocate a task 
management request structure for all task management requests, including task reassignment. This change avoids that the se_tmr->response assignment dereferences an uninitialized se_tmr pointer. Reported-by: Moshe David Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Moshe David Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d16729fa1f67fa..b4f1d1cbe521a4 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1951,7 +1951,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct iscsi_tm *hdr; int out_of_order_cmdsn = 0, ret; bool sess_ref = false; - u8 function; + u8 function, tcm_function = TMR_UNKNOWN; hdr = (struct iscsi_tm *) buf; hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; @@ -1997,10 +1997,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * LIO-Target $FABRIC_MOD */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { - - u8 tcm_function; - int ret; - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); @@ -2014,15 +2010,14 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } - - ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, - tcm_function, GFP_KERNEL); - if (ret < 0) - return iscsit_add_reject_cmd(cmd, + } + ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, tcm_function, + GFP_KERNEL); + if (ret < 0) + return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); - cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; - } + cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC; cmd->i_state = ISTATE_SEND_TASKMGTRSP; From 4f4c6c3ddb15af396cd8cf81b487b59261199ee9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 13:23:45 +0100 Subject: [PATCH 0081/1911] target: Remove se_tmr_req.tmr_lun Member tmr_lun of se_tmr_req is set but not used. Hence remove it. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Himanshu Madhani Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 1 - include/target/target_core_base.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 26929c44d70316..cb7047d66afcd1 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -163,7 +163,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) rcu_read_lock(); deve = target_nacl_find_deve(nacl, unpacked_lun); if (deve) { - se_tmr->tmr_lun = rcu_dereference(deve->se_lun); se_cmd->se_lun = rcu_dereference(deve->se_lun); se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 92d5628775c2f8..c8b06e226f2e22 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -398,7 +398,6 @@ struct se_tmr_req { void *fabric_tmr_ptr; struct se_cmd *task_cmd; struct se_device *tmr_dev; - struct se_lun *tmr_lun; struct list_head tmr_list; }; From 3a1e7ca64f2dae69536054c42b7b90e480db1045 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 13:47:38 +0100 Subject: [PATCH 0082/1911] target: Make core_tmr_abort_task() consider all commands It is possible that two commands with the same tag are present on sess_cmd_list because commands are removed from sess_cmd_list after a response has been sent to the initiator. Hence continue searching through sess_cmd_list even if a matching tag has already been found. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Himanshu Madhani Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 4f229e711e1c1c..311dc3c2f1dc5c 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -175,10 +175,9 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - if (!__target_check_io_state(se_cmd, se_sess, 0)) { - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - goto out; - } + if (!__target_check_io_state(se_cmd, se_sess, 0)) + continue; + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -195,7 +194,6 @@ void core_tmr_abort_task( } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); -out: printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; From 2d4760ee4720d5f0eeda770ce248244166676283 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jan 2017 10:03:28 -0800 Subject: [PATCH 0083/1911] target: Correct transport_wait_for_tasks() documentation transport_wait_for_tasks() not only waits for command completion but also sets CMD_T_STOP. Additionally, this function is not only called by frontend drivers but also by the target core. Update the transport_wait_for_tasks() documentation to reflect this. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 437591bc7c0855..62a50ea52a49ab 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2765,11 +2765,8 @@ __transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, } /** - * transport_wait_for_tasks - wait for completion to occur - * @cmd: command to wait - * - * Called from frontend fabric context to wait for storage engine - * to pause and/or release frontend generated struct se_cmd. + * transport_wait_for_tasks - set CMD_T_STOP and wait for t_transport_stop_comp + * @cmd: command to wait on */ bool transport_wait_for_tasks(struct se_cmd *cmd) { From a5eb307f37d1e9909284b6f0a8f72bf55423c137 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Jan 2017 10:14:54 -0800 Subject: [PATCH 0084/1911] target: Stop execution if CMD_T_STOP has been set Stop execution if CMD_T_STOP has been set for a command just after the command has been added to the device command list and before .write_pending() is called. The following sequence can trigger this: - transport_handle_cdb_direct() gets called. This function namely sets CMD_T_ACTIVE before it calls transport_generic_new_cmd(). - __transport_wait_for_tasks() is called concurrently. This function sets CMD_T_STOP for all active commands that have not been aborted. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 62a50ea52a49ab..2d867af1961fc4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2452,7 +2452,8 @@ transport_generic_new_cmd(struct se_cmd *cmd) target_execute_cmd(cmd); return 0; } - transport_cmd_check_stop(cmd, false, true); + if (transport_cmd_check_stop(cmd, false, true)) + return 0; ret = cmd->se_tfo->write_pending(cmd); if (ret == -EAGAIN || ret == -ENOMEM) From 580ab13ac445a2bb717bee995d8639ec6836f78c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 27 Dec 2015 13:48:09 +0100 Subject: [PATCH 0085/1911] target: Remove an overly chatty debug message Enabling dynamic debug for the target_core_mod kernel module causes the system log to be spammed with the "Incremented ..." message. Hence remove it. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2d867af1961fc4..738ab107ec120e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1979,8 +1979,6 @@ static void transport_complete_task_attr(struct se_cmd *cmd) if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { atomic_dec_mb(&dev->simple_cmds); dev->dev_cur_ordered_id++; - pr_debug("Incremented dev->dev_cur_ordered_id: %u for SIMPLE\n", - dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_HEAD_TAG) { dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n", From b1a2ecdad72e0a4f0ebdbaff580b5330ebcf68c3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Feb 2017 16:58:35 -0800 Subject: [PATCH 0086/1911] target: Inline transport_cmd_check_stop() The function transport_cmd_check_stop() has two callers. These callers invoke this function as follows: * transport_cmd_check_stop(cmd, true, false) * transport_cmd_check_stop(cmd, false, true) Hence inline this function into its callers. This patch does not change any functionality but improves source code readability. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Bryant G. Ly Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 68 +++++++++++++------------- include/target/target_core_fabric.h | 2 +- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 738ab107ec120e..6f66a2d890b872 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -604,24 +604,18 @@ static void target_remove_from_state_list(struct se_cmd *cmd) spin_unlock_irqrestore(&dev->execute_task_lock, flags); } -static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, - bool write_pending) +static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) { unsigned long flags; - if (remove_from_lists) { - target_remove_from_state_list(cmd); + target_remove_from_state_list(cmd); - /* - * Clear struct se_cmd->se_lun before the handoff to FE. - */ - cmd->se_lun = NULL; - } + /* + * Clear struct se_cmd->se_lun before the handoff to FE. + */ + cmd->se_lun = NULL; spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -635,31 +629,18 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, complete_all(&cmd->t_transport_stop_comp); return 1; } - cmd->transport_state &= ~CMD_T_ACTIVE; - if (remove_from_lists) { - /* - * Some fabric modules like tcm_loop can release - * their internally allocated I/O reference now and - * struct se_cmd now. - * - * Fabric modules are expected to return '1' here if the - * se_cmd being passed is released at this point, - * or zero if not being released. 
- */ - if (cmd->se_tfo->check_stop_free != NULL) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return cmd->se_tfo->check_stop_free(cmd); - } - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return 0; -} -static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) -{ - return transport_cmd_check_stop(cmd, true, false); + /* + * Some fabric modules like tcm_loop can release their internally + * allocated I/O reference and struct se_cmd now. + * + * Fabric modules are expected to return '1' here if the se_cmd being + * passed is released at this point, or zero if not being released. + */ + return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd) + : 0; } static void transport_lun_remove_cmd(struct se_cmd *cmd) @@ -2385,6 +2366,7 @@ EXPORT_SYMBOL(target_alloc_sgl); sense_reason_t transport_generic_new_cmd(struct se_cmd *cmd) { + unsigned long flags; int ret = 0; bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); @@ -2450,8 +2432,24 @@ transport_generic_new_cmd(struct se_cmd *cmd) target_execute_cmd(cmd); return 0; } - if (transport_cmd_check_stop(cmd, false, true)) + + spin_lock_irqsave(&cmd->t_state_lock, flags); + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* + * Determine if frontend context caller is requesting the stopping of + * this command for frontend exceptions. + */ + if (cmd->transport_state & CMD_T_STOP) { + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); + + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + complete_all(&cmd->t_transport_stop_comp); return 0; + } + cmd->transport_state &= ~CMD_T_ACTIVE; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); ret = cmd->se_tfo->write_pending(cmd); if (ret == -EAGAIN || ret == -ENOMEM) diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 358041bad1da03..d7dd1427fe0de9 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -47,7 +47,7 @@ struct target_core_fabric_ops { u32 (*tpg_get_inst_index)(struct se_portal_group *); /* * Optional to release struct se_cmd and fabric dependent allocated - * I/O descriptor in transport_cmd_check_stop(). + * I/O descriptor after command execution has finished. * * Returning 1 will signal a descriptor has been released. * Returning 0 will signal a descriptor has not been released. From 6b6427b6fd90ba1ff13e540bf7276f30a3e6f74f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Feb 2017 12:19:16 -0800 Subject: [PATCH 0087/1911] target: Move session check from target_put_sess_cmd() into target_release_cmd_kref() This patch does not change any functionality. 
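The move relies on the standard kref contract: kref_put() invokes the release callback exactly once, when the last reference is dropped, so session-dependent cleanup can live entirely inside that callback. A generic sketch of the pattern follows, with hypothetical example_* names rather than the se_cmd code itself.

	struct example_cmd {
		struct kref kref;
		struct list_head list;		/* on a per-session list while referenced */
	};

	static void example_cmd_release(struct kref *kref)
	{
		struct example_cmd *cmd = container_of(kref, struct example_cmd, kref);

		list_del_init(&cmd->list);	/* last reference gone: safe to unlink */
		kfree(cmd);
	}

	static void example_cmd_put(struct example_cmd *cmd)
	{
		kref_put(&cmd->kref, example_cmd_release);
	}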
Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 39 +++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6f66a2d890b872..e949db12c4d88c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2592,39 +2592,38 @@ static void target_release_cmd_kref(struct kref *kref) unsigned long flags; bool fabric_stop; - spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); + if (se_sess) { + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - spin_lock(&se_cmd->t_state_lock); - fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && - (se_cmd->transport_state & CMD_T_ABORTED); - spin_unlock(&se_cmd->t_state_lock); + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && + (se_cmd->transport_state & CMD_T_ABORTED); + spin_unlock(&se_cmd->t_state_lock); - if (se_cmd->cmd_wait_set || fabric_stop) { + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); + complete(&se_cmd->cmd_wait_comp); + return; + } list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_free_cmd_mem(se_cmd); - complete(&se_cmd->cmd_wait_comp); - return; } - list_del_init(&se_cmd->se_cmd_list); - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } -/* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_cmd: command descriptor to drop +/** + * target_put_sess_cmd - decrease the command reference count + * @se_cmd: command to drop a reference from + * + * Returns 1 if and only if this target_put_sess_cmd() call caused the + * refcount to drop to zero. Returns zero otherwise. */ int target_put_sess_cmd(struct se_cmd *se_cmd) { - struct se_session *se_sess = se_cmd->se_sess; - - if (!se_sess) { - target_free_cmd_mem(se_cmd); - se_cmd->se_tfo->release_cmd(se_cmd); - return 1; - } return kref_put(&se_cmd->cmd_kref, target_release_cmd_kref); } EXPORT_SYMBOL(target_put_sess_cmd); From fd5e64def9170392fdf258d1eee5966433ee3d45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 3 Jan 2017 10:44:11 +0100 Subject: [PATCH 0088/1911] target: Remove command flag CMD_T_BUSY The patch that reworks task management function handling guarantees that target_remove_from_state_list() is always called with CMD_T_BUSY cleared. Since that function is the only function that tests that flag this means that that flag is now superfluous. Hence remove that flag. 
Signed-off-by: Bart Van Assche Reviewed-by: Andy Grover Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 2 +- drivers/target/target_core_transport.c | 12 ++++-------- include/target/target_core_base.h | 1 - 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index df7b6e95c019dd..68d8aef7ab78d4 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -604,7 +604,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes spin_lock_irq(&cmd->t_state_lock); cmd->t_state = TRANSPORT_PROCESSING; - cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); __target_execute_cmd(cmd, false); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e949db12c4d88c..012ed95c3f40e0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -593,9 +593,6 @@ static void target_remove_from_state_list(struct se_cmd *cmd) if (!dev) return; - if (cmd->transport_state & CMD_T_BUSY) - return; - spin_lock_irqsave(&dev->execute_task_lock, flags); if (cmd->state_active) { list_del(&cmd->state_list); @@ -714,7 +711,6 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) spin_lock_irqsave(&cmd->t_state_lock, flags); - cmd->transport_state &= ~CMD_T_BUSY; if (dev && dev->transport->transport_complete) { dev->transport->transport_complete(cmd, @@ -1782,7 +1778,7 @@ void __target_execute_cmd(struct se_cmd *cmd, bool do_checks) return; err: spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, ret); @@ -1810,7 +1806,7 @@ static int target_write_prot_action(struct se_cmd *cmd) sectors, 0, cmd->t_prot_sg, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, cmd->pi_err); return -1; @@ -1899,7 +1895,7 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; - cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); if (target_write_prot_action(cmd)) @@ -1907,7 +1903,7 @@ void target_execute_cmd(struct se_cmd *cmd) if (target_handle_task_attr(cmd)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); return; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c8b06e226f2e22..7b350fd60cdbe0 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -489,7 +489,6 @@ struct se_cmd { #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) #define CMD_T_DEV_ACTIVE (1 << 7) -#define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; From b2c9652eba6c11787b5f6cfa53aaea752f58809e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jun 2015 16:17:51 -0700 Subject: [PATCH 0089/1911] target: Remove command flag 
CMD_T_DEV_ACTIVE The code that tests the CMD_T_DEV_ACTIVE flag has been removed. Hence also remove the flag itself. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Andy Grover Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 1 - include/target/target_core_base.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 012ed95c3f40e0..22190003534dbc 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1223,7 +1223,6 @@ void transport_init_se_cmd( init_completion(&cmd->cmd_wait_comp); spin_lock_init(&cmd->t_state_lock); kref_init(&cmd->cmd_kref); - cmd->transport_state = CMD_T_DEV_ACTIVE; cmd->se_tfo = tfo; cmd->se_sess = se_sess; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7b350fd60cdbe0..d7336f3c6b600a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -488,7 +488,6 @@ struct se_cmd { #define CMD_T_COMPLETE (1 << 2) #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) -#define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; From c282222a45cb9503cbfbebfdb60491f06ae84b49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Feb 2017 11:52:29 +0100 Subject: [PATCH 0090/1911] xfrm: policy: init locks early Dmitry reports following splat: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 13059 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170207 #1 [..] spin_lock_bh include/linux/spinlock.h:304 [inline] xfrm_policy_flush+0x32/0x470 net/xfrm/xfrm_policy.c:963 xfrm_policy_fini+0xbf/0x560 net/xfrm/xfrm_policy.c:3041 xfrm_net_init+0x79f/0x9e0 net/xfrm/xfrm_policy.c:3091 ops_init+0x10a/0x530 net/core/net_namespace.c:115 setup_net+0x2ed/0x690 net/core/net_namespace.c:291 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] Problem is that when we get error during xfrm_net_init we will call xfrm_policy_fini which will acquire xfrm_policy_lock before it was initialized. Just move it around so locks get set up first. 
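The general rule illustrated by the fix: initialize any lock before the first operation whose error path may take it. A minimal sketch with hypothetical example_* helpers and a hypothetical net->example member (not the xfrm code itself):

	static int __net_init example_net_init(struct net *net)
	{
		int rv;

		/* locks first, so any error path below may take them safely */
		spin_lock_init(&net->example.lock);

		rv = example_tables_init(net);
		if (rv < 0)
			return rv;

		rv = example_proc_init(net);
		if (rv < 0)
			goto out_tables;

		return 0;

	out_tables:
		example_tables_fini(net);	/* may acquire net->example.lock */
		return rv;
	}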
Reported-by: Dmitry Vyukov Fixes: 283bc9f35bbbcb0e9 ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 177e208e8ff509..3c8f5b70abf874 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3062,6 +3062,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3078,11 +3083,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - spin_lock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: From ca8d8e03b4c9ad447d1e882cc8014e538f653018 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 9 Feb 2017 10:03:41 -0500 Subject: [PATCH 0091/1911] iio: 104-quad-8: Fix off-by-one error when addressing flag register The flag register is offset by 1 from the respective channel data register. This patch fixes an off-by-one error when attempting to read a channel flag register where the base address was not properly offset. Fixes: 28e5d3bb0325 ("iio: 104-quad-8: Add IIO support for the ACCES 104-QUAD-8") Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- drivers/iio/counter/104-quad-8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/counter/104-quad-8.c b/drivers/iio/counter/104-quad-8.c index a5913e97945eb6..f9b8fc9ae13fc7 100644 --- a/drivers/iio/counter/104-quad-8.c +++ b/drivers/iio/counter/104-quad-8.c @@ -76,7 +76,7 @@ static int quad8_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; } - flags = inb(base_offset); + flags = inb(base_offset + 1); borrow = flags & BIT(0); carry = !!(flags & BIT(1)); From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 7 Feb 2017 19:58:02 +0200 Subject: [PATCH 0092/1911] swiotlb-xen: implement xen_swiotlb_dma_mmap callback This function creates userspace mapping for the DMA-coherent memory. Signed-off-by: Stefano Stabellini Signed-off-by: Oleksandr Dmytryshyn Signed-off-by: Andrii Anisov Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 19 +++++++++++++++++++ include/xen/swiotlb-xen.h | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index bd62d94f8ac5b7..cd1684e4e864be 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f905d6eeb0482e..c7f61fc0572a31 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -680,3 +680,22 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) return 0; } EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); + +/* + * Create userspace mapping for the DMA-coherent memory. 
+ * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->mmap) + return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + dma_addr, size, attrs); +#endif + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a0083be5d52951..a315c876aaa9ca 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -55,4 +55,9 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001 From: Andrii Anisov Date: Tue, 7 Feb 2017 19:58:03 +0200 Subject: [PATCH 0093/1911] swiotlb-xen: implement xen_swiotlb_get_sgtable callback Signed-off-by: Andrii Anisov Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++++++ include/xen/swiotlb-xen.h | 6 ++++++ 3 files changed, 35 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cd1684e4e864be..76ea48a614e163 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c7f61fc0572a31..23e30b4e1fb600 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -699,3 +699,31 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); + +/* + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->get_sgtable) { +#if 0 + /* + * This check verifies that the page belongs to the current domain and + * is not one mapped from another domain. 
+ * This check is for debug only, and should not go to production build + */ + unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); + BUG_ON (!page_is_ram(bfn)); +#endif + return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, + handle, size, attrs); + } +#endif + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a315c876aaa9ca..1f6d78f044b671 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -2,6 +2,7 @@ #define __LINUX_SWIOTLB_XEN_H #include +#include #include extern int xen_swiotlb_init(int verbose, bool early); @@ -60,4 +61,9 @@ extern int xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); + +extern int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ From 4c86d77743a54fb2d8a4d18a037a074c892bb3be Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 14 Feb 2017 07:43:56 +0100 Subject: [PATCH 0094/1911] xfrm: Don't use sk_family for socket policy lookups On IPv4-mapped IPv6 addresses sk_family is AF_INET6, but the flow informations are created based on AF_INET. So the routing set up 'struct flowi4' but we try to access 'struct flowi6' what leads to an out of bounds access. Fix this by using the family we get with the dst_entry, like we do it for the standard policy lookup. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3c8f5b70abf874..a9af17f0fce669 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir) } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl) + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; @@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, family); int err = 0; if (match) { @@ -2253,7 +2252,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2532,7 +2531,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; From 179125085bd4ca70e8e028913193a93653bd12f7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 14 Feb 2017 10:26:03 -0800 Subject: [PATCH 0095/1911] ARM: OMAP3: Fix smartreflex platform data regression Commit d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod for omap3") dropped platform data that should no 
longer be used as we're booting with device tree. It turns out that smartreflex is still using platform data and produces the following errors during probe: smartreflex smartreflex.0: invalid resource smartreflex smartreflex.0: omap_sr_probe: ioremap fail smartreflex: probe of smartreflex.0 failed with error -22 smartreflex smartreflex.1: invalid resource smartreflex smartreflex.1: omap_sr_probe: ioremap fail smartreflex: probe of smartreflex.1 failed with error -22 Let's fix the regression by adding back the smartreflex hwmod data. The long term is to update the smartreflex driver to use device tree based probing. Fixes: d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod for omap3") Reported-by: Adam Ford Tested-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 56f917ec8621e8..507ff0795a8e25 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -2112,11 +2112,20 @@ static struct omap_hwmod_ocp_if omap3_l4_core__i2c3 = { }; /* L4 CORE -> SR1 interface */ +static struct omap_hwmod_addr_space omap3_sr1_addr_space[] = { + { + .pa_start = OMAP34XX_SR1_BASE, + .pa_end = OMAP34XX_SR1_BASE + SZ_1K - 1, + .flags = ADDR_TYPE_RT, + }, + { }, +}; static struct omap_hwmod_ocp_if omap34xx_l4_core__sr1 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap34xx_sr1_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr1_addr_space, .user = OCP_USER_MPU, }; @@ -2124,15 +2133,25 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr1 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap36xx_sr1_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr1_addr_space, .user = OCP_USER_MPU, }; /* L4 CORE -> SR1 interface */ +static struct omap_hwmod_addr_space omap3_sr2_addr_space[] = { + { + .pa_start = OMAP34XX_SR2_BASE, + .pa_end = OMAP34XX_SR2_BASE + SZ_1K - 1, + .flags = ADDR_TYPE_RT, + }, + { }, +}; static struct omap_hwmod_ocp_if omap34xx_l4_core__sr2 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap34xx_sr2_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr2_addr_space, .user = OCP_USER_MPU, }; @@ -2140,6 +2159,7 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr2 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap36xx_sr2_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr2_addr_space, .user = OCP_USER_MPU, }; From e3dc847a5f85b43ee2bfc8eae407a7e383483228 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 11:38:58 +0100 Subject: [PATCH 0096/1911] vti6: Don't report path MTU below IPV6_MIN_MTU. In vti6_xmit(), the check for IPV6_MIN_MTU before we send a ICMPV6_PKT_TOOBIG message is missing. So we might report a PMTU below 1280. Fix this by adding the required check. 
Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d82042c8d8fd4b..9156d6b9eb24ba 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -484,11 +484,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); - if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } return -EMSGSIZE; } From 448c077eeb02240c430db2a2c3bf5285a4c65d66 Mon Sep 17 00:00:00 2001 From: Matthijs van Duin Date: Thu, 16 Feb 2017 01:05:04 +0100 Subject: [PATCH 0097/1911] ARM: OMAP5 / DRA7: Fix HYP mode boot for thumb2 build 'adr' yields a data-pointer, not a function-pointer. Fixes: 999f934de195 ("ARM: omap5/dra7xx: Enable booting secondary CPU in HYP mode") Signed-off-by: Matthijs van Duin Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-headsmp.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S index fe36ce2734d47a..4c6f14cf92a82e 100644 --- a/arch/arm/mach-omap2/omap-headsmp.S +++ b/arch/arm/mach-omap2/omap-headsmp.S @@ -17,6 +17,7 @@ #include #include +#include #include "omap44xx.h" @@ -66,7 +67,7 @@ wait_2: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 cmp r0, r4 bne wait_2 ldr r12, =API_HYP_ENTRY - adr r0, hyp_boot + badr r0, hyp_boot smc #0 hyp_boot: b omap_secondary_startup From f655e67ac8d797425abb0404d0878758f3f71c1a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 16 Feb 2017 14:10:01 +0800 Subject: [PATCH 0098/1911] drm/i915/gvt: Fix check error on opregion.c As we switched to memremap for opregion, shouldn't use any __iomem for that, and move to use memcpy instead. 
This fixed static check errors for: CHECK drivers/gpu/drm/i915//gvt/opregion.c drivers/gpu/drm/i915//gvt/opregion.c:142:31: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:142:31: expected void *addr drivers/gpu/drm/i915//gvt/opregion.c:142:31: got void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:160:35: expected void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: got void * Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/opregion.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index e227caf5859ebd..5ee660077b0c0e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -203,7 +203,7 @@ struct intel_gvt_firmware { }; struct intel_gvt_opregion { - void __iomem *opregion_va; + void *opregion_va; u32 opregion_pa; }; diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d9fb41ab71198c..5d1caf9daba9bd 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -27,7 +27,6 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - void __iomem *host_va = vgpu->gvt->opregion.opregion_va; u8 *buf; int i; @@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) if (!vgpu_opregion(vgpu)->va) return -ENOMEM; - memcpy_fromio(vgpu_opregion(vgpu)->va, host_va, - INTEL_GVT_OPREGION_SIZE); + memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, + INTEL_GVT_OPREGION_SIZE); for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; From fd64be636708d808852c4c8c1efce0a0a51c24c5 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 15:02:36 +0800 Subject: [PATCH 0099/1911] drm/i915/gvt: introduced failsafe mode into vgpu New failsafe mode is introduced, when we detect guest not supporting GVT-g. In failsafe mode, we will ignore all the MMIO and cfg space read/write from guest. This patch can fix the issue that when guest kernel or graphics driver version is too low, there will be a lot of kernel traces in host. 
V5: rebased onto latest gvt-staging V4: changed coding style by Zhenyu and Ping's advice V3: modified coding style and error messages according to Zhenyu's comment V2: 1) implemented MMIO/GTT/WP pages read/write logic; 2) used a unified function to enter failsafe mode Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 3 ++ drivers/gpu/drm/i915/gvt/gvt.h | 6 +++ drivers/gpu/drm/i915/gvt/handlers.c | 36 ++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.c | 62 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/vgpu.c | 6 ++- 5 files changed, 112 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4a6a2ed65732e1..a77e050b85a341 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -237,6 +237,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; + if (vgpu->failsafe) + return 0; + if (WARN_ON(bytes > 4)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 5ee660077b0c0e..48ef0771d772c8 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -143,6 +143,8 @@ struct intel_vgpu { int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; + bool pv_notified; + bool failsafe; bool resetting; void *sched_data; @@ -449,6 +451,10 @@ struct intel_gvt_ops { }; +enum { + GVT_FAILSAFE_UNSUPPORTED_GUEST, +}; + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1d450627ff6540..6f098bb110bda3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -150,10 +150,34 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) #define fence_num_to_offset(num) \ (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) + +static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +{ + switch (reason) { + case GVT_FAILSAFE_UNSUPPORTED_GUEST: + pr_err("Detected your guest driver doesn't support GVT-g.\n"); + break; + default: + break; + } + pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id); + vgpu->failsafe = true; +} + static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { if (fence_num >= vgpu_fence_sz(vgpu)) { + + /* When guest access oob fence regs without access + * pv_info first, we treat guest not supporting GVT, + * and we will let vgpu enter failsafe mode. + */ + if (!vgpu->pv_notified) { + enter_failsafe_mode(vgpu, + GVT_FAILSAFE_UNSUPPORTED_GUEST); + return -EINVAL; + } gvt_err("vgpu%d: found oob fence register access\n", vgpu->id); gvt_err("vgpu%d: total fence num %d access fence num %d\n", @@ -1001,6 +1025,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, if (invalid_read) gvt_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); + vgpu->pv_notified = true; return 0; } @@ -1318,6 +1343,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, bool enable_execlist; write_vreg(vgpu, offset, p_data, bytes); + + /* when PPGTT mode enabled, we will check if guest has called + * pvinfo, if not, we will treat this guest as non-gvtg-aware + * guest, and stop emulating its cfg space, mmio, gtt, etc. 
+ */ + if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) || + (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))) + && !vgpu->pv_notified) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); + return 0; + } if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)) || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) { enable_execlist = !!(data & GFX_RUN_LIST_ENABLE); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4df078bc5d042b..b2d72dad153715 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, + void *p_data, unsigned int bytes, bool read) +{ + struct intel_gvt *gvt = NULL; + void *pt = NULL; + unsigned int offset = 0; + + if (!vgpu || !p_data) + return; + + gvt = vgpu->gvt; + mutex_lock(&gvt->lock); + offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); + if (reg_is_mmio(gvt, offset)) { + if (read) + intel_vgpu_default_mmio_read(vgpu, offset, p_data, + bytes); + else + intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else if (reg_is_gtt(gvt, offset) && + vgpu->gtt.ggtt_mm->virtual_page_table) { + offset -= gvt->device_info.gtt_start_offset; + pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + if (read) + memcpy(p_data, pt, bytes); + else + memcpy(pt, p_data, bytes); + + } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { + struct intel_vgpu_guest_page *gp; + + /* Since we enter the failsafe mode early during guest boot, + * guest may not have chance to set up its ppgtt table, so + * there should not be any wp pages for guest. Keep the wp + * related code here in case we need to handle it in furture. 
+ */ + gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); + if (gp) { + /* remove write protection to prevent furture traps */ + intel_vgpu_clean_guest_page(vgpu, gp); + if (read) + intel_gvt_hypervisor_read_gpa(vgpu, pa, + p_data, bytes); + else + intel_gvt_hypervisor_write_gpa(vgpu, pa, + p_data, bytes); + } + } + mutex_unlock(&gvt->lock); +} + /** * intel_vgpu_emulate_mmio_read - emulate MMIO read * @vgpu: a vGPU @@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; + + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); + return 0; + } mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { @@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, u32 old_vreg = 0, old_sreg = 0; int ret = -EINVAL; + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false); + return 0; + } + mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 95a97aa0051e78..dcfcce1dc00e34 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -387,8 +387,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, populate_pvinfo_page(vgpu); intel_vgpu_reset_display(vgpu); - if (dmlr) + if (dmlr) { intel_vgpu_reset_cfg_space(vgpu); + /* only reset the failsafe mode when dmlr reset */ + vgpu->failsafe = false; + vgpu->pv_notified = false; + } } vgpu->resetting = false; From d1be371d4f4c12d11023c9fc795e5d460d960680 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Fri, 17 Feb 2017 14:38:33 +0800 Subject: [PATCH 0100/1911] drm/i915/gvt: handle fence reg access during GPU reset Lots of reduntant log info will be printed out during GPU reset, including accessing untracked mmio register and fence register, variable disable_warn_untrack is added previously to handle the situation, but the accessing of fence register is ignored in the previously patch, so add it back. Besides, set the variable disable_warn_untrack to the defalut value after GPU reset is finished. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++------ drivers/gpu/drm/i915/gvt/mmio.c | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6f098bb110bda3..fd7e789a72c317 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -173,16 +173,19 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, * pv_info first, we treat guest not supporting GVT, * and we will let vgpu enter failsafe mode. 
*/ - if (!vgpu->pv_notified) { + if (!vgpu->pv_notified) enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - return -EINVAL; + + if (!vgpu->mmio.disable_warn_untrack) { + gvt_err("vgpu%d: found oob fence register access\n", + vgpu->id); + gvt_err("vgpu%d: total fence %d, access fence %d\n", + vgpu->id, vgpu_fence_sz(vgpu), + fence_num); } - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence num %d access fence num %d\n", - vgpu->id, vgpu_fence_sz(vgpu), fence_num); memset(p_data, 0, bytes); + return -EINVAL; } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index b2d72dad153715..99abb01fa9eb61 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -384,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + vgpu->mmio.disable_warn_untrack = false; } /** From b8826e506ee58873725ec3a25a2a27fefd762574 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Feb 2017 22:07:24 -0500 Subject: [PATCH 0101/1911] selftest for default_file_splice_read() infoleak bug fixed in commit b9dc6f65bc5e ("fix a fencepost error in pipe_advance()") Signed-off-by: Al Viro --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/splice/Makefile | 8 ++++++++ tools/testing/selftests/splice/default_file_splice_read.c | 8 ++++++++ .../testing/selftests/splice/default_file_splice_read.sh | 7 +++++++ 4 files changed, 24 insertions(+) create mode 100644 tools/testing/selftests/splice/Makefile create mode 100644 tools/testing/selftests/splice/default_file_splice_read.c create mode 100755 tools/testing/selftests/splice/default_file_splice_read.sh diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 71b05891a6a14a..0e72f1d03c9ece 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -24,6 +24,7 @@ TARGETS += ptrace TARGETS += seccomp TARGETS += sigaltstack TARGETS += size +TARGETS += splice TARGETS += static_keys TARGETS += sync TARGETS += sysctl diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile new file mode 100644 index 00000000000000..de51f439d4a6a3 --- /dev/null +++ b/tools/testing/selftests/splice/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := default_file_splice_read.sh +EXTRA := default_file_splice_read +all: $(TEST_PROGS) $(EXTRA) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) $(EXTRA) diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c new file mode 100644 index 00000000000000..01dd6091554c3d --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include + +int main(int argc, char **argv) +{ + splice(0, 0, 1, 0, 1<<30, 0); + return 0; +} diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh new file mode 100755 index 00000000000000..1ea2adeabc946d --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.sh @@ -0,0 +1,7 @@ +#!/bin/sh +n=`./default_file_splice_read Date: Wed, 18 Jan 2017 14:13:20 +0300 Subject: [PATCH 0102/1911] hfs: fix hfs_readdir() I was looking through static analysis warnings and there is a bug here that goes all the way back to the start of git. 
Basically we're copying the pointer and nearby garbage instead of the data the fd.key pointer is pointing to. Signed-off-by: Dan Carpenter Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Al Viro --- fs/hfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 5de5c48b418da2..75b254280ff631 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -169,7 +169,7 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx) * Can be done after the list insertion; exclusion with * hfs_delete_cat() is provided by directory lock. */ - memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); + memcpy(&rd->key, &fd.key->cat, sizeof(struct hfs_cat_key)); out: hfs_find_exit(&fd); return err; From 9a584bf9bf0a1c608f5ed5f5e63b074bbc81a322 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:21 +0530 Subject: [PATCH 0103/1911] target/iscsi: split iscsit_check_dataout_hdr() Split iscsit_check_dataout_hdr() into two functions 1. __iscsit_check_dataout_hdr() - This function validates data out hdr. 2. iscsit_check_dataout_hdr() - This function finds iSCSI cmd using iscsit_find_cmd_from_itt_or_dump(), then it calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. This split is required to support Chelsio T6 iSCSI DDP completion feature. T6 adapters reduce number of completions to host by generating single completion for all directly placed(DDP) iSCSI pdus in a sequence, DDP completion contains iSCSI hdr of the last pdu in a sequence. On receiving DDP completion cxgbit driver will first find iSCSI cmd using iscsit_find_cmd_from_itt_or_dump() then updates cmd->write_data_done, cmd->next_burst_len, cmd->data_sn and calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. (Move XRDSL check ahead of itt lookup / dump - nab) Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 65 +++++++++++++++--------- drivers/target/iscsi/iscsi_target_util.c | 1 + include/target/iscsi/iscsi_transport.h | 11 +++- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b4f1d1cbe521a4..2285988c209b00 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1431,36 +1431,17 @@ static void iscsit_do_crypto_hash_buf( } int -iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, - struct iscsi_cmd **out_cmd) +__iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd *cmd, u32 payload_length, + bool *success) { - struct iscsi_data *hdr = (struct iscsi_data *)buf; - struct iscsi_cmd *cmd = NULL; + struct iscsi_data *hdr = buf; struct se_cmd *se_cmd; - u32 payload_length = ntoh24(hdr->dlength); int rc; - if (!payload_length) { - pr_warn("DataOUT payload is ZERO, ignoring.\n"); - return 0; - } - /* iSCSI write */ atomic_long_add(payload_length, &conn->sess->rx_data_octets); - if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { - pr_err("DataSegmentLength: %u is greater than" - " MaxXmitDataSegmentLength: %u\n", payload_length, - conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, - buf); - } - - cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, - payload_length); - if (!cmd) - return 0; - pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x," " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", hdr->itt, hdr->ttt, hdr->datasn, ntohl(hdr->offset), @@ -1553,10 
+1534,44 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, return 0; else if (rc == DATAOUT_CANNOT_RECOVER) return -1; - - *out_cmd = cmd; + *success = true; return 0; } +EXPORT_SYMBOL(__iscsit_check_dataout_hdr); + +int +iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd **out_cmd) +{ + struct iscsi_data *hdr = buf; + struct iscsi_cmd *cmd; + u32 payload_length = ntoh24(hdr->dlength); + int rc; + bool success = false; + + if (!payload_length) { + pr_warn_ratelimited("DataOUT payload is ZERO, ignoring.\n"); + return 0; + } + + if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { + pr_err_ratelimited("DataSegmentLength: %u is greater than" + " MaxXmitDataSegmentLength: %u\n", payload_length, + conn->conn_ops->MaxXmitDataSegmentLength); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, buf); + } + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, payload_length); + if (!cmd) + return 0; + + rc = __iscsit_check_dataout_hdr(conn, buf, cmd, payload_length, &success); + + if (success) + *out_cmd = cmd; + + return rc; +} EXPORT_SYMBOL(iscsit_check_dataout_hdr); static int diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b5a1b4ccba124d..f46eadffec0704 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -417,6 +417,7 @@ struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump( return NULL; } +EXPORT_SYMBOL(iscsit_find_cmd_from_itt_or_dump); struct iscsi_cmd *iscsit_find_cmd_from_ttt( struct iscsi_conn *conn, diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 1277e9ba031818..ff1a4f4cd66d06 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -55,8 +55,12 @@ extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); extern int iscsit_process_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, struct iscsi_scsi_req *); -extern int iscsit_check_dataout_hdr(struct iscsi_conn *, unsigned char *, - struct iscsi_cmd **); +extern int +__iscsit_check_dataout_hdr(struct iscsi_conn *, void *, + struct iscsi_cmd *, u32, bool *); +extern int +iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd **out_cmd); extern int iscsit_check_dataout_payload(struct iscsi_cmd *, struct iscsi_data *, bool); extern int iscsit_setup_nop_out(struct iscsi_conn *, struct iscsi_cmd *, @@ -125,6 +129,9 @@ extern void iscsit_release_cmd(struct iscsi_cmd *); extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_cmd * +iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *conn, + itt_t init_task_tag, u32 length); /* * From iscsi_target_nego.c From 3da1110fd1ac3e11208f12c966c6de7a2c306b9d Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:22 +0530 Subject: [PATCH 0104/1911] target/cxgbit: use cxgb4_tp_smt_idx() to get smt idx cxgb4_tp_smt_idx() returns smt idx for T4,T5,T6 adapters. 
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 35aaa36e9af50a..845a7de39b9c4d 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -872,7 +872,8 @@ cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, goto out; csk->mtu = ndev->mtu; csk->tx_chan = cxgb4_port_chan(ndev); - csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi.adapter_type, + cxgb4_port_viid(ndev)); step = cdev->lldi.ntxq / cdev->lldi.nchan; csk->txq_idx = cxgb4_port_idx(ndev) * step; @@ -907,7 +908,8 @@ cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, port_id = cxgb4_port_idx(ndev); csk->mtu = dst_mtu(dst); csk->tx_chan = cxgb4_port_chan(ndev); - csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi.adapter_type, + cxgb4_port_viid(ndev)); step = cdev->lldi.ntxq / cdev->lldi.nports; csk->txq_idx = (port_id * step) + From d88b504e413e7d1471562ff9c3311dbf983e0291 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:23 +0530 Subject: [PATCH 0105/1911] target/cxgbit: Use T6 specific macros to get ETH/IP hdr len Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 845a7de39b9c4d..b26c85a65b8089 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1068,6 +1068,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) struct sk_buff *skb; const struct tcphdr *tcph; struct cpl_t5_pass_accept_rpl *rpl5; + struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; unsigned int len = roundup(sizeof(*rpl5), 16); unsigned int mtu_idx; u64 opt0; @@ -1121,8 +1122,13 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) opt2 |= WND_SCALE_EN_F; hlen = ntohl(req->hdr_len); - tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + - IP_HDR_LEN_G(hlen); + + if (is_t5(lldi->adapter_type)) + tcph = (struct tcphdr *)((u8 *)(req + 1) + + ETH_HDR_LEN_G(hlen) + IP_HDR_LEN_G(hlen)); + else + tcph = (struct tcphdr *)((u8 *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen)); if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); From 5248788ec300f7ee738502bfc466dbb9b625247e Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:24 +0530 Subject: [PATCH 0106/1911] target/cxgbit: Enable DDP for T6 only if data sequence and pdu are in order Enable DDP for T6 only if DataSequenceInOrder=YES and DataPDUInOrder=YES to ensure inorder delivery of iSCSI pdus. 
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_target.c | 86 +++++++++++++++------ 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8ae399dfe84f16..4780fae2a1d159 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -653,26 +653,6 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) u32 max_npdu, max_iso_npdu; if (conn->login->leading_connection) { - param = iscsi_find_param_from_key(DATASEQUENCEINORDER, - conn->param_list); - if (!param) { - pr_err("param not found key %s\n", DATASEQUENCEINORDER); - return -1; - } - - if (strcmp(param->value, YES)) - return 0; - - param = iscsi_find_param_from_key(DATAPDUINORDER, - conn->param_list); - if (!param) { - pr_err("param not found key %s\n", DATAPDUINORDER); - return -1; - } - - if (strcmp(param->value, YES)) - return 0; - param = iscsi_find_param_from_key(MAXBURSTLENGTH, conn->param_list); if (!param) { @@ -683,11 +663,6 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) if (kstrtou32(param->value, 0, &mbl) < 0) return -1; } else { - if (!conn->sess->sess_ops->DataSequenceInOrder) - return 0; - if (!conn->sess->sess_ops->DataPDUInOrder) - return 0; - mbl = conn->sess->sess_ops->MaxBurstLength; } @@ -706,6 +681,53 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) return 0; } +/* + * cxgbit_seq_pdu_inorder() + * @csk: pointer to cxgbit socket structure + * + * This function checks whether data sequence and data + * pdu are in order. + * + * Return: returns -1 on error, 0 if data sequence and + * data pdu are in order, 1 if data sequence or data pdu + * is not in order. 
+ */ +static int cxgbit_seq_pdu_inorder(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct iscsi_param *param; + + if (conn->login->leading_connection) { + param = iscsi_find_param_from_key(DATASEQUENCEINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATASEQUENCEINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 1; + + param = iscsi_find_param_from_key(DATAPDUINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATAPDUINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 1; + + } else { + if (!conn->sess->sess_ops->DataSequenceInOrder) + return 1; + if (!conn->sess->sess_ops->DataPDUInOrder) + return 1; + } + + return 0; +} + static int cxgbit_set_params(struct iscsi_conn *conn) { struct cxgbit_sock *csk = conn->context; @@ -732,11 +754,24 @@ static int cxgbit_set_params(struct iscsi_conn *conn) } if (!erl) { + int ret; + + ret = cxgbit_seq_pdu_inorder(csk); + if (ret < 0) { + return -1; + } else if (ret > 0) { + if (is_t5(cdev->lldi.adapter_type)) + goto enable_ddp; + else + goto enable_digest; + } + if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { if (cxgbit_set_iso_npdu(csk)) return -1; } +enable_ddp: if (test_bit(CDEV_DDP_ENABLE, &cdev->flags)) { if (cxgbit_setup_conn_pgidx(csk, ppm->tformat.pgsz_idx_dflt)) @@ -745,6 +780,7 @@ static int cxgbit_set_params(struct iscsi_conn *conn) } } +enable_digest: if (cxgbit_set_digest(csk)) return -1; From 79e57cfe00f40d509e2d007a5662db26cdbc74db Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:25 +0530 Subject: [PATCH 0107/1911] target/cxgbit: add T6 iSCSI DDP completion feature Chelsio T6 adapters reduce number of completion to host by generating single completion for all directly placed(DDP) iSCSI pdus in a sequence, completion contains iSCSI hdr of the last pdu in a sequence. On receiving DDP completion cxgbit driver finds iSCSI cmd using iscsit_find_cmd_from_itt_or_dump(), then updates cmd->write_data_done, cmd->next_burst_len, cmd->data_sn and calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. 
(Update __iscsit_check_dataout_hdr parameter usage - nab) Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 3 + drivers/target/iscsi/cxgbit/cxgbit_lro.h | 5 +- drivers/target/iscsi/cxgbit/cxgbit_main.c | 67 ++++++++++++----- drivers/target/iscsi/cxgbit/cxgbit_target.c | 81 +++++++++++++++------ 4 files changed, 113 insertions(+), 43 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index b26c85a65b8089..37a05185dcbe0e 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1114,6 +1114,9 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) opt2 = RX_CHANNEL_V(0) | RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid); + if (!is_t5(lldi->adapter_type)) + opt2 |= RX_FC_DISABLE_F; + if (req->tcpopt.tstamp) opt2 |= TSTAMPS_EN_F; if (req->tcpopt.sack) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_lro.h b/drivers/target/iscsi/cxgbit/cxgbit_lro.h index 28c11bd1b9308c..dcaed3a1d23f87 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_lro.h +++ b/drivers/target/iscsi/cxgbit/cxgbit_lro.h @@ -31,8 +31,9 @@ enum cxgbit_pducb_flags { PDUCBF_RX_DATA = (1 << 1), /* received pdu payload */ PDUCBF_RX_STATUS = (1 << 2), /* received ddp status */ PDUCBF_RX_DATA_DDPD = (1 << 3), /* pdu payload ddp'd */ - PDUCBF_RX_HCRC_ERR = (1 << 4), /* header digest error */ - PDUCBF_RX_DCRC_ERR = (1 << 5), /* data digest error */ + PDUCBF_RX_DDP_CMP = (1 << 4), /* ddp completion */ + PDUCBF_RX_HCRC_ERR = (1 << 5), /* header digest error */ + PDUCBF_RX_DCRC_ERR = (1 << 6), /* data digest error */ }; struct cxgbit_lro_pdu_cb { diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 6531aaac2b9681..4fd775ace541a9 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -165,29 +165,24 @@ static int cxgbit_uld_state_change(void *handle, enum cxgb4_state state) } static void -cxgbit_proc_ddp_status(unsigned int tid, struct cpl_rx_data_ddp *cpl, - struct cxgbit_lro_pdu_cb *pdu_cb) +cxgbit_process_ddpvld(struct cxgbit_sock *csk, struct cxgbit_lro_pdu_cb *pdu_cb, + u32 ddpvld) { - unsigned int status = ntohl(cpl->ddpvld); - pdu_cb->flags |= PDUCBF_RX_STATUS; - pdu_cb->ddigest = ntohl(cpl->ulp_crc); - pdu_cb->pdulen = ntohs(cpl->len); - - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) { - pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", csk->tid, ddpvld); pdu_cb->flags |= PDUCBF_RX_HCRC_ERR; } - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) { - pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", csk->tid, ddpvld); pdu_cb->flags |= PDUCBF_RX_DCRC_ERR; } - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT)) - pr_info("tid 0x%x, status 0x%x, pad bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT)) + pr_info("tid 0x%x, status 0x%x, pad bad.\n", csk->tid, ddpvld); - if ((status & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) && + if ((ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) && (!(pdu_cb->flags & PDUCBF_RX_DATA))) { pdu_cb->flags |= PDUCBF_RX_DATA_DDPD; } @@ -201,13 +196,17 @@ cxgbit_lro_add_packet_rsp(struct sk_buff *skb, u8 op, const __be64 
*rsp) lro_cb->pdu_idx); struct cpl_rx_iscsi_ddp *cpl = (struct cpl_rx_iscsi_ddp *)(rsp + 1); - cxgbit_proc_ddp_status(lro_cb->csk->tid, cpl, pdu_cb); + cxgbit_process_ddpvld(lro_cb->csk, pdu_cb, be32_to_cpu(cpl->ddpvld)); + + pdu_cb->flags |= PDUCBF_RX_STATUS; + pdu_cb->ddigest = ntohl(cpl->ulp_crc); + pdu_cb->pdulen = ntohs(cpl->len); if (pdu_cb->flags & PDUCBF_RX_HDR) pdu_cb->complete = true; - lro_cb->complete = true; lro_cb->pdu_totallen += pdu_cb->pdulen; + lro_cb->complete = true; lro_cb->pdu_idx++; } @@ -257,7 +256,7 @@ cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl) cxgbit_skcb_flags(skb) = 0; lro_cb->complete = false; - } else { + } else if (op == CPL_ISCSI_DATA) { struct cpl_iscsi_data *cpl = (struct cpl_iscsi_data *)gl->va; offset = sizeof(struct cpl_iscsi_data); @@ -267,6 +266,36 @@ cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl) pdu_cb->doffset = lro_cb->offset; pdu_cb->nr_dfrags = gl->nfrags; pdu_cb->dfrag_idx = skb_shinfo(skb)->nr_frags; + lro_cb->complete = false; + } else { + struct cpl_rx_iscsi_cmp *cpl; + + cpl = (struct cpl_rx_iscsi_cmp *)gl->va; + offset = sizeof(struct cpl_rx_iscsi_cmp); + pdu_cb->flags |= (PDUCBF_RX_HDR | PDUCBF_RX_STATUS); + len = be16_to_cpu(cpl->len); + pdu_cb->hdr = gl->va + offset; + pdu_cb->hlen = len; + pdu_cb->hfrag_idx = skb_shinfo(skb)->nr_frags; + pdu_cb->ddigest = be32_to_cpu(cpl->ulp_crc); + pdu_cb->pdulen = ntohs(cpl->len); + + if (unlikely(gl->nfrags > 1)) + cxgbit_skcb_flags(skb) = 0; + + cxgbit_process_ddpvld(lro_cb->csk, pdu_cb, + be32_to_cpu(cpl->ddpvld)); + + if (pdu_cb->flags & PDUCBF_RX_DATA_DDPD) { + pdu_cb->flags |= PDUCBF_RX_DDP_CMP; + pdu_cb->complete = true; + } else if (pdu_cb->flags & PDUCBF_RX_DATA) { + pdu_cb->complete = true; + } + + lro_cb->pdu_totallen += pdu_cb->hlen + pdu_cb->dlen; + lro_cb->complete = true; + lro_cb->pdu_idx++; } cxgbit_copy_frags(skb, gl, offset); @@ -413,6 +442,7 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, switch (op) { case CPL_ISCSI_HDR: case CPL_ISCSI_DATA: + case CPL_RX_ISCSI_CMP: case CPL_RX_ISCSI_DDP: case CPL_FW4_ACK: lro_flush = false; @@ -459,7 +489,8 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, return 0; } - if (op == CPL_ISCSI_HDR || op == CPL_ISCSI_DATA) { + if ((op == CPL_ISCSI_HDR) || (op == CPL_ISCSI_DATA) || + (op == CPL_RX_ISCSI_CMP)) { if (!cxgbit_lro_receive(csk, op, rsp, gl, lro_mgr, napi)) return 0; diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 4780fae2a1d159..2714e5901d1845 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -1021,11 +1021,36 @@ static int cxgbit_handle_iscsi_dataout(struct cxgbit_sock *csk) int rc, sg_nents, sg_off; bool dcrc_err = false; - rc = iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, &cmd); - if (rc < 0) - return rc; - else if (!cmd) - return 0; + if (pdu_cb->flags & PDUCBF_RX_DDP_CMP) { + u32 offset = be32_to_cpu(hdr->offset); + u32 ddp_data_len; + u32 payload_length = ntoh24(hdr->dlength); + bool success = false; + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, 0); + if (!cmd) + return 0; + + ddp_data_len = offset - cmd->write_data_done; + atomic_long_add(ddp_data_len, &conn->sess->rx_data_octets); + + cmd->write_data_done = offset; + cmd->next_burst_len = ddp_data_len; + cmd->data_sn = be32_to_cpu(hdr->datasn); + + rc = __iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, + cmd, payload_length, &success); + if (rc < 0) 
+ return rc; + else if (!success) + return 0; + } else { + rc = iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, &cmd); + if (rc < 0) + return rc; + else if (!cmd) + return 0; + } if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { pr_err("ITT: 0x%08x, Offset: %u, Length: %u," @@ -1389,6 +1414,9 @@ static void cxgbit_lro_hskb_reset(struct cxgbit_sock *csk) for (i = 0; i < ssi->nr_frags; i++) put_page(skb_frag_page(&ssi->frags[i])); ssi->nr_frags = 0; + skb->data_len = 0; + skb->truesize -= skb->len; + skb->len = 0; } static void @@ -1402,39 +1430,42 @@ cxgbit_lro_skb_merge(struct cxgbit_sock *csk, struct sk_buff *skb, u8 pdu_idx) unsigned int len = 0; if (pdu_cb->flags & PDUCBF_RX_HDR) { - hpdu_cb->flags = pdu_cb->flags; + u8 hfrag_idx = hssi->nr_frags; + + hpdu_cb->flags |= pdu_cb->flags; hpdu_cb->seq = pdu_cb->seq; hpdu_cb->hdr = pdu_cb->hdr; hpdu_cb->hlen = pdu_cb->hlen; - memcpy(&hssi->frags[0], &ssi->frags[pdu_cb->hfrag_idx], + memcpy(&hssi->frags[hfrag_idx], &ssi->frags[pdu_cb->hfrag_idx], sizeof(skb_frag_t)); - get_page(skb_frag_page(&hssi->frags[0])); - hssi->nr_frags = 1; - hpdu_cb->frags = 1; - hpdu_cb->hfrag_idx = 0; + get_page(skb_frag_page(&hssi->frags[hfrag_idx])); + hssi->nr_frags++; + hpdu_cb->frags++; + hpdu_cb->hfrag_idx = hfrag_idx; - len = hssi->frags[0].size; - hskb->len = len; - hskb->data_len = len; - hskb->truesize = len; + len = hssi->frags[hfrag_idx].size; + hskb->len += len; + hskb->data_len += len; + hskb->truesize += len; } if (pdu_cb->flags & PDUCBF_RX_DATA) { - u8 hfrag_idx = 1, i; + u8 dfrag_idx = hssi->nr_frags, i; hpdu_cb->flags |= pdu_cb->flags; + hpdu_cb->dfrag_idx = dfrag_idx; len = 0; - for (i = 0; i < pdu_cb->nr_dfrags; hfrag_idx++, i++) { - memcpy(&hssi->frags[hfrag_idx], + for (i = 0; i < pdu_cb->nr_dfrags; dfrag_idx++, i++) { + memcpy(&hssi->frags[dfrag_idx], &ssi->frags[pdu_cb->dfrag_idx + i], sizeof(skb_frag_t)); - get_page(skb_frag_page(&hssi->frags[hfrag_idx])); + get_page(skb_frag_page(&hssi->frags[dfrag_idx])); - len += hssi->frags[hfrag_idx].size; + len += hssi->frags[dfrag_idx].size; hssi->nr_frags++; hpdu_cb->frags++; @@ -1443,7 +1474,6 @@ cxgbit_lro_skb_merge(struct cxgbit_sock *csk, struct sk_buff *skb, u8 pdu_idx) hpdu_cb->dlen = pdu_cb->dlen; hpdu_cb->doffset = hpdu_cb->hlen; hpdu_cb->nr_dfrags = pdu_cb->nr_dfrags; - hpdu_cb->dfrag_idx = 1; hskb->len += len; hskb->data_len += len; hskb->truesize += len; @@ -1528,10 +1558,15 @@ static int cxgbit_rx_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb) static int cxgbit_rx_skb(struct cxgbit_sock *csk, struct sk_buff *skb) { + struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; int ret = -1; - if (likely(cxgbit_skcb_flags(skb) & SKCBF_RX_LRO)) - ret = cxgbit_rx_lro_skb(csk, skb); + if (likely(cxgbit_skcb_flags(skb) & SKCBF_RX_LRO)) { + if (is_t5(lldi->adapter_type)) + ret = cxgbit_rx_lro_skb(csk, skb); + else + ret = cxgbit_process_lro_skb(csk, skb); + } __kfree_skb(skb); return ret; From 4d65491c269729a1e3b375c45e73213f49103d33 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 20 Jan 2017 16:44:33 +0530 Subject: [PATCH 0108/1911] target/iscsi: Fix unsolicited data seq_end_offset calculation In case of unsolicited data for the first sequence seq_end_offset must be set to minimum of total data length and FirstBurstLength, so do not add cmd->write_data_done to the min of total data length and FirstBurstLength. 
This patch avoids that with ImmediateData=Yes, InitialR2T=No, MaxXmitDataSegmentLength < FirstBurstLength that a WRITE command with IO size above FirstBurstLength triggers sequence error messages, for example Set following parameters on target (linux-4.8.12) ImmediateData = Yes InitialR2T = No MaxXmitDataSegmentLength = 8k FirstBurstLength = 64k Log in from Open iSCSI initiator and execute dd if=/dev/zero of=/dev/sdb bs=128k count=1 oflag=direct Error messages on target Command ITT: 0x00000035 with Offset: 65536, Length: 8192 outside of Sequence 73728:131072 while DataSequenceInOrder=Yes. Command ITT: 0x00000035, received DataSN: 0x00000001 higher than expected 0x00000000. Unable to perform within-command recovery while ERL=0. Signed-off-by: Varun Prakash [ bvanassche: Use min() instead of open-coding it / edited patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_erl0.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0fa5..a8bcbc43b047c8 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -44,10 +44,8 @@ void iscsit_set_dataout_sequence_values( */ if (cmd->unsolicited_data) { cmd->seq_start_offset = cmd->write_data_done; - cmd->seq_end_offset = (cmd->write_data_done + - ((cmd->se_cmd.data_length > - conn->sess->sess_ops->FirstBurstLength) ? - conn->sess->sess_ops->FirstBurstLength : cmd->se_cmd.data_length)); + cmd->seq_end_offset = min(cmd->se_cmd.data_length, + conn->sess->sess_ops->FirstBurstLength); return; } From 6cb3216a7863fd13fb125f0867fce55c2bbd8c8e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Mon, 13 Feb 2017 12:18:29 -0800 Subject: [PATCH 0109/1911] qla2xxx: Fix a warning reported by the "smatch" static checker Fix the following warning reported by the "smatch" static checker: drivers/scsi/qla2xxx/qla_init.c:3910 qla2x00_alloc_fcport() warn: use 'flags' here instead of GFP_XXX? Reported-by: Dan Carpenter Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Cc: Dan Carpenter Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f65431480833be..ac579fc5f20231 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3909,7 +3909,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, - GFP_ATOMIC); + flags); fcport->disc_state = DSC_DELETED; fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; fcport->deleted = QLA_SESS_DELETED; From 0ab8ac6f504d61b59856bd34e62bbfd60edd2bb0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 13 Jan 2017 04:47:48 -0600 Subject: [PATCH 0110/1911] target: export protocol identifier I think the transport statistics device file was supposed to show scsiTransportProtocolType. It instead shows the fabric name which is normally closer to the driver name. I was thinking I cannot change from fabric name to protocol type name incase people are expecting the driver name, so this patch adds another file proto_id that exports the SCSI protocol identifier ID. 
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_stat.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 1a39033d2bffaa..be380e4acfcdbb 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -795,16 +795,34 @@ static ssize_t target_stat_transport_dev_name_show(struct config_item *item, return ret; } +static ssize_t target_stat_transport_proto_id_show(struct config_item *item, + char *page) +{ + struct se_lun *lun = to_transport_stat(item); + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->proto_id); + rcu_read_unlock(); + return ret; +} + CONFIGFS_ATTR_RO(target_stat_transport_, inst); CONFIGFS_ATTR_RO(target_stat_transport_, device); CONFIGFS_ATTR_RO(target_stat_transport_, indx); CONFIGFS_ATTR_RO(target_stat_transport_, dev_name); +CONFIGFS_ATTR_RO(target_stat_transport_, proto_id); static struct configfs_attribute *target_stat_scsi_transport_attrs[] = { &target_stat_transport_attr_inst, &target_stat_transport_attr_device, &target_stat_transport_attr_indx, &target_stat_transport_attr_dev_name, + &target_stat_transport_attr_proto_id, NULL, }; From 762b6f00a995863afa274d6b5ffa3880dac1714b Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 15 Feb 2017 23:04:54 +0300 Subject: [PATCH 0111/1911] uapi: fix linux/target_core_user.h userspace compilation errors Consistently use types from linux/types.h to fix the following linux/target_core_user.h userspace compilation errors: /usr/include/linux/target_core_user.h:108:4: error: unknown type name 'uint32_t' uint32_t iov_cnt; /usr/include/linux/target_core_user.h:109:4: error: unknown type name 'uint32_t' uint32_t iov_bidi_cnt; /usr/include/linux/target_core_user.h:110:4: error: unknown type name 'uint32_t' uint32_t iov_dif_cnt; /usr/include/linux/target_core_user.h:111:4: error: unknown type name 'uint64_t' uint64_t cdb_off; /usr/include/linux/target_core_user.h:112:4: error: unknown type name 'uint64_t' uint64_t __pad1; /usr/include/linux/target_core_user.h:113:4: error: unknown type name 'uint64_t' uint64_t __pad2; /usr/include/linux/target_core_user.h:117:4: error: unknown type name 'uint8_t' uint8_t scsi_status; /usr/include/linux/target_core_user.h:118:4: error: unknown type name 'uint8_t' uint8_t __pad1; /usr/include/linux/target_core_user.h:119:4: error: unknown type name 'uint16_t' uint16_t __pad2; /usr/include/linux/target_core_user.h:120:4: error: unknown type name 'uint32_t' uint32_t __pad3; Signed-off-by: Dmitry V. 
Levin Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index c506cddb8165cf..af17b4154ef607 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -105,26 +105,26 @@ struct tcmu_cmd_entry { union { struct { - uint32_t iov_cnt; - uint32_t iov_bidi_cnt; - uint32_t iov_dif_cnt; - uint64_t cdb_off; - uint64_t __pad1; - uint64_t __pad2; + __u32 iov_cnt; + __u32 iov_bidi_cnt; + __u32 iov_dif_cnt; + __u64 cdb_off; + __u64 __pad1; + __u64 __pad2; struct iovec iov[0]; } req; struct { - uint8_t scsi_status; - uint8_t __pad1; - uint16_t __pad2; - uint32_t __pad3; + __u8 scsi_status; + __u8 __pad1; + __u16 __pad2; + __u32 __pad3; char sense_buffer[TCMU_SENSE_BUFFERSIZE]; } rsp; }; } __packed; -#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t) +#define TCMU_OP_ALIGN_SIZE sizeof(__u64) enum tcmu_genl_cmd { TCMU_CMD_UNSPEC, From 7687a7a4435f4b32d8e775a7b6b25aea3a25d25a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0112/1911] vfs: extract common parts of {compat_,}do_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 80 ++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 820a3f06c46a1a..12a216021f8734 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -834,25 +834,15 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, return ret; } -static ssize_t do_readv_writev(int type, struct file *file, - const struct iovec __user * uvector, - unsigned long nr_segs, loff_t *pos, - int flags) +static ssize_t __do_readv_writev(int type, struct file *file, + struct iov_iter *iter, loff_t *pos, int flags) { size_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; + ssize_t ret = 0; io_fn_t fn; iter_fn_t iter_fn; - ret = import_iovec(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; - - tot_len = iov_iter_count(&iter); + tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(type, file, pos, tot_len); @@ -869,15 +859,14 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); + ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, fn, flags); if (type != READ) file_end_write(file); out: - kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -887,6 +876,27 @@ static ssize_t do_readv_writev(int type, struct file *file, return ret; } +static ssize_t do_readv_writev(int type, struct file *file, + const struct iovec __user *uvector, + unsigned long nr_segs, loff_t *pos, + int flags) +{ + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + ssize_t ret; + + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + + ret = __do_readv_writev(type, file, &iter, pos, flags); + kfree(iov); + + return ret; +} + ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, int flags) { @@ -1064,51 +1074,19 @@ 
static ssize_t compat_do_readv_writev(int type, struct file *file, unsigned long nr_segs, loff_t *pos, int flags) { - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; ssize_t ret; - io_fn_t fn; - iter_fn_t iter_fn; ret = compat_import_iovec(type, uvector, nr_segs, UIO_FASTIOV, &iov, &iter); if (ret < 0) return ret; - tot_len = iov_iter_count(&iter); - if (!tot_len) - goto out; - ret = rw_verify_area(type, file, pos, tot_len); - if (ret < 0) - goto out; - - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; - file_start_write(file); - } - - if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); - else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); - - if (type != READ) - file_end_write(file); - -out: + ret = __do_readv_writev(type, file, &iter, pos, flags); kfree(iov); - if ((ret + (type == READ)) > 0) { - if (type == READ) - fsnotify_access(file); - else - fsnotify_modify(file); - } + return ret; } From 0f78d06ac1e9b470cbd8f913ee1688c8b2c8feb3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0113/1911] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 12a216021f8734..198614f757fa46 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -23,9 +23,6 @@ #include #include -typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); -typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); - const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -675,7 +672,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, iter_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { struct kiocb kiocb; ssize_t ret; @@ -692,7 +689,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); kiocb.ki_pos = *ppos; - ret = fn(&kiocb, iter); + if (type == READ) + ret = filp->f_op->read_iter(&kiocb, iter); + else + ret = filp->f_op->write_iter(&kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -700,7 +700,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, io_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { ssize_t ret = 0; @@ -711,7 +711,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); + if (type == READ) { + nr = filp->f_op->read(filp, iovec.iov_base, + iovec.iov_len, ppos); + } else { + nr = filp->f_op->write(filp, iovec.iov_base, + iovec.iov_len, ppos); + } if (nr < 0) { if (!ret) @@ -839,8 +845,6 @@ static ssize_t __do_readv_writev(int type, struct file *file, { size_t tot_len; ssize_t ret = 0; - io_fn_t fn; - iter_fn_t iter_fn; tot_len = iov_iter_count(iter); if (!tot_len) @@ -849,19 +853,14 @@ static 
ssize_t __do_readv_writev(int type, struct file *file, if (ret < 0) goto out; - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; + if (type != READ) file_start_write(file); - } - if (iter_fn) - ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); + if ((type == READ && file->f_op->read_iter) || + (type == WRITE && file->f_op->write_iter)) + ret = do_iter_readv_writev(file, iter, pos, type, flags); else - ret = do_loop_readv_writev(file, iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, type, flags); if (type != READ) file_end_write(file); From bb7462b6fd64e40809a857223bf7f0e628969f87 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0114/1911] vfs: use helpers for calling f_op->{read,write}_iter() Signed-off-by: Miklos Szeredi --- drivers/block/loop.c | 4 ++-- fs/aio.c | 4 ++-- fs/read_write.c | 12 ++++++------ fs/splice.c | 2 +- include/linux/fs.h | 12 ++++++++++++ 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285c67ec11..2cf2903a071504 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; if (rw == WRITE) - ret = file->f_op->write_iter(&cmd->iocb, &iter); + ret = call_write_iter(file, &cmd->iocb, &iter); else - ret = file->f_op->read_iter(&cmd->iocb, &iter); + ret = call_read_iter(file, &cmd->iocb, &iter); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); diff --git a/fs/aio.c b/fs/aio.c index 4ab67e8cb776ac..9f4a9a2e7ae4c3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1494,7 +1494,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) - ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + ret = aio_ret(req, call_read_iter(file, req, &iter)); kfree(iovec); return ret; } @@ -1519,7 +1519,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, if (!ret) { req->ki_flags |= IOCB_WRITE; file_start_write(file); - ret = aio_ret(req, file->f_op->write_iter(req, &iter)); + ret = aio_ret(req, call_write_iter(file, req, &iter)); /* * We release freeze protection in aio_complete(). 
Fool lockdep * by telling it the lock got released so that it doesn't diff --git a/fs/read_write.c b/fs/read_write.c index 198614f757fa46..f2ed9fdc98fdea 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -367,7 +367,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= READ; - ret = file->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -387,7 +387,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= WRITE; - ret = file->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -436,7 +436,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo kiocb.ki_pos = *ppos; iov_iter_init(&iter, READ, &iov, 1, len); - ret = filp->f_op->read_iter(&kiocb, &iter); + ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -493,7 +493,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); - ret = filp->f_op->write_iter(&kiocb, &iter); + ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -690,9 +690,9 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_pos = *ppos; if (type == READ) - ret = filp->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(filp, &kiocb, iter); else - ret = filp->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(filp, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79ae..6518f058bd7f3a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -306,7 +306,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - ret = in->f_op->read_iter(&kiocb, &to); + ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { *ppos = kiocb.ki_pos; file_accessed(in); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78c81e6f5d76f7..a3145cdff8f261 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1715,6 +1715,18 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, From f74ac01520c9f6d89bbc3c6931a72f757b742f86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0115/1911] mm: use helper for calling f_op->mmap() Signed-off-by: Miklos Szeredi --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/vgem/vgem_drv.c | 2 +- fs/coda/file.c | 2 +- include/linux/fs.h | 5 +++++ ipc/shm.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 4 ++-- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 
5e38299b5df653..84fc9f00157600 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); + ret = call_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 477e07f0ecb674..9f7e222b3e89ce 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -287,7 +287,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj, if (!obj->filp) return -ENODEV; - ret = obj->filp->f_op->mmap(obj->filp, vma); + ret = call_mmap(obj->filp, vma); if (ret) return ret; diff --git a/fs/coda/file.c b/fs/coda/file.c index 6e0154eb6fcc1c..9d956cd6d46f93 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cfi->cfi_mapcount++; spin_unlock(&cii->c_lock); - return host_file->f_op->mmap(host_file, vma); + return call_mmap(host_file, vma); } int coda_open(struct inode *coda_inode, struct file *coda_file) diff --git a/include/linux/fs.h b/include/linux/fs.h index a3145cdff8f261..93691b59e476ac 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,6 +1727,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, return file->f_op->write_iter(kio, iter); } +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 81203e8ba01359..4329fe3ef5948a 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = sfd->file->f_op->mmap(sfd->file, vma); + ret = call_mmap(sfd->file, vma); if (ret) { shm_close(vma); return ret; diff --git a/mm/mmap.c b/mm/mmap.c index dc4291dcc99b8f..3714aa4e6f8160 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1668,7 +1668,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * new file must not have been exposed to user-space, yet. 
*/ vma->vm_file = get_file(file); - error = file->f_op->mmap(file, vma); + error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index 24f9f5f3914592..e366354f777de1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1084,7 +1084,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -1115,7 +1115,7 @@ static int do_mmap_private(struct vm_area_struct *vma, * - VM_MAYSHARE will be set if it may attempt to share */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); From 0eb8af4916a540c362a2950e5ab54eca32eb7d58 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0116/1911] vfs: use helper for calling f_op->fsync() Signed-off-by: Miklos Szeredi --- fs/sync.c | 2 +- include/linux/fs.h | 6 ++++++ ipc/shm.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 2a54c1f2203595..11ba023434b14b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) spin_unlock(&inode->i_lock); mark_inode_dirty_sync(inode); } - return file->f_op->fsync(file, start, end, datasync); + return call_fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 93691b59e476ac..72a33007ec24ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,6 +1732,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 4329fe3ef5948a..258aff2e03bbc6 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); + return call_fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, From e3b88ee95b4e4bf3e9729a4695d695b9c7c296c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 16:25:45 -0800 Subject: [PATCH 0117/1911] target: Fix handling of aborted failed commands If a target driver (e.g. tcm_qla2xxx) calls transport_generic_request_failure() to report that receiving data has failed and that SCSI command has already been aborted by the initiator, ensure that the SCSI status ABORTED is sent back to the initiator instead of the sense code provided by the target driver. 
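As a rough sketch of the ordering this change enforces, consider a hypothetical fabric-driver error path. Only transport_generic_request_failure(), transport_check_aborted_status() and the TCM_* sense code are existing target-core symbols; the wrapper function and its name are made up purely for illustration.

#include <target/target_core_base.h>
#include <target/target_core_fabric.h>

/* Illustrative fabric-driver helper, not real driver code. */
static void demo_report_data_in_failure(struct se_cmd *cmd)
{
	/*
	 * transport_generic_request_failure() now checks
	 * transport_check_aborted_status(cmd, 1) before doing anything
	 * else; if the initiator already aborted this command it returns
	 * immediately, so the ABORTED status is what reaches the
	 * initiator instead of the sense code supplied here.
	 */
	transport_generic_request_failure(cmd,
			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
}
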
Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 22190003534dbc..efb9e6f382019e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1647,6 +1647,9 @@ void transport_generic_request_failure(struct se_cmd *cmd, { int ret = 0, post_ret = 0; + if (transport_check_aborted_status(cmd, 1)) + return; + pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx" " CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]); pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n", From 51ec502a32665fed66c7f03799ede4023b212536 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 16:25:54 -0800 Subject: [PATCH 0118/1911] target: Delete tmr from list before processing This patch does an explicit list_del_init(tmr->tmr_list) in core_tmr_drain_tmr_list() before starting processing of outstanding TMRs to abort, instead of explicitly checking which TMR descriptor matches the caller. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 311dc3c2f1dc5c..a806d9bca3d2aa 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -215,13 +215,8 @@ static void core_tmr_drain_tmr_list( * LUN_RESET tmr.. */ spin_lock_irqsave(&dev->se_tmr_lock, flags); + list_del_init(&tmr->tmr_list); list_for_each_entry_safe(tmr_p, tmr_pp, &dev->dev_tmr_list, tmr_list) { - /* - * Allow the received TMR to return with FUNCTION_COMPLETE. - */ - if (tmr_p == tmr) - continue; - cmd = tmr_p->task_cmd; if (!cmd) { pr_err("Unable to locate struct se_cmd for TMR\n"); From 7e6a69ee954caf8dd8c98f70c8ee894c8ebfdcf0 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 17 Feb 2017 19:13:10 +0000 Subject: [PATCH 0119/1911] arm64: dts: juno: update definition for programmable replicator Juno platforms have a programmable replicator splitting the trace output to TPIU and ETR. Currently this is not being programmed as it is being treated as a none-programmable replicator - which is the default operational mode for these devices. The TPIU in the system is enabled by default, and this combination is causing back-pressure in the trace system resulting in overflows at the source. Replaces the existing definition with one that defines the programmable replicator, using the "qcom,coresight-replicator1x" driver that provides the correct functionality for CoreSight programmable replicators. Reviewed-and-Tested-by: Mathieu Poirier Signed-off-by: Mike Leach Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/juno-base.dtsi | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 9d799d938d2f6b..df539e865b903c 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -372,12 +372,13 @@ }; }; - coresight-replicator { - /* - * Non-configurable replicators don't show up on the - * AMBA bus. As such no need to add "arm,primecell". 
- */ - compatible = "arm,coresight-replicator"; + replicator@20120000 { + compatible = "qcom,coresight-replicator1x", "arm,primecell"; + reg = <0 0x20120000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + power-domains = <&scpi_devpd 0>; ports { #address-cells = <1>; From 248e23b50e2da0753f3b5faa068939cbe9f8a75a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Feb 2017 11:26:33 +0100 Subject: [PATCH 0120/1911] batman-adv: Fix double free during fragment merge error The function batadv_frag_skb_buffer was supposed not to consume the skbuff on errors. This was followed in the helper function batadv_frag_insert_packet when the skb would potentially be inserted in the fragment queue. But it could happen that the next helper function batadv_frag_merge_packets would try to merge the fragments and fail. This results in a kfree_skb of all the enqueued fragments (including the just inserted one). batadv_recv_frag_packet would detect the error in batadv_frag_skb_buffer and try to free the skb again. The behavior of batadv_frag_skb_buffer (and its helper batadv_frag_insert_packet) must therefore be changed to always consume the skbuff to have a common behavior and avoid the double kfree_skb. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0854ebd8613e9b..31e97e9aee0d54 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -239,8 +239,10 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, spin_unlock_bh(&chain->lock); err: - if (!ret) + if (!ret) { kfree(frag_entry_new); + kfree_skb(skb); + } return ret; } @@ -313,7 +315,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) * * There are three possible outcomes: 1) Packet is merged: Return true and * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb - * to NULL; 3) Error: Return false and leave skb as is. + * to NULL; 3) Error: Return false and free skb. * * Return: true when packet is merged or buffered, false when skb is not not * used. @@ -338,9 +340,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, goto out_err; out: - *skb = skb_out; ret = true; out_err: + *skb = skb_out; return ret; } From 51c6b429c0c95e67edd1cb0b548c5cf6a6604763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 13 Feb 2017 20:44:31 +0100 Subject: [PATCH 0121/1911] batman-adv: Fix transmission of final, 16th fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to split and transmit a unicast packet in 16 parts will fail for the final fragment: After having sent the 15th one with a frag_packet.no index of 14, we will increase the the index to 15 - and return with an error code immediately, even though one more fragment is due for transmission and allowed. Fixing this issue by moving the check before incrementing the index. While at it, adding an unlikely(), because the check is actually more of an assertion. 
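To make the off-by-one concrete, the toy model below (plain C, not the batman-adv code; MAX_FRAGMENTS, send_fragments() and parts are made-up names) mirrors the shape of the send loop: tail fragments are cut off inside the loop and the remaining head is transmitted as the final fragment afterwards, so the limit check has to run before a tail fragment is created rather than after the index is incremented.

#include <stdio.h>

#define MAX_FRAGMENTS	16

/* Returns 0 if "parts" fragments fit within the limit, -1 otherwise. */
static int send_fragments(int parts)
{
	int no = 0;

	while (parts > 1) {			/* cut tail fragments */
		if (no == MAX_FRAGMENTS - 1)	/* fixed ordering: check first */
			return -1;
		/* ... create and transmit tail fragment "no" here ... */
		no++;
		parts--;
	}
	/* ... transmit the remaining head as fragment "no" ... */
	return 0;
}

int main(void)
{
	/* prints "16 parts: 0, 17 parts: -1" */
	printf("16 parts: %d, 17 parts: %d\n",
	       send_fragments(16), send_fragments(17));
	return 0;
}

With the old ordering (increment first, then compare against MAX_FRAGMENTS - 1) the 16-part case above would fail right after the 15th tail fragment, which is exactly the behaviour this patch removes.
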
Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 31e97e9aee0d54..11149e5be4e0ef 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -501,6 +501,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { + /* The initial check in this function should cover this case */ + if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) { + ret = -EINVAL; + goto put_primary_if; + } + skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) { ret = -ENOMEM; @@ -517,12 +523,6 @@ int batadv_frag_send_packet(struct sk_buff *skb, } frag_header.no++; - - /* The initial check in this function should cover this case */ - if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { - ret = -EINVAL; - goto put_primary_if; - } } /* Make room for the fragment header. */ From 72cedf599fcebfd6cd2550274d7855838068d28c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 21 Feb 2017 23:00:46 +0100 Subject: [PATCH 0122/1911] ASoC: mediatek: add I2C dependency for CS42XX8 We should not select drivers that depend on I2C when that is disabled, as it results in a build error: warning: (SND_SOC_MT2701_CS42448) selects SND_SOC_CS42XX8_I2C which has unmet direct dependencies (SOUND && !M68K && !UML && SND && SND_SOC && I2C) sound/soc/codecs/cs42xx8-i2c.c:60:1: warning: data definition has no type or storage class module_i2c_driver(cs42xx8_i2c_driver); sound/soc/codecs/cs42xx8-i2c.c:60:1: error: type defaults to 'int' in declaration of 'module_i2c_driver' [-Werror=implicit-int] Fixes: 1f458d53f76c ("ASoC: mediatek: Add mt2701-cs42448 driver and config option.") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- sound/soc/mediatek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 05cf809cf9e146..d7013bde6f45fc 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -13,7 +13,7 @@ config SND_SOC_MT2701 config SND_SOC_MT2701_CS42448 tristate "ASoc Audio driver for MT2701 with CS42448 codec" - depends on SND_SOC_MT2701 + depends on SND_SOC_MT2701 && I2C select SND_SOC_CS42XX8_I2C select SND_SOC_BT_SCO help From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 13 Jan 2017 15:07:51 -0500 Subject: [PATCH 0123/1911] xen: do not re-use pirq number cached in pci device msi msg data Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. 
All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af7b..292ab0364a89af 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? From 2e38bea99a80eab408adee27f873a188d57b76cb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0124/1911] fuse: add missing FR_FORCE fuse_file_put() was missing the "force" flag for the RELEASE request when sending synchronously (fuseblk). If this flag is not set, then a sync request may be interrupted before it is dequeued by the userspace filesystem. In this case the OPEN won't be balanced with a RELEASE. Signed-off-by: Miklos Szeredi Fixes: 5a18ec176c93 ("fuse: fix hang of single threaded fuseblk filesystem") Cc: # v2.6.38+ --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a22..5ec5870e423ab5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { + __set_bit(FR_FORCE, &req->flags); __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); From 267d84449f52349ee252db684ed95ede18e51744 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0125/1911] fuse: cleanup fuse_file refcounting struct fuse_file is stored in file->private_data. 
Make this always be a counting reference for consistency. This also allows fuse_sync_release() to call fuse_file_put() instead of partially duplicating its functionality. Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 18 +++++++++--------- fs/fuse/fuse_i.h | 1 - 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 811fd8929a18c1..e816166ce42fa9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -473,7 +473,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (err) { fuse_sync_release(ff, flags); } else { - file->private_data = fuse_file_get(ff); + file->private_data = ff; fuse_finish_open(inode, file); } return err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ec5870e423ab5..a5f79c59fe1e51 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) } INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); + atomic_set(&ff->count, 1); RB_CLEAR_NODE(&ff->polled_node); init_waitqueue_head(&ff->poll_wait); @@ -75,7 +75,7 @@ void fuse_file_free(struct fuse_file *ff) kfree(ff); } -struct fuse_file *fuse_file_get(struct fuse_file *ff) +static struct fuse_file *fuse_file_get(struct fuse_file *ff) { atomic_inc(&ff->count); return ff; @@ -147,7 +147,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ff->open_flags &= ~FOPEN_DIRECT_IO; ff->nodeid = nodeid; - file->private_data = fuse_file_get(ff); + file->private_data = ff; return 0; } @@ -298,13 +298,13 @@ static int fuse_release(struct inode *inode, struct file *file) void fuse_sync_release(struct fuse_file *ff, int flags) { - WARN_ON(atomic_read(&ff->count) > 1); + WARN_ON(atomic_read(&ff->count) != 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); - __set_bit(FR_FORCE, &ff->reserved_req->flags); - __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags); - fuse_request_send(ff->fc, ff->reserved_req); - fuse_put_request(ff->fc, ff->reserved_req); - kfree(ff); + /* + * iput(NULL) is a no-op and since the refcount is 1 and everything's + * synchronous, we are fine with not doing igrab() here" + */ + fuse_file_put(ff, true); } EXPORT_SYMBOL_GPL(fuse_sync_release); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac5e..83f797271aefb4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -732,7 +732,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, int fuse_open_common(struct inode *inode, struct file *file, bool isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); -struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file); From 9a87ad3da905239413477ac0698734afc5cc30bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0126/1911] fuse: release: private_data cannot be NULL Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a5f79c59fe1e51..7069ea23204933 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -246,14 +246,9 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) void fuse_release_common(struct file *file, int opcode) { - struct fuse_file *ff; - struct fuse_req *req; - - ff = file->private_data; - if (unlikely(!ff)) - return; + struct fuse_file *ff = file->private_data; + struct fuse_req *req = ff->reserved_req; - req = 
ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); if (ff->flock) { From 1c68856e6ea8abd714415e52ef88943c022e24f0 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:10 -0800 Subject: [PATCH 0127/1911] scsi: aacraid: Fix camel case Replaced camel case with snake case for init supported options. Suggested-by: Johannes Thumshirn Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 53 +++++++++--------- drivers/scsi/aacraid/aacraid.h | 98 +++++++++++++++++----------------- drivers/scsi/aacraid/commsup.c | 6 +-- drivers/scsi/aacraid/linit.c | 32 +++++------ drivers/scsi/aacraid/rx.c | 2 +- drivers/scsi/aacraid/src.c | 2 +- 6 files changed, 100 insertions(+), 93 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 907f1e80665b1c..98d4ffd798c507 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -483,7 +483,7 @@ int aac_get_containers(struct aac_dev *dev) if (status >= 0) { dresp = (struct aac_get_container_count_resp *)fib_data(fibptr); maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_SUPPORTED_240_VOLUMES) { maximum_num_containers = le32_to_cpu(dresp->MaxSimpleVolumes); @@ -639,13 +639,16 @@ static void _aac_probe_container2(void * context, struct fib * fibptr) fsa_dev_ptr = fibptr->dev->fsa_dev; if (fsa_dev_ptr) { struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr); + __le32 sup_options2; + fsa_dev_ptr += scmd_id(scsicmd); + sup_options2 = + fibptr->dev->supplement_adapter_info.supported_options2; if ((le32_to_cpu(dresp->status) == ST_OK) && (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE) && (le32_to_cpu(dresp->mnt[0].state) != FSCS_HIDDEN)) { - if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & - AAC_OPTION_VARIABLE_BLOCK_SIZE)) { + if (!(sup_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE)) { dresp->mnt[0].fileinfo.bdevinfo.block_size = 0x200; fsa_dev_ptr->block_size = 0x200; } else { @@ -688,7 +691,7 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) int status; dresp = (struct aac_mount *) fib_data(fibptr); - if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (!(fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE)) dresp->mnt[0].capacityhigh = 0; if ((le32_to_cpu(dresp->status) != ST_OK) || @@ -705,7 +708,7 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) dinfo = (struct aac_query_mount *)fib_data(fibptr); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE) dinfo->command = cpu_to_le32(VM_NameServeAllBlk); else @@ -745,7 +748,7 @@ static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(stru dinfo = (struct aac_query_mount *)fib_data(fibptr); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE) dinfo->command = cpu_to_le32(VM_NameServeAllBlk); else @@ -896,12 +899,14 @@ char * get_container_type(unsigned tindex) static void setinqstr(struct aac_dev *dev, void *data, int tindex) { struct scsi_inq *str; + struct aac_supplement_adapter_info *sup_adap_info; + 
sup_adap_info = &dev->supplement_adapter_info; str = (struct scsi_inq *)(data); /* cast data to scsi inq block */ memset(str, ' ', sizeof(*str)); - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + if (sup_adap_info->adapter_type_text[0]) { + char *cp = sup_adap_info->adapter_type_text; int c; if ((cp[0] == 'A') && (cp[1] == 'O') && (cp[2] == 'C')) inqstrcpy("SMC", str->vid); @@ -911,8 +916,7 @@ static void setinqstr(struct aac_dev *dev, void *data, int tindex) ++cp; c = *cp; *cp = '\0'; - inqstrcpy (dev->supplement_adapter_info.AdapterTypeText, - str->vid); + inqstrcpy(sup_adap_info->adapter_type_text, str->vid); *cp = c; while (*cp && *cp != ' ') ++cp; @@ -1675,8 +1679,8 @@ int aac_issue_bmic_identify(struct aac_dev *dev, u32 bus, u32 target) if (!identify_resp) goto fib_free_ptr; - vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceBus); - vid = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceTarget); + vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_bus); + vid = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_target); aac_fib_init(fibptr); @@ -1815,9 +1819,9 @@ int aac_report_phys_luns(struct aac_dev *dev, struct fib *fibptr, int rescan) } vbus = (u32) le16_to_cpu( - dev->supplement_adapter_info.VirtDeviceBus); + dev->supplement_adapter_info.virt_device_bus); vid = (u32) le16_to_cpu( - dev->supplement_adapter_info.VirtDeviceTarget); + dev->supplement_adapter_info.virt_device_target); aac_fib_init(fibptr); @@ -1893,7 +1897,7 @@ int aac_get_adapter_info(struct aac_dev* dev) } memcpy(&dev->adapter_info, info, sizeof(*info)); - dev->supplement_adapter_info.VirtDeviceBus = 0xffff; + dev->supplement_adapter_info.virt_device_bus = 0xffff; if (dev->adapter_info.options & AAC_OPT_SUPPLEMENT_ADAPTER_INFO) { struct aac_supplement_adapter_info * sinfo; @@ -1961,7 +1965,7 @@ int aac_get_adapter_info(struct aac_dev* dev) } if (!dev->sync_mode && dev->sa_firmware && - dev->supplement_adapter_info.VirtDeviceBus != 0xffff) { + dev->supplement_adapter_info.virt_device_bus != 0xffff) { /* Thor SA Firmware -> CISS_REPORT_PHYSICAL_LUNS */ rcode = aac_report_phys_luns(dev, fibptr, AAC_INIT); } @@ -1976,8 +1980,8 @@ int aac_get_adapter_info(struct aac_dev* dev) (tmp>>16)&0xff, tmp&0xff, le32_to_cpu(dev->adapter_info.kernelbuild), - (int)sizeof(dev->supplement_adapter_info.BuildDate), - dev->supplement_adapter_info.BuildDate); + (int)sizeof(dev->supplement_adapter_info.build_date), + dev->supplement_adapter_info.build_date); tmp = le32_to_cpu(dev->adapter_info.monitorrev); printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", dev->name, dev->id, @@ -1993,14 +1997,15 @@ int aac_get_adapter_info(struct aac_dev* dev) shost_to_class(dev->scsi_host_ptr), buffer)) printk(KERN_INFO "%s%d: serial %s", dev->name, dev->id, buffer); - if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) { + if (dev->supplement_adapter_info.vpd_info.tsid[0]) { printk(KERN_INFO "%s%d: TSID %.*s\n", dev->name, dev->id, - (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid), - dev->supplement_adapter_info.VpdInfo.Tsid); + (int)sizeof(dev->supplement_adapter_info + .vpd_info.tsid), + dev->supplement_adapter_info.vpd_info.tsid); } if (!aac_check_reset || ((aac_check_reset == 1) && - (dev->supplement_adapter_info.SupportedOptions2 & + (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) { printk(KERN_INFO "%s%d: Reset Adapter Ignored\n", dev->name, dev->id); @@ -2008,7 +2013,7 @@ int aac_get_adapter_info(struct 
aac_dev* dev) } dev->cache_protected = 0; - dev->jbod = ((dev->supplement_adapter_info.FeatureBits & + dev->jbod = ((dev->supplement_adapter_info.feature_bits & AAC_FEATURE_JBOD) != 0); dev->nondasd_support = 0; dev->raid_scsi_mode = 0; @@ -2631,7 +2636,7 @@ static int aac_start_stop(struct scsi_cmnd *scsicmd) struct scsi_device *sdev = scsicmd->device; struct aac_dev *aac = (struct aac_dev *)sdev->host->hostdata; - if (!(aac->supplement_adapter_info.SupportedOptions2 & + if (!(aac->supplement_adapter_info.supported_options2 & AAC_OPTION_POWER_MANAGEMENT)) { scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index f2344971e3cbe3..b5a2c87f289b81 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1380,57 +1380,57 @@ struct aac_adapter_info struct aac_supplement_adapter_info { - u8 AdapterTypeText[17+1]; - u8 Pad[2]; - __le32 FlashMemoryByteSize; - __le32 FlashImageId; - __le32 MaxNumberPorts; - __le32 Version; - __le32 FeatureBits; - u8 SlotNumber; - u8 ReservedPad0[3]; - u8 BuildDate[12]; - __le32 CurrentNumberPorts; + u8 adapter_type_text[17+1]; + u8 pad[2]; + __le32 flash_memory_byte_size; + __le32 flash_image_id; + __le32 max_number_ports; + __le32 version; + __le32 feature_bits; + u8 slot_number; + u8 reserved_pad0[3]; + u8 build_date[12]; + __le32 current_number_ports; struct { - u8 AssemblyPn[8]; - u8 FruPn[8]; - u8 BatteryFruPn[8]; - u8 EcVersionString[8]; - u8 Tsid[12]; - } VpdInfo; - __le32 FlashFirmwareRevision; - __le32 FlashFirmwareBuild; - __le32 RaidTypeMorphOptions; - __le32 FlashFirmwareBootRevision; - __le32 FlashFirmwareBootBuild; - u8 MfgPcbaSerialNo[12]; - u8 MfgWWNName[8]; - __le32 SupportedOptions2; - __le32 StructExpansion; + u8 assembly_pn[8]; + u8 fru_pn[8]; + u8 battery_fru_pn[8]; + u8 ec_version_string[8]; + u8 tsid[12]; + } vpd_info; + __le32 flash_firmware_revision; + __le32 flash_firmware_build; + __le32 raid_type_morph_options; + __le32 flash_firmware_boot_revision; + __le32 flash_firmware_boot_build; + u8 mfg_pcba_serial_no[12]; + u8 mfg_wwn_name[8]; + __le32 supported_options2; + __le32 struct_expansion; /* StructExpansion == 1 */ - __le32 FeatureBits3; - __le32 SupportedPerformanceModes; - u8 HostBusType; /* uses HOST_BUS_TYPE_xxx defines */ - u8 HostBusWidth; /* actual width in bits or links */ - u16 HostBusSpeed; /* actual bus speed/link rate in MHz */ - u8 MaxRRCDrives; /* max. number of ITP-RRC drives/pool */ - u8 MaxDiskXtasks; /* max. possible num of DiskX Tasks */ - - u8 CpldVerLoaded; - u8 CpldVerInFlash; - - __le64 MaxRRCCapacity; - __le32 CompiledMaxHistLogLevel; - u8 CustomBoardName[12]; - u16 SupportedCntlrMode; /* identify supported controller mode */ - u16 ReservedForFuture16; - __le32 SupportedOptions3; /* reserved for future options */ - - __le16 VirtDeviceBus; /* virt. SCSI device for Thor */ - __le16 VirtDeviceTarget; - __le16 VirtDeviceLUN; - __le16 Unused; - __le32 ReservedForFutureGrowth[68]; + __le32 feature_bits3; + __le32 supported_performance_modes; + u8 host_bus_type; /* uses HOST_BUS_TYPE_xxx defines */ + u8 host_bus_width; /* actual width in bits or links */ + u16 host_bus_speed; /* actual bus speed/link rate in MHz */ + u8 max_rrc_drives; /* max. number of ITP-RRC drives/pool */ + u8 max_disk_xtasks; /* max. 
possible num of DiskX Tasks */ + + u8 cpld_ver_loaded; + u8 cpld_ver_in_flash; + + __le64 max_rrc_capacity; + __le32 compiled_max_hist_log_level; + u8 custom_board_name[12]; + u16 supported_cntlr_mode; /* identify supported controller mode */ + u16 reserved_for_future16; + __le32 supported_options3; /* reserved for future options */ + + __le16 virt_device_bus; /* virt. SCSI device for Thor */ + __le16 virt_device_target; + __le16 virt_device_lun; + __le16 unused; + __le32 reserved_for_future_growth[68]; }; #define AAC_FEATURE_FALCON cpu_to_le32(0x00000010) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 969727b67cdd14..56090f5be6c5bb 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1815,7 +1815,7 @@ int aac_check_health(struct aac_dev * aac) printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); if (!aac_check_reset || ((aac_check_reset == 1) && - (aac->supplement_adapter_info.SupportedOptions2 & + (aac->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) goto out; host = aac->scsi_host_ptr; @@ -2264,8 +2264,8 @@ static int aac_send_wellness_command(struct aac_dev *dev, char *wellness_str, aac_fib_init(fibptr); - vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceBus); - vid = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceTarget); + vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_bus); + vid = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_target); srbcmd = (struct aac_srb *)fib_data(fibptr); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 137d22d3a005dd..ab4f1e7870b940 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -891,13 +891,13 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) * Adapters that support a register, instead of a commanded, * reset. 
*/ - if (((aac->supplement_adapter_info.SupportedOptions2 & + if (((aac->supplement_adapter_info.supported_options2 & AAC_OPTION_MU_RESET) || - (aac->supplement_adapter_info.SupportedOptions2 & + (aac->supplement_adapter_info.supported_options2 & AAC_OPTION_DOORBELL_RESET)) && aac_check_reset && ((aac_check_reset != 1) || - !(aac->supplement_adapter_info.SupportedOptions2 & + !(aac->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) { /* Bypass wait for command quiesce */ aac_reset_adapter(aac, 2, IOP_HWSOFT_RESET); @@ -1029,8 +1029,8 @@ static ssize_t aac_show_model(struct device *device, struct aac_dev *dev = (struct aac_dev*)class_to_shost(device)->hostdata; int len; - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + if (dev->supplement_adapter_info.adapter_type_text[0]) { + char *cp = dev->supplement_adapter_info.adapter_type_text; while (*cp && *cp != ' ') ++cp; while (*cp == ' ') @@ -1046,18 +1046,20 @@ static ssize_t aac_show_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct aac_dev *dev = (struct aac_dev*)class_to_shost(device)->hostdata; + struct aac_supplement_adapter_info *sup_adap_info; int len; - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + sup_adap_info = &dev->supplement_adapter_info; + if (sup_adap_info->adapter_type_text[0]) { + char *cp = sup_adap_info->adapter_type_text; while (*cp && *cp != ' ') ++cp; len = snprintf(buf, PAGE_SIZE, "%.*s\n", - (int)(cp - (char *)dev->supplement_adapter_info.AdapterTypeText), - dev->supplement_adapter_info.AdapterTypeText); + (int)(cp - (char *)sup_adap_info->adapter_type_text), + sup_adap_info->adapter_type_text); } else len = snprintf(buf, PAGE_SIZE, "%s\n", - aac_drivers[dev->cardtype].vname); + aac_drivers[dev->cardtype].vname); return len; } @@ -1078,7 +1080,7 @@ static ssize_t aac_show_flags(struct device *cdev, "SAI_READ_CAPACITY_16\n"); if (dev->jbod) len += snprintf(buf + len, PAGE_SIZE - len, "SUPPORTED_JBOD\n"); - if (dev->supplement_adapter_info.SupportedOptions2 & + if (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_POWER_MANAGEMENT) len += snprintf(buf + len, PAGE_SIZE - len, "SUPPORTED_POWER_MANAGEMENT\n"); @@ -1139,12 +1141,12 @@ static ssize_t aac_show_serial_number(struct device *device, len = snprintf(buf, 16, "%06X\n", le32_to_cpu(dev->adapter_info.serial[0])); if (len && - !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[ - sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)-len], + !memcmp(&dev->supplement_adapter_info.mfg_pcba_serial_no[ + sizeof(dev->supplement_adapter_info.mfg_pcba_serial_no)-len], buf, len-1)) len = snprintf(buf, 16, "%.*s\n", - (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo), - dev->supplement_adapter_info.MfgPcbaSerialNo); + (int)sizeof(dev->supplement_adapter_info.mfg_pcba_serial_no), + dev->supplement_adapter_info.mfg_pcba_serial_no); return min(len, 16); } diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 0e69a80c327583..5d19c31e3bbac5 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -475,7 +475,7 @@ static int aac_rx_restart_adapter(struct aac_dev *dev, int bled, u8 reset_type) { u32 var = 0; - if (!(dev->supplement_adapter_info.SupportedOptions2 & + if (!(dev->supplement_adapter_info.supported_options2 & AAC_OPTION_MU_RESET) || (bled >= 0) || (bled == -2)) { if (bled) printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", 
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 8e4e2ddbafd744..c17b0603749cc6 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -684,7 +684,7 @@ static void aac_send_iop_reset(struct aac_dev *dev, int bled) aac_set_intx_mode(dev); - if (!bled && (dev->supplement_adapter_info.SupportedOptions2 & + if (!bled && (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_DOORBELL_RESET)) { src_writel(dev, MUnit.IDR, reset_mask); } else { From f3ef4a74dc3712ef0ce60d652aa87b1ba70cb2a4 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:11 -0800 Subject: [PATCH 0128/1911] scsi: aacraid: Use correct channel number for raw srb The channel being used for raw srb commands is retrieved from the utility sent fibs and is converted into physical channel id. The driver does not need to to do this since the management utility sends the correct channel id in the first place and in addition the driver sets inaccurate information in the cmd sent to the firmware and gets an invalid response. Fixed by using channel id from srb command. Cc: stable@vger.kernel.org Fixes: 423400e64d377c0 ("scsi: aacraid: Include HBA direct interface") Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 614842a9eb07fe..f6afd50579c038 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -580,7 +580,7 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) goto cleanup; } - chn = aac_logical_to_phys(user_srbcmd->channel); + chn = user_srbcmd->channel; if (chn < AAC_MAX_BUSES && user_srbcmd->id < AAC_MAX_TARGETS && dev->hba_map[chn][user_srbcmd->id].devtype == AAC_DEVTYPE_NATIVE_RAW) { From 16ae9dd35d374182ce955063100fce66a9974e74 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:12 -0800 Subject: [PATCH 0129/1911] scsi: aacraid: Fix for excessive prints on EEH This issue showed up on a kdump debug(single CPU on powerkvm), when EEH errors rendered the adapter unusable. The driver correctly detected the issue and attempted to restart the controller, in doing so the driver attempted to read the status registers of the controller. This triggered additional eeh errors which continued for a good 6 minutes. Fixed by returning without waiting when EEH error is reported. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 39 +++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 56090f5be6c5bb..a8dd4b51b086c9 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -461,6 +461,35 @@ int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw return 0; } +#ifdef CONFIG_EEH +static inline int aac_check_eeh_failure(struct aac_dev *dev) +{ + /* Check for an EEH failure for the given + * device node. Function eeh_dev_check_failure() + * returns 0 if there has not been an EEH error + * otherwise returns a non-zero value. 
+ * + * Need to be called before any PCI operation, + * i.e.,before aac_adapter_check_health() + */ + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev->pdev); + + if (eeh_dev_check_failure(edev)) { + /* The EEH mechanisms will handle this + * error and reset the device if + * necessary. + */ + return 1; + } + return 0; +} +#else +static inline int aac_check_eeh_failure(struct aac_dev *dev) +{ + return 0; +} +#endif + /* * Define the highest level of host to adapter communication routines. * These routines will support host to adapter FS commuication. These @@ -496,7 +525,6 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, unsigned long mflags = 0; unsigned long sflags = 0; - if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned))) return -EBUSY; /* @@ -662,6 +690,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } return -ETIMEDOUT; } + + if (aac_check_eeh_failure(dev)) + return -EFAULT; + if ((blink = aac_adapter_check_health(dev)) > 0) { if (wait == -1) { printk(KERN_ERR "aacraid: aac_fib_send: adapter blinkLED 0x%x.\n" @@ -755,7 +787,12 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, FIB_COUNTER_INCREMENT(aac_config.NativeSent); if (wait) { + spin_unlock_irqrestore(&fibptr->event_lock, flags); + + if (aac_check_eeh_failure(dev)) + return -EFAULT; + /* Only set for first known interruptable command */ if (down_interruptible(&fibptr->event_wait)) { fibptr->done = 2; From a0c6143e95c5b9e1f2d83e005e4e86ed3dc6096f Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:13 -0800 Subject: [PATCH 0130/1911] scsi: aacraid: Prevent E3 lockup when deleting units Arrconf management utility at times sends fibs with AdapterProcessed set in its fibs. This causes the controller to panic and lockup. Fixed by failing the commands that have AdapterProcessed set in its flag. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a8dd4b51b086c9..e221321d97d265 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -527,6 +527,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned))) return -EBUSY; + + if (hw_fib->header.XferState & cpu_to_le32(AdapterProcessed)) + return -EINVAL; + /* * There are 5 cases with the wait and response requested flags. * The only invalid cases are if the caller requests to wait and From 1bff5abca65d4b9761fcc992ab6288243220003d Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:14 -0800 Subject: [PATCH 0131/1911] scsi: aacraid: Fix memory leak in fib init path aac_fib_map_free frees misaligned fib dma memory, additionally it does not free up the whole memory. Fixed by changing the code to free up the correct and full memory allocation. Cc: stable@vger.kernel.org Fixes: e8b12f0fb835223 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC based controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 36 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e221321d97d265..c10954b3cd46b5 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -95,12 +95,20 @@ static int fib_map_alloc(struct aac_dev *dev) void aac_fib_map_free(struct aac_dev *dev) { - if (dev->hw_fib_va && dev->max_cmd_size) { - pci_free_consistent(dev->pdev, - (dev->max_cmd_size * - (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)), - dev->hw_fib_va, dev->hw_fib_pa); - } + size_t alloc_size; + size_t fib_size; + int num_fibs; + + if(!dev->hw_fib_va || !dev->max_cmd_size) + return; + + num_fibs = dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB; + fib_size = dev->max_fib_size + sizeof(struct aac_fib_xporthdr); + alloc_size = fib_size * num_fibs + ALIGN32 - 1; + + pci_free_consistent(dev->pdev, alloc_size, dev->hw_fib_va, + dev->hw_fib_pa); + dev->hw_fib_va = NULL; dev->hw_fib_pa = 0; } @@ -153,22 +161,20 @@ int aac_fib_setup(struct aac_dev * dev) if (i<0) return -ENOMEM; - /* 32 byte alignment for PMC */ - hw_fib_pa = (dev->hw_fib_pa + (ALIGN32 - 1)) & ~(ALIGN32 - 1); - dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + - (hw_fib_pa - dev->hw_fib_pa)); - dev->hw_fib_pa = hw_fib_pa; memset(dev->hw_fib_va, 0, (dev->max_cmd_size + sizeof(struct aac_fib_xporthdr)) * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)); + /* 32 byte alignment for PMC */ + hw_fib_pa = (dev->hw_fib_pa + (ALIGN32 - 1)) & ~(ALIGN32 - 1); + hw_fib = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + + (hw_fib_pa - dev->hw_fib_pa)); + /* add Xport header */ - dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + + hw_fib = (struct hw_fib *)((unsigned char *)hw_fib + sizeof(struct aac_fib_xporthdr)); - dev->hw_fib_pa += sizeof(struct aac_fib_xporthdr); + hw_fib_pa += sizeof(struct aac_fib_xporthdr); - hw_fib = dev->hw_fib_va; - hw_fib_pa = dev->hw_fib_pa; /* * Initialise the fibs */ From 30202e067a81754cb78cb823b7ce7e7cddd040e2 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:15 -0800 Subject: [PATCH 0132/1911] scsi: aacraid: Added sysfs for driver version Added support to retrieve driver version from a new sysfs variable called driver_version. It makes it easier for the user to figure out the driver version that is currently running. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/linit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index ab4f1e7870b940..df027847f20831 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1131,6 +1131,13 @@ static ssize_t aac_show_bios_version(struct device *device, return len; } +static ssize_t aac_show_driver_version(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", aac_driver_version); +} + static ssize_t aac_show_serial_number(struct device *device, struct device_attribute *attr, char *buf) { @@ -1241,6 +1248,13 @@ static struct device_attribute aac_bios_version = { }, .show = aac_show_bios_version, }; +static struct device_attribute aac_lld_version = { + .attr = { + .name = "driver_version", + .mode = 0444, + }, + .show = aac_show_driver_version, +}; static struct device_attribute aac_serial_number = { .attr = { .name = "serial_number", @@ -1278,6 +1292,7 @@ static struct device_attribute *aac_attrs[] = { &aac_kernel_version, &aac_monitor_version, &aac_bios_version, + &aac_lld_version, &aac_serial_number, &aac_max_channel, &aac_max_id, From a7e2c642844cfefd570cb54a8d9fe7b85605311b Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:16 -0800 Subject: [PATCH 0133/1911] scsi: aacraid: Fix sync fibs time out on controller reset After controller shutdown, all sync fibs time out due to not knowing about the switch to INT-x mode Fixed by replacing aac_src_access_devreg() to aac_set_intx_mode() call. Cc: stable@vger.kernel.org Fixes: 495c021767bd78c998 (aacraid: MSI-x support) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aacraid/src.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index b5a2c87f289b81..9281e729548071 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2639,6 +2639,7 @@ void aac_hba_callback(void *context, struct fib *fibptr); #define fib_data(fibctx) ((void *)(fibctx)->hw_fib_va->data) struct aac_dev *aac_init_adapter(struct aac_dev *dev); void aac_src_access_devreg(struct aac_dev *dev, int mode); +void aac_set_intx_mode(struct aac_dev *dev); int aac_get_config_status(struct aac_dev *dev, int commit_flag); int aac_get_containers(struct aac_dev *dev); int aac_scsi_cmd(struct scsi_cmnd *cmd); diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 40bfc57b68493a..35607005f7e1fb 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -330,7 +330,7 @@ int aac_send_shutdown(struct aac_dev * dev) dev->pdev->device == PMC_DEVICE_S8 || dev->pdev->device == PMC_DEVICE_S9) && dev->msi_enabled) - aac_src_access_devreg(dev, AAC_ENABLE_INTX); + aac_set_intx_mode(dev); return status; } diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index c17b0603749cc6..b23c818adbf68e 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -657,7 +657,7 @@ static int aac_srcv_ioremap(struct aac_dev *dev, u32 size) return 0; } -static void aac_set_intx_mode(struct aac_dev *dev) +void aac_set_intx_mode(struct aac_dev *dev) { if (dev->msi_enabled) { aac_src_access_devreg(dev, AAC_ENABLE_INTX); From 849ac6a591bf7b5777fdb6ce65030f32a7c73e1a Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:17 -0800 Subject: [PATCH 0134/1911] scsi: aacraid: Skip wellness sync on controller failure aac_command_thread checks on the health of controller periodically, using aac_check_health. If the status is an error state KERNEL_PANIC or anything else. The driver will attempt to restart the adapter, but the response is not checked in aac_command_thread. This allows the periodic sync to go thru and lead the driver to a hung state. Fixed by terminating the periodic loop(intended per original design), if the controller is not restored to a healthy state. Cc: stable@vger.kernel.org Fixes: 3d77d8404478353358 (scsi: aacraid: Added support for periodic wellness sync) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index c10954b3cd46b5..eb4d8cfc31945c 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2481,7 +2481,7 @@ int aac_command_thread(void *data) /* Don't even try to talk to adapter if its sick */ ret = aac_check_health(dev); - if (!dev->queues) + if (ret || !dev->queues) break; next_check_jiffies = jiffies + ((long)(unsigned)check_interval) From a2d0321dd532901ea64118ed5a752fa6e447d1da Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:18 -0800 Subject: [PATCH 0135/1911] scsi: aacraid: Reload offlined drives after controller reset During the IOP reset stress testing, it was found that the drives can be marked offline when the adapter controller crashes and IO's are running in parallel. When the controller does come back from the reset, the drive that is marked offline is not exposed. Fixed by removing and adding drives that are marked offline. In addition invoke a scsi host bus rescan to capture any additional configuration changes. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index eb4d8cfc31945c..1f716c01708bc5 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1637,11 +1637,29 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) command->SCp.phase = AAC_OWNER_ERROR_HANDLER; command->scsi_done(command); } + /* + * Any Device that was already marked offline needs to be cleaned up + */ + __shost_for_each_device(dev, host) { + if (!scsi_device_online(dev)) { + sdev_printk(KERN_INFO, dev, "Removing offline device\n"); + scsi_remove_device(dev); + scsi_device_put(dev); + } + } retval = 0; out: aac->in_reset = 0; scsi_unblock_requests(host); + /* + * Issue bus rescan to catch any configuration that might have + * occurred + */ + if (!retval) { + dev_info(&aac->pdev->dev, "Issuing bus rescan\n"); + scsi_scan_host(host); + } if (jafo) { spin_lock_irq(host->host_lock); } From 11da1b7c4856de05e00f50f54efe2f5349214d5b Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:19 -0800 Subject: [PATCH 0136/1911] scsi: aacraid: Decrease adapter health check interval Currently driver checks the health status of the adapter once every 24 hours. When that happens the driver becomes dependent on the kernel to figure out if the adapter is misbehaving. This might take some time (when the adapter is idle). The driver currently has support to restart/recover the controller when it fails, and decreasing the time interval will help. Fixed by decreasing check interval from 24 hours to 1 minute Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aachba.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 98d4ffd798c507..3ede50f25f46c7 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -311,7 +311,7 @@ module_param(update_interval, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync" " updates issued to adapter."); -int check_interval = 24 * 60 * 60; +int check_interval = 60; module_param(check_interval, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health" " checks."); From 146aa1786d4978795cab5347d810e00236dea1c3 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:20 -0800 Subject: [PATCH 0137/1911] scsi: aacraid: Skip IOP reset on controller panic(SMART Family) When the SMART family of controller panic (KERNEL_PANIC) , they do not honor IOP resets. So better to skip it and directly perform a IWBR reset. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index b23c818adbf68e..9b11e1a6cc7591 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -714,6 +714,12 @@ static int aac_src_restart_adapter(struct aac_dev *dev, int bled, u8 reset_type) pr_err("%s%d: adapter kernel panic'd %x.\n", dev->name, dev->id, bled); + /* + * When there is a BlinkLED, IOP_RESET has not effect + */ + if (bled >= 2 && dev->sa_firmware && reset_type & HW_IOP_RESET) + reset_type &= ~HW_IOP_RESET; + dev->a_ops.adapter_enable_int = aac_src_disable_interrupt; switch (reset_type) { From c421530bf848604e97d0785a03b3fe2c62775083 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:21 -0800 Subject: [PATCH 0138/1911] scsi: aacraid: Reorder Adapter status check The driver currently checks the SELF_TEST_FAILED first and then KERNEL_PANIC next. Under error conditions(boot code failure) both SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. The driver has the capability to reset the controller on an KERNEL_PANIC, but not on SELF_TEST_FAILED. Fixed by first checking KERNEL_PANIC and then the others. Cc: stable@vger.kernel.org Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 9b11e1a6cc7591..71aaabde6b573b 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -436,17 +436,24 @@ static int aac_src_check_health(struct aac_dev *dev) { u32 status = src_readl(dev, MUnit.OMR); + /* + * Check to see if the board panic'd. + */ + if (unlikely(status & KERNEL_PANIC)) + goto err_blink; + /* * Check to see if the board failed any self tests. */ if (unlikely(status & SELF_TEST_FAILED)) - return -1; + goto err_out; /* - * Check to see if the board panic'd. + * Check to see if the board failed any self tests. 
*/ - if (unlikely(status & KERNEL_PANIC)) - return (status >> 16) & 0xFF; + if (unlikely(status & MONITOR_PANIC)) + goto err_out; + /* * Wait for the adapter to be up and running. */ @@ -456,6 +463,12 @@ static int aac_src_check_health(struct aac_dev *dev) * Everything is OK */ return 0; + +err_out: + return -1; + +err_blink: + return (status > 16) & 0xFF; } static inline u32 aac_get_vector(struct aac_dev *dev) From 09624645e1e85df8d68b04de6e0607d696268333 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:22 -0800 Subject: [PATCH 0139/1911] scsi: aacraid: Save adapter fib log before an IOP reset Currently the adapter firmware does not save outstanding I/O's log information when an IOP reset is triggered. This is problematic when trying to root cause and debug issues. Fixed by adding sync command to trigger I/O log file save in the adapter firmware before issuing an IOP reset. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 4 ++++ drivers/scsi/aacraid/aacraid.h | 6 ++++++ drivers/scsi/aacraid/src.c | 17 +++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 3ede50f25f46c7..e3e93def722b04 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -294,6 +294,10 @@ MODULE_PARM_DESC(aif_timeout, "The duration of time in seconds to wait for" "deregistering them. This is typically adjusted for heavily burdened" " systems."); +int aac_fib_dump; +module_param(aac_fib_dump, int, 0644); +MODULE_PARM_DESC(aac_fib_dump, "Dump controller fibs prior to IOP_RESET 0=off, 1=on"); + int numacb = -1; module_param(numacb, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(numacb, "Request a limit to the number of adapter control" diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 9281e729548071..622fd69b35ccb1 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1444,6 +1444,10 @@ struct aac_supplement_adapter_info #define AAC_OPTION_VARIABLE_BLOCK_SIZE cpu_to_le32(0x00040000) /* 240 simple volume support */ #define AAC_OPTION_SUPPORTED_240_VOLUMES cpu_to_le32(0x10000000) +/* + * Supports FIB dump sync command send prior to IOP_RESET + */ +#define AAC_OPTION_SUPPORTED3_IOP_RESET_FIB_DUMP cpu_to_le32(0x00004000) #define AAC_SIS_VERSION_V3 3 #define AAC_SIS_SLOT_UNKNOWN 0xFF @@ -2483,6 +2487,7 @@ struct aac_hba_info { #define GET_DRIVER_BUFFER_PROPERTIES 0x00000023 #define RCV_TEMP_READINGS 0x00000025 #define GET_COMM_PREFERRED_SETTINGS 0x00000026 +#define IOP_RESET_FW_FIB_DUMP 0x00000034 #define IOP_RESET 0x00001000 #define IOP_RESET_ALWAYS 0x00001001 #define RE_INIT_ADAPTER 0x000000ee @@ -2686,4 +2691,5 @@ extern int aac_commit; extern int update_interval; extern int check_interval; extern int aac_check_reset; +extern int aac_fib_dump; #endif diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 71aaabde6b573b..2e5338dec621fb 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -679,10 +679,27 @@ void aac_set_intx_mode(struct aac_dev *dev) } } +static void aac_dump_fw_fib_iop_reset(struct aac_dev *dev) +{ + __le32 supported_options3; + + if (!aac_fib_dump) + return; + + supported_options3 = dev->supplement_adapter_info.supported_options3; + if (!(supported_options3 & AAC_OPTION_SUPPORTED3_IOP_RESET_FIB_DUMP)) + return; + + aac_adapter_sync_cmd(dev, 
IOP_RESET_FW_FIB_DUMP, + 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL); +} + static void aac_send_iop_reset(struct aac_dev *dev, int bled) { u32 var, reset_mask; + aac_dump_fw_fib_iop_reset(dev); + bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS, 0, 0, 0, 0, 0, 0, &var, &reset_mask, NULL, NULL, NULL); From d844752e1801099f92c178845f56412861a2b4af Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:23 -0800 Subject: [PATCH 0140/1911] scsi: aacraid: Fix a potential spinlock double unlock bug The driver does not unlock the reply queue spin lock after handling SMART adapter events. Instead it might attempt to unlock an already unlocked spin lock. Fixed by making sure the driver locks the spin lock before freeing it. Thank you dan for finding this issue out. Fixes: 6223a39fe6fbbeef (scsi: aacraid: Added support for hotplug) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1f716c01708bc5..a2ea70d8a13ac6 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2215,7 +2215,7 @@ static void aac_process_events(struct aac_dev *dev) /* Thor AIF */ aac_handle_sa_aif(dev, fib); aac_fib_adapter_complete(fib, (u16)sizeof(u32)); - continue; + goto free_fib; } /* * We will process the FIB here or pass it to a From 0662cc968aceaca34848e63f431e585f4f71f746 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:24 -0800 Subject: [PATCH 0141/1911] scsi: aacraid: Update driver version Updated driver version to 50792 Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 622fd69b35ccb1..d036a806f31c47 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -97,7 +97,7 @@ enum { #define PMC_GLOBAL_INT_BIT0 0x00000001 #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 50740 +# define AAC_DRIVER_BUILD 50792 # define AAC_DRIVER_BRANCH "-custom" #endif #define MAXIMUM_NUM_CONTAINERS 32 From ed10858eadd4988260c6bc7d75fc25176342b5a7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 16:03:52 +0100 Subject: [PATCH 0142/1911] scsi: smartpqi: fix time handling When we have turned off RTC support, the smartpqi driver fails to build: ERROR: "rtc_time64_to_tm" [drivers/scsi/smartpqi/smartpqi.ko] undefined! This is easily avoided by using the generic 'struct tm' based helper rather than the RTC specific one. While fixing this, I noticed that even though the driver uses time64_t for storing seconds, it gets them from the old 32-bit struct timeval. To address this, we can simplify the code by calling ktime_get_real_seconds() directly. Fixes: 6c223761eb54 ("smartpqi: initial commit of Microsemi smartpqi driver") Signed-off-by: Arnd Bergmann Acked-by: Don Brace Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 11c0dfb3dfa392..657ad15682a34c 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -534,8 +534,7 @@ static int pqi_write_current_time_to_host_wellness( size_t buffer_length; time64_t local_time; unsigned int year; - struct timeval time; - struct rtc_time tm; + struct tm tm; buffer_length = sizeof(*buffer); @@ -552,9 +551,8 @@ static int pqi_write_current_time_to_host_wellness( put_unaligned_le16(sizeof(buffer->time), &buffer->time_length); - do_gettimeofday(&time); - local_time = time.tv_sec - (sys_tz.tz_minuteswest * 60); - rtc_time64_to_tm(local_time, &tm); + local_time = ktime_get_real_seconds(); + time64_to_tm(local_time, -sys_tz.tz_minuteswest * 60, &tm); year = tm.tm_year + 1900; buffer->time[0] = bin2bcd(tm.tm_hour); From f22aaec97d889f3fbf89b185309bdc98b8202eda Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Fri, 17 Feb 2017 13:53:41 -0800 Subject: [PATCH 0143/1911] scsi: ufs-qcom: remove redundant condition check Dan Carpenter reported this: The patch 9c46b8676271: "scsi: ufs-qcom: dump additional testbus registers" from Feb 3, 2017, leads to the following static checker warning: drivers/scsi/ufs/ufs-qcom.c:1531 ufs_qcom_testbus_cfg_is_ok() warn: impossible condition '(host->testbus.select_minor > 255) => (0-255 > 255)' drivers/scsi/ufs/ufs-qcom.c 1517 static bool ufs_qcom_testbus_cfg_is_ok(struct ufs_qcom_host *host) 1518 { 1519 if (host->testbus.select_major >= TSTBUS_MAX) { 1520 dev_err(host->hba->dev, 1521 "%s: UFS_CFG1[TEST_BUS_SEL} may not equal 0x%05X\n", 1522 __func__, host->testbus.select_major); 1523 return false; 1524 } 1525 1526 /* 1527 * Not performing check for each individual select_major 1528 * mappings of select_minor, since there is no harm in 1529 * configuring a non-existent select_minor 1530 */ 1531 if (host->testbus.select_minor > 0xFF) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It might make sense to keep this check. I don't know. But it's confusing that 0xFF is a magic number. Better to make it a define. 1532 dev_err(host->hba->dev, 1533 "%s: 0x%05X is not a legal testbus option\n", 1534 __func__, host->testbus.select_minor); 1535 return false; 1536 } 1537 1538 return true; 1539 } --- As data type of "select_minor" is u8, above check is redundant. This change removes it. Reported-by: Dan Carpenter Signed-off-by: Subhash Jadavani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufs-qcom.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index ce5d023c1c915c..c87d770b519a7c 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1523,18 +1523,6 @@ static bool ufs_qcom_testbus_cfg_is_ok(struct ufs_qcom_host *host) return false; } - /* - * Not performing check for each individual select_major - * mappings of select_minor, since there is no harm in - * configuring a non-existent select_minor - */ - if (host->testbus.select_minor > 0xFF) { - dev_err(host->hba->dev, - "%s: 0x%05X is not a legal testbus option\n", - __func__, host->testbus.select_minor); - return false; - } - return true; } From 857de6e00778738dc3d61f75acbac35bdc48e533 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 17 Feb 2017 09:02:45 +0100 Subject: [PATCH 0144/1911] scsi: use 'scsi_device_from_queue()' for scsi_dh The device handler needs to check if a given queue belongs to a scsi device; only then does it make sense to attach a device handler. [mkp: dropped flags] Cc: Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_dh.c | 22 ++++------------------ drivers/scsi/scsi_lib.c | 23 +++++++++++++++++++++++ include/scsi/scsi_device.h | 1 + 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index b8d3b97b217ac5..84addee05be67a 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) } EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); -static struct scsi_device *get_sdev_from_queue(struct request_queue *q) -{ - struct scsi_device *sdev; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - return sdev; -} - /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. 
@@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) struct scsi_device *sdev; int err = SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) { if (fn) fn(data, err); @@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) struct scsi_device *sdev; int err = -SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return err; @@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue *q, const char *name) struct scsi_device_handler *scsi_dh; int err = 0; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return -ENODEV; @@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) struct scsi_device *sdev; const char *handler_name = NULL; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 912fbc3b4543dd..39b2d727f033ef 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2231,6 +2231,29 @@ void scsi_mq_destroy_tags(struct Scsi_Host *shost) blk_mq_free_tag_set(&shost->tag_set); } +/** + * scsi_device_from_queue - return sdev associated with a request_queue + * @q: The request queue to return the sdev from + * + * Return the sdev associated with a request queue or NULL if the + * request_queue does not reference a SCSI device. + */ +struct scsi_device *scsi_device_from_queue(struct request_queue *q) +{ + struct scsi_device *sdev = NULL; + + if (q->mq_ops) { + if (q->mq_ops == &scsi_mq_ops) + sdev = q->queuedata; + } else if (q->request_fn == scsi_request_fn) + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + + return sdev; +} +EXPORT_SYMBOL_GPL(scsi_device_from_queue); + /* * Function: scsi_block_requests() * diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8990e580b278bd..be41c76ddd489b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -315,6 +315,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, From 943445200b049d5179b95297e5372d399c8ab0e2 Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Thu, 16 Feb 2017 21:39:32 -0600 Subject: [PATCH 0145/1911] scsi: cxlflash: Enable PCI device ID for future IBM CXL Flash AFU Add support for a future IBM Coherent Accelerator (CXL) flash AFU with an ID of 0x0624. Signed-off-by: Matthew R. Ochs Signed-off-by: Uma Krishnan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/cxlflash/main.c | 4 ++++ drivers/scsi/cxlflash/main.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 7069639e92bc40..3061d8045382e4 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -2259,6 +2259,8 @@ static struct dev_dependent_vals dev_corsa_vals = { CXLFLASH_MAX_SECTORS, 0ULL }; static struct dev_dependent_vals dev_flash_gt_vals = { CXLFLASH_MAX_SECTORS, CXLFLASH_NOTIFY_SHUTDOWN }; +static struct dev_dependent_vals dev_briard_vals = { CXLFLASH_MAX_SECTORS, + CXLFLASH_NOTIFY_SHUTDOWN }; /* * PCI device binding table @@ -2268,6 +2270,8 @@ static struct pci_device_id cxlflash_pci_table[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_corsa_vals}, {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_FLASH_GT, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_flash_gt_vals}, + {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_BRIARD, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_briard_vals}, {} }; diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index e43545c86bcf9f..0be2261e631224 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h @@ -25,6 +25,7 @@ #define PCI_DEVICE_ID_IBM_CORSA 0x04F0 #define PCI_DEVICE_ID_IBM_FLASH_GT 0x0600 +#define PCI_DEVICE_ID_IBM_BRIARD 0x0624 /* Since there is only one target, make it 0 */ #define CXLFLASH_TARGET 0 From 8ea73db486cda442f0671f4bc9c03a76be398a28 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:25 -0800 Subject: [PATCH 0146/1911] scsi: lpfc: Correct WQ creation for pagesize Correct WQ creation for pagesize The driver was calculating the adapter command pagesize indicator from the system pagesize. However, the buffers the driver allocates are only one size (SLI4_PAGE_SIZE), so no calculation was necessary. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_hw4.h | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 5646699b0516b2..964a1fdb076bba 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1186,6 +1186,7 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_page_size_SHIFT 0 #define lpfc_mbx_wq_create_page_size_MASK 0x000000FF #define lpfc_mbx_wq_create_page_size_WORD word1 +#define LPFC_WQ_PAGE_SIZE_4096 0x1 #define lpfc_mbx_wq_create_wqe_size_SHIFT 8 #define lpfc_mbx_wq_create_wqe_size_MASK 0x0000000F #define lpfc_mbx_wq_create_wqe_size_WORD word1 @@ -1257,6 +1258,7 @@ struct rq_context { #define lpfc_rq_context_page_size_SHIFT 0 /* Version 1 Only */ #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 +#define LPFC_RQ_PAGE_SIZE_4096 0x1 uint32_t reserved1; uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d977a472f89f06..586984dce3a43f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13722,7 +13722,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; } @@ -13748,8 +13748,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); break; } - bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_wq_create_page_size, + &wq_create->u.request_1, + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; default: @@ -13935,7 +13936,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_RQ_PAGE_SIZE_4096); } else { switch (hrq->entry_count) { default: From 45ffac1976c580ac20d926257b2f6320ce1b210f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 12 Feb 2017 13:52:26 -0800 Subject: [PATCH 0147/1911] scsi: lpfc: use pci_irq_alloc_vectors and pci_irq_free_vectors This avoids having to store the msix_entries array and simpliefies the shutdown and cleanup path a lot. Signed-off-by: Christoph Hellwig Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc.h | 2 - drivers/scsi/lpfc/lpfc_init.c | 240 ++++++++-------------------------- drivers/scsi/lpfc/lpfc_sli4.h | 1 - 3 files changed, 54 insertions(+), 189 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 6593b073c52483..64022d79117b78 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -878,8 +878,6 @@ struct lpfc_hba { enum intr_type_t intr_type; uint32_t intr_mode; #define LPFC_INTR_ERROR 0xFFFFFFFF - struct msix_entry msix_entries[LPFC_MSIX_VECTORS]; - struct list_head port_list; struct lpfc_vport *pport; /* physical lpfc_vport pointer */ uint16_t max_vpi; /* Maximum virtual nports */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 64717c171b1557..8cffad192f1734 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5509,17 +5509,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_fcf_rr_bmask; } - phba->sli4_hba.msix_entries = kzalloc((sizeof(struct msix_entry) * - (fof_vectors + - phba->cfg_fcp_io_channel)), GFP_KERNEL); - if (!phba->sli4_hba.msix_entries) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2573 Failed allocate memory for msi-x " - "interrupt vector entries\n"); - rc = -ENOMEM; - goto out_free_fcp_eq_hdl; - } - phba->sli4_hba.cpu_map = kzalloc((sizeof(struct lpfc_vector_map_info) * phba->sli4_hba.num_present_cpu), GFP_KERNEL); @@ -5528,7 +5517,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "3327 Failed allocate memory for msi-x " "interrupt vector mapping\n"); rc = -ENOMEM; - goto out_free_msix; + goto out_free_fcp_eq_hdl; } if (lpfc_used_cpu == NULL) { lpfc_used_cpu = kzalloc((sizeof(uint16_t) * lpfc_present_cpu), @@ -5539,7 +5528,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "interrupt vector mapping\n"); kfree(phba->sli4_hba.cpu_map); rc = -ENOMEM; - goto out_free_msix; + goto out_free_fcp_eq_hdl; } for (i = 0; i < lpfc_present_cpu; i++) lpfc_used_cpu[i] = LPFC_VECTOR_MAP_EMPTY; @@ -5574,8 +5563,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; -out_free_msix: - kfree(phba->sli4_hba.msix_entries); out_free_fcp_eq_hdl: kfree(phba->sli4_hba.fcp_eq_hdl); out_free_fcf_rr_bmask: @@ -5611,9 +5598,6 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.num_online_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; - /* Free memory allocated for msi-x interrupt vector entries */ - kfree(phba->sli4_hba.msix_entries); - /* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.fcp_eq_hdl); @@ -8484,16 +8468,7 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-3 interface specs. The kernel function pci_enable_msix_exact() - * is called to enable the MSI-X vectors. Note that pci_enable_msix_exact(), - * once invoked, enables either all or nothing, depending on the current - * availability of PCI vector resources. The device driver is responsible - * for calling the individual request_irq() to register each MSI-X vector - * with a interrupt handler, which is done in this function. Note that - * later when device is unloading, the driver should always call free_irq() - * on all MSI-X vectors it has done request_irq() on before calling - * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device - * will be left with MSI-X enabled and leaks its vectors. + * with SLI-3 interface specs. 
* * Return codes * 0 - successful @@ -8502,33 +8477,24 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) static int lpfc_sli_enable_msix(struct lpfc_hba *phba) { - int rc, i; + int rc; LPFC_MBOXQ_t *pmb; /* Set up MSI-X multi-message vectors */ - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - phba->msix_entries[i].entry = i; - - /* Configure MSI-X capability structure */ - rc = pci_enable_msix_exact(phba->pcidev, phba->msix_entries, - LPFC_MSIX_VECTORS); - if (rc) { + rc = pci_alloc_irq_vectors(phba->pcidev, + LPFC_MSIX_VECTORS, LPFC_MSIX_VECTORS, PCI_IRQ_MSIX); + if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0420 PCI enable MSI-X failed (%d)\n", rc); goto vec_fail_out; } - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0477 MSI-X entry[%d]: vector=x%x " - "message=%d\n", i, - phba->msix_entries[i].vector, - phba->msix_entries[i].entry); + /* * Assign MSI-X vectors to interrupt handlers */ /* vector-0 is associated to slow-path handler */ - rc = request_irq(phba->msix_entries[0].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, 0), &lpfc_sli_sp_intr_handler, 0, LPFC_SP_DRIVER_HANDLER_NAME, phba); if (rc) { @@ -8539,7 +8505,7 @@ lpfc_sli_enable_msix(struct lpfc_hba *phba) } /* vector-1 is associated to fast-path handler */ - rc = request_irq(phba->msix_entries[1].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, 1), &lpfc_sli_fp_intr_handler, 0, LPFC_FP_DRIVER_HANDLER_NAME, phba); @@ -8584,41 +8550,20 @@ lpfc_sli_enable_msix(struct lpfc_hba *phba) mem_fail_out: /* free the irq already requested */ - free_irq(phba->msix_entries[1].vector, phba); + free_irq(pci_irq_vector(phba->pcidev, 1), phba); irq_fail_out: /* free the irq already requested */ - free_irq(phba->msix_entries[0].vector, phba); + free_irq(pci_irq_vector(phba->pcidev, 0), phba); msi_fail_out: /* Unconfigure MSI-X capability structure */ - pci_disable_msix(phba->pcidev); + pci_free_irq_vectors(phba->pcidev); vec_fail_out: return rc; } -/** - * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to release the MSI-X vectors and then disable the - * MSI-X interrupt mode to device with SLI-3 interface spec. - **/ -static void -lpfc_sli_disable_msix(struct lpfc_hba *phba) -{ - int i; - - /* Free up MSI-X multi-message vectors */ - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - free_irq(phba->msix_entries[i].vector, phba); - /* Disable MSI-X */ - pci_disable_msix(phba->pcidev); - - return; -} - /** * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device. * @phba: pointer to lpfc hba data structure. @@ -8658,24 +8603,6 @@ lpfc_sli_enable_msi(struct lpfc_hba *phba) return rc; } -/** - * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to disable the MSI interrupt mode to device with - * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has - * done request_irq() on before calling pci_disable_msi(). Failure to do so - * results in a BUG_ON() and a device will be left with MSI enabled and leaks - * its vector. - */ -static void -lpfc_sli_disable_msi(struct lpfc_hba *phba) -{ - free_irq(phba->pcidev->irq, phba); - pci_disable_msi(phba->pcidev); - return; -} - /** * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device. * @phba: pointer to lpfc hba data structure. 
@@ -8747,19 +8674,20 @@ lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) static void lpfc_sli_disable_intr(struct lpfc_hba *phba) { - /* Disable the currently initialized interrupt mode */ + int nr_irqs, i; + if (phba->intr_type == MSIX) - lpfc_sli_disable_msix(phba); - else if (phba->intr_type == MSI) - lpfc_sli_disable_msi(phba); - else if (phba->intr_type == INTx) - free_irq(phba->pcidev->irq, phba); + nr_irqs = LPFC_MSIX_VECTORS; + else + nr_irqs = 1; + + for (i = 0; i < nr_irqs; i++) + free_irq(pci_irq_vector(phba->pcidev, i), phba); + pci_free_irq_vectors(phba->pcidev); /* Reset interrupt management states */ phba->intr_type = NONE; phba->sli.slistat.sli_intr = 0; - - return; } /** @@ -8915,7 +8843,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) lpfc_used_cpu[cpu] = phys_id; /* Associate vector with selected CPU */ - cpup->irq = phba->sli4_hba.msix_entries[idx].vector; + cpup->irq = pci_irq_vector(phba->pcidev, idx); /* Associate IO channel with selected CPU */ cpup->channel_id = idx; @@ -8925,14 +8853,14 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) first_cpu = cpu; /* Now affinitize to the selected CPU */ - i = irq_set_affinity_hint(phba->sli4_hba.msix_entries[idx]. - vector, get_cpu_mask(cpu)); + i = irq_set_affinity_hint(pci_irq_vector(phba->pcidev, idx), + get_cpu_mask(cpu)); lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3330 Set Affinity: CPU %d channel %d " "irq %d (%x)\n", cpu, cpup->channel_id, - phba->sli4_hba.msix_entries[idx].vector, i); + pci_irq_vector(phba->pcidev, idx), i); /* Spread vector mapping across multple physical CPU nodes */ phys_id++; @@ -9047,14 +8975,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-4 interface spec. The kernel function pci_enable_msix_range() - * is called to enable the MSI-X vectors. The device driver is responsible - * for calling the individual request_irq() to register each MSI-X vector - * with a interrupt handler, which is done in this function. Note that - * later when device is unloading, the driver should always call free_irq() - * on all MSI-X vectors it has done request_irq() on before calling - * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device - * will be left with MSI-X enabled and leaks its vectors. + * with SLI-4 interface spec. 
* * Return codes * 0 - successful @@ -9066,17 +8987,11 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) int vectors, rc, index; /* Set up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) - phba->sli4_hba.msix_entries[index].entry = index; - - /* Configure MSI-X capability structure */ vectors = phba->cfg_fcp_io_channel; - if (phba->cfg_fof) { - phba->sli4_hba.msix_entries[index].entry = index; + if (phba->cfg_fof) vectors++; - } - rc = pci_enable_msix_range(phba->pcidev, phba->sli4_hba.msix_entries, - 2, vectors); + + rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, PCI_IRQ_MSIX); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9084,14 +8999,6 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) } vectors = rc; - /* Log MSI-X vector assignment */ - for (index = 0; index < vectors; index++) - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0489 MSI-X entry[%d]: vector=x%x " - "message=%d\n", index, - phba->sli4_hba.msix_entries[index].vector, - phba->sli4_hba.msix_entries[index].entry); - /* Assign MSI-X vectors to interrupt handlers */ for (index = 0; index < vectors; index++) { memset(&phba->sli4_hba.handler_name[index], 0, 16); @@ -9103,14 +9010,12 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) phba->sli4_hba.fcp_eq_hdl[index].phba = phba; atomic_set(&phba->sli4_hba.fcp_eq_hdl[index].fcp_eq_in_use, 1); if (phba->cfg_fof && (index == (vectors - 1))) - rc = request_irq( - phba->sli4_hba.msix_entries[index].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_fof_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], &phba->sli4_hba.fcp_eq_hdl[index]); else - rc = request_irq( - phba->sli4_hba.msix_entries[index].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], &phba->sli4_hba.fcp_eq_hdl[index]); @@ -9140,48 +9045,19 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) cfg_fail_out: /* free the irq already requested */ for (--index; index >= 0; index--) { - irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. - vector, NULL); - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); + int irq = pci_irq_vector(phba->pcidev, index); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); } /* Unconfigure MSI-X capability structure */ - pci_disable_msix(phba->pcidev); + pci_free_irq_vectors(phba->pcidev); vec_fail_out: return rc; } -/** - * lpfc_sli4_disable_msix - Disable MSI-X interrupt mode to SLI-4 device - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to release the MSI-X vectors and then disable the - * MSI-X interrupt mode to device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_disable_msix(struct lpfc_hba *phba) -{ - int index; - - /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. - vector, NULL); - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); - } - if (phba->cfg_fof) { - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); - } - /* Disable MSI-X */ - pci_disable_msix(phba->pcidev); - - return; -} - /** * lpfc_sli4_enable_msi - Enable MSI interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. 
@@ -9232,24 +9108,6 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return 0; } -/** - * lpfc_sli4_disable_msi - Disable MSI interrupt mode to SLI-4 device - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to disable the MSI interrupt mode to device with - * SLI-4 interface spec. The driver calls free_irq() on MSI vector it has - * done request_irq() on before calling pci_disable_msi(). Failure to do so - * results in a BUG_ON() and a device will be left with MSI enabled and leaks - * its vector. - **/ -static void -lpfc_sli4_disable_msi(struct lpfc_hba *phba) -{ - free_irq(phba->pcidev->irq, phba); - pci_disable_msi(phba->pcidev); - return; -} - /** * lpfc_sli4_enable_intr - Enable device interrupt to SLI-4 device * @phba: pointer to lpfc hba data structure. @@ -9335,18 +9193,28 @@ static void lpfc_sli4_disable_intr(struct lpfc_hba *phba) { /* Disable the currently initialized interrupt mode */ - if (phba->intr_type == MSIX) - lpfc_sli4_disable_msix(phba); - else if (phba->intr_type == MSI) - lpfc_sli4_disable_msi(phba); - else if (phba->intr_type == INTx) + if (phba->intr_type == MSIX) { + int index; + + /* Free up MSI-X multi-message vectors */ + for (index = 0; index < phba->cfg_fcp_io_channel; index++) { + int irq = pci_irq_vector(phba->pcidev, index); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); + } + + if (phba->cfg_fof) + free_irq(pci_irq_vector(phba->pcidev, index), phba); + } else { free_irq(phba->pcidev->irq, phba); + } + + pci_free_irq_vectors(phba->pcidev); /* Reset interrupt management states */ phba->intr_type = NONE; phba->sli.slistat.sli_intr = 0; - - return; } /** diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 0b88b5703e0f10..dfbb25e5c5b68b 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -515,7 +515,6 @@ struct lpfc_sli4_hba { uint32_t ue_to_rp; struct lpfc_register sli_intf; struct lpfc_pc_sli4_params pc_sli4_params; - struct msix_entry *msix_entries; uint8_t handler_name[LPFC_SLI4_HANDLER_CNT][LPFC_SLI4_HANDLER_NAME_SZ]; struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */ From 2ea259eead133026ac6a3fbfa040cc58a96cae44 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:27 -0800 Subject: [PATCH 0148/1911] scsi: lpfc: minor code cleanups This contains code cleanups that were in the prior patch set. This allows better review of real changes later. minor code cleanups: fix indentation, punctuation, line length addition/reduction of whitespace remove unneeded parens, braces lpfc_debugfs_nodelist_data: print as u64 rather than byte by byte covert printk(KERN_ERR to pr_err small print string deltas use num_present_cpus() rather than count them comment updates rctl/type names moved to module variable, not on stack Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 8 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 2 +- drivers/scsi/lpfc/lpfc_debugfs.c | 81 +++++++++++++++----------------- drivers/scsi/lpfc/lpfc_hbadisc.c | 7 ++- drivers/scsi/lpfc/lpfc_init.c | 19 ++++---- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- drivers/scsi/lpfc/lpfc_scsi.h | 12 ++--- drivers/scsi/lpfc/lpfc_sli.c | 48 ++++++++++--------- drivers/scsi/lpfc/lpfc_sli.h | 6 +-- drivers/scsi/lpfc/lpfc_vport.c | 1 + 10 files changed, 98 insertions(+), 89 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 50cf402dea2981..ce41dea476827b 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -50,9 +50,9 @@ #include "lpfc_vport.h" #include "lpfc_attr.h" -#define LPFC_DEF_DEVLOSS_TMO 30 -#define LPFC_MIN_DEVLOSS_TMO 1 -#define LPFC_MAX_DEVLOSS_TMO 255 +#define LPFC_DEF_DEVLOSS_TMO 30 +#define LPFC_MIN_DEVLOSS_TMO 1 +#define LPFC_MAX_DEVLOSS_TMO 255 /* * Write key size should be multiple of 4. If write key is changed @@ -5769,10 +5769,12 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset); lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat); + lpfc_EnableXLane_init(phba, lpfc_EnableXLane); if (phba->sli_rev != LPFC_SLI_REV4) phba->cfg_EnableXLane = 0; lpfc_XLanePriority_init(phba, lpfc_XLanePriority); + memset(phba->cfg_oas_tgt_wwpn, 0, (8 * sizeof(uint8_t))); memset(phba->cfg_oas_vpt_wwpn, 0, (8 * sizeof(uint8_t))); phba->cfg_oas_lun_state = 0; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 7dca4d6a888346..1ee131f124b9b4 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2703,7 +2703,7 @@ static int lpfcdiag_loop_get_xri(struct lpfc_hba *phba, uint16_t rpi, * lpfc_bsg_dma_page_alloc - allocate a bsg mbox page sized dma buffers * @phba: Pointer to HBA context object * - * This function allocates BSG_MBOX_SIZE (4KB) page size dma buffer and. + * This function allocates BSG_MBOX_SIZE (4KB) page size dma buffer and * returns the pointer to the buffer. 
**/ static struct lpfc_dmabuf * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index caa7a7b0ec53b3..f26eba713c5361 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -531,7 +531,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) int cnt; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - unsigned char *statep, *name; + unsigned char *statep; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -574,36 +574,32 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) default: statep = "UNKNOWN"; } - len += snprintf(buf+len, size-len, "%s DID:x%06x ", - statep, ndlp->nlp_DID); - name = (unsigned char *)&ndlp->nlp_portname; - len += snprintf(buf+len, size-len, - "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", - *name, *(name+1), *(name+2), *(name+3), - *(name+4), *(name+5), *(name+6), *(name+7)); - name = (unsigned char *)&ndlp->nlp_nodename; - len += snprintf(buf+len, size-len, - "WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", - *name, *(name+1), *(name+2), *(name+3), - *(name+4), *(name+5), *(name+6), *(name+7)); + len += snprintf(buf+len, size-len, "%s DID:x%06x ", + statep, ndlp->nlp_DID); + len += snprintf(buf+len, size-len, + "WWPN x%llx ", + wwn_to_u64(ndlp->nlp_portname.u.wwn)); + len += snprintf(buf+len, size-len, + "WWNN x%llx ", + wwn_to_u64(ndlp->nlp_nodename.u.wwn)); if (ndlp->nlp_flag & NLP_RPI_REGISTERED) - len += snprintf(buf+len, size-len, "RPI:%03d ", - ndlp->nlp_rpi); + len += snprintf(buf+len, size-len, "RPI:%03d ", + ndlp->nlp_rpi); else - len += snprintf(buf+len, size-len, "RPI:none "); + len += snprintf(buf+len, size-len, "RPI:none "); len += snprintf(buf+len, size-len, "flag:x%08x ", ndlp->nlp_flag); if (!ndlp->nlp_type) - len += snprintf(buf+len, size-len, "UNKNOWN_TYPE "); + len += snprintf(buf+len, size-len, "UNKNOWN_TYPE "); if (ndlp->nlp_type & NLP_FC_NODE) - len += snprintf(buf+len, size-len, "FC_NODE "); + len += snprintf(buf+len, size-len, "FC_NODE "); if (ndlp->nlp_type & NLP_FABRIC) - len += snprintf(buf+len, size-len, "FABRIC "); + len += snprintf(buf+len, size-len, "FABRIC "); if (ndlp->nlp_type & NLP_FCP_TARGET) - len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", + len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", ndlp->nlp_sid); if (ndlp->nlp_type & NLP_FCP_INITIATOR) - len += snprintf(buf+len, size-len, "FCP_INITIATOR "); + len += snprintf(buf+len, size-len, "FCP_INITIATOR "); len += snprintf(buf+len, size-len, "usgmap:%x ", ndlp->nlp_usg_map); len += snprintf(buf+len, size-len, "refcnt:%x", @@ -611,8 +607,10 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); + return len; } + #endif /** @@ -938,7 +936,7 @@ lpfc_debugfs_dumpData_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - printk(KERN_ERR "9059 BLKGRD: %s: _dump_buf_data=0x%p\n", + pr_err("9059 BLKGRD: %s: _dump_buf_data=0x%p\n", __func__, _dump_buf_data); debug->buffer = _dump_buf_data; if (!debug->buffer) { @@ -968,8 +966,8 @@ lpfc_debugfs_dumpDif_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - printk(KERN_ERR "9060 BLKGRD: %s: _dump_buf_dif=0x%p file=%pD\n", - __func__, _dump_buf_dif, file); + pr_err("9060 BLKGRD: %s: _dump_buf_dif=0x%p file=%pD\n", + __func__, _dump_buf_dif, file); debug->buffer = _dump_buf_dif; if (!debug->buffer) { kfree(debug); @@ 
-3853,7 +3851,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_rd) && (dma_tp == dma_mbox)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_RD_MBX) { do_dump |= LPFC_BSG_DMP_MBX_RD_MBX; - printk(KERN_ERR "\nRead mbox command (x%x), " + pr_err("\nRead mbox command (x%x), " "nemb:0x%x, extbuf_cnt:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3861,7 +3859,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_rd) && (dma_tp == dma_ebuf)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_RD_BUF) { do_dump |= LPFC_BSG_DMP_MBX_RD_BUF; - printk(KERN_ERR "\nRead mbox buffer (x%x), " + pr_err("\nRead mbox buffer (x%x), " "nemb:0x%x, extbuf_seq:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3869,7 +3867,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_wr) && (dma_tp == dma_mbox)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_WR_MBX) { do_dump |= LPFC_BSG_DMP_MBX_WR_MBX; - printk(KERN_ERR "\nWrite mbox command (x%x), " + pr_err("\nWrite mbox command (x%x), " "nemb:0x%x, extbuf_cnt:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3877,7 +3875,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_wr) && (dma_tp == dma_ebuf)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_WR_BUF) { do_dump |= LPFC_BSG_DMP_MBX_WR_BUF; - printk(KERN_ERR "\nWrite mbox buffer (x%x), " + pr_err("\nWrite mbox buffer (x%x), " "nemb:0x%x, extbuf_seq:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3889,7 +3887,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; len += snprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, @@ -3900,7 +3898,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, pword++; } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); (*mbx_dump_cnt)--; } @@ -3949,13 +3947,13 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) /* dump buffer content */ if (*mbx_dump_map & LPFC_MBX_DMP_MBX_WORD) { - printk(KERN_ERR "Mailbox command:0x%x dump by word:\n", + pr_err("Mailbox command:0x%x dump by word:\n", pmbox->mbxCommand); pword = (uint32_t *)pmbox; for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); len += snprintf(line_buf+len, @@ -3968,17 +3966,17 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) pword++; } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); - printk(KERN_ERR "\n"); + pr_err("%s\n", line_buf); + pr_err("\n"); } if (*mbx_dump_map & LPFC_MBX_DMP_MBX_BYTE) { - printk(KERN_ERR "Mailbox command:0x%x dump by byte:\n", + pr_err("Mailbox command:0x%x dump by byte:\n", pmbox->mbxCommand); pbyte = (uint8_t *)pmbox; for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); len += snprintf(line_buf+len, @@ -3996,8 +3994,8 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) LPFC_MBX_ACC_LBUF_SZ-len, " "); } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); - printk(KERN_ERR "\n"); + pr_err("%s\n", line_buf); + pr_err("\n"); } (*mbx_dump_cnt)--; @@ -4240,8 +4238,7 @@ lpfc_debugfs_initialize(struct 
lpfc_vport *vport) i++; } lpfc_debugfs_max_slow_ring_trc = (1 << i); - printk(KERN_ERR - "lpfc_debugfs_max_disc_trc changed to " + pr_err("lpfc_debugfs_max_disc_trc changed to " "%d\n", lpfc_debugfs_max_disc_trc); } } @@ -4273,6 +4270,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) (sizeof(struct lpfc_debugfs_trc) * lpfc_debugfs_max_slow_ring_trc)); } + } snprintf(name, sizeof(name), "vport%d", vport->vpi); @@ -4298,8 +4296,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) i++; } lpfc_debugfs_max_disc_trc = (1 << i); - printk(KERN_ERR - "lpfc_debugfs_max_disc_trc changed to %d\n", + pr_err("lpfc_debugfs_max_disc_trc changed to %d\n", lpfc_debugfs_max_disc_trc); } } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 82047070cdc973..92627df35a58af 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3972,12 +3972,13 @@ static void lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) { struct fc_rport *rport = ndlp->rport; + struct lpfc_vport *vport = ndlp->vport; - lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, "rport delete: did:x%x flg:x%x type x%x", ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "3184 rport unregister x%06x, rport %p\n", ndlp->nlp_DID, rport); @@ -4424,8 +4425,6 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi) { return 1; } - } else if (pring->ringno == psli->next_ring) { - } return 0; } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 8cffad192f1734..ec23388ac4f590 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4272,13 +4272,13 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) sprintf(message, "Unqualified optics - Replace with " "Avago optics for Warranty and Technical " "Support - Link is%s operational", - (operational) ? "" : " not"); + (operational) ? " not" : ""); break; case LPFC_SLI_EVENT_STATUS_UNCERTIFIED: sprintf(message, "Uncertified optics - Replace with " "Avago-certified optics to enable link " "operation - Link is%s operational", - (operational) ? "" : " not"); + (operational) ? 
" not" : ""); break; default: /* firmware is reporting a status we don't know about */ @@ -6207,6 +6207,7 @@ lpfc_create_shost(struct lpfc_hba *phba) shost = lpfc_shost_from_vport(vport); phba->pport = vport; + lpfc_debugfs_initialize(vport); /* Put reference to SCSI host to driver's device private data */ pci_set_drvdata(phba->pcidev, shost); @@ -6993,7 +6994,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) "VPI(B:%d M:%d) " "VFI(B:%d M:%d) " "RPI(B:%d M:%d) " - "FCFI(Count:%d)\n", + "FCFI:%d EQ:%d CQ:%d WQ:%d RQ:%d\n", phba->sli4_hba.extents_in_use, phba->sli4_hba.max_cfg_param.xri_base, phba->sli4_hba.max_cfg_param.max_xri, @@ -7003,7 +7004,12 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.max_vfi, phba->sli4_hba.max_cfg_param.rpi_base, phba->sli4_hba.max_cfg_param.max_rpi, - phba->sli4_hba.max_cfg_param.max_fcfi); + phba->sli4_hba.max_cfg_param.max_fcfi, + phba->sli4_hba.max_cfg_param.max_eq, + phba->sli4_hba.max_cfg_param.max_cq, + phba->sli4_hba.max_cfg_param.max_wq, + phba->sli4_hba.max_cfg_param.max_rq); + } if (rc) @@ -11344,7 +11350,6 @@ static struct miscdevice lpfc_mgmt_dev = { static int __init lpfc_init(void) { - int cpu; int error = 0; printk(LPFC_MODULE_DESC "\n"); @@ -11370,9 +11375,7 @@ lpfc_init(void) /* Initialize in case vector mapping is needed */ lpfc_used_cpu = NULL; - lpfc_present_cpu = 0; - for_each_present_cpu(cpu) - lpfc_present_cpu++; + lpfc_present_cpu = num_present_cpus(); error = pci_register_driver(&lpfc_driver); if (error) { diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 1180a22beb435c..bd7b75dbb97e4c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3894,7 +3894,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, } } chann = atomic_add_return(1, &phba->fcp_qidx); - chann = (chann % phba->cfg_fcp_io_channel); + chann = chann % phba->cfg_fcp_io_channel; return chann; } @@ -3967,6 +3967,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_cmd->prot_data_segment = NULL; } #endif + if (pnode && NLP_CHK_NODE_ACT(pnode)) atomic_dec(&pnode->cmd_pending); diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 8cb80dabada849..4d7062258cf8a6 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -178,13 +178,13 @@ struct lpfc_scsi_buf { #endif }; -#define LPFC_SCSI_DMA_EXT_SIZE 264 -#define LPFC_BPL_SIZE 1024 -#define MDAC_DIRECT_CMD 0x22 +#define LPFC_SCSI_DMA_EXT_SIZE 264 +#define LPFC_BPL_SIZE 1024 +#define MDAC_DIRECT_CMD 0x22 -#define FIND_FIRST_OAS_LUN 0 -#define NO_MORE_OAS_LUN -1 -#define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN +#define FIND_FIRST_OAS_LUN 0 +#define NO_MORE_OAS_LUN -1 +#define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 586984dce3a43f..19543142c94ca9 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -271,10 +271,11 @@ lpfc_sli4_eq_get(struct lpfc_queue *q) /* * insert barrier for instruction interlock : data from the hardware * must have the valid bit checked before it can be copied and acted - * upon. Given what was seen in lpfc_sli4_cq_get() of speculative - * instructions allowing action on content before valid bit checked, - * add barrier here as well. May not be needed as "content" is a - * single 32-bit entity here (vs multi word structure for cq's). + * upon. 
Speculative instructions were allowing a bcopy at the start + * of lpfc_sli4_fp_handle_wcqe(), which is called immediately + * after our return, to copy data before the valid bit check above + * was done. As such, some of the copied data was stale. The barrier + * ensures the check is before any data is copied. */ mb(); return eqe; @@ -386,11 +387,10 @@ lpfc_sli4_cq_get(struct lpfc_queue *q) /* * insert barrier for instruction interlock : data from the hardware * must have the valid bit checked before it can be copied and acted - * upon. Speculative instructions were allowing a bcopy at the start - * of lpfc_sli4_fp_handle_wcqe(), which is called immediately - * after our return, to copy data before the valid bit check above - * was done. As such, some of the copied data was stale. The barrier - * ensures the check is before any data is copied. + * upon. Given what was seen in lpfc_sli4_cq_get() of speculative + * instructions allowing action on content before valid bit checked, + * add barrier here as well. May not be needed as "content" is a + * single 32-bit entity here (vs multi word structure for cq's). */ mb(); return cqe; @@ -7368,7 +7368,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* copy results back to user */ - lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(phba->mbox, mbx, + MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, @@ -7378,7 +7379,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } else { /* First copy command data */ lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, - MAILBOX_CMD_SIZE); + MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_memcpy_from_slim(pmbox->context2, @@ -8906,10 +8907,10 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, } } } - } else if (piocb->iocb_flag & LPFC_IO_FCP) { + } else if (piocb->iocb_flag & LPFC_IO_FCP) /* These IO's already have an XRI and a mapped sgl. */ sglq = NULL; - } else { + else { /* * This is a continuation of a commandi,(CX) so this * sglq is on the active list @@ -13359,8 +13360,10 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, switch (cq->entry_count) { default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0361 Unsupported CQ count. 
(%d)\n", - cq->entry_count); + "0361 Unsupported CQ count: " + "entry cnt %d sz %d pg cnt %d repost %d\n", + cq->entry_count, cq->entry_size, + cq->page_count, cq->entry_repost); if (cq->entry_count < 256) { status = -EINVAL; goto out; @@ -14824,6 +14827,9 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba, return rc; } +static char *lpfc_rctl_names[] = FC_RCTL_NAMES_INIT; +static char *lpfc_type_names[] = FC_TYPE_NAMES_INIT; + /** * lpfc_fc_frame_check - Check that this frame is a valid frame to handle * @phba: pointer to lpfc_hba struct that the frame was received on @@ -14838,8 +14844,6 @@ static int lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) { /* make rctl_names static to save stack space */ - static char *rctl_names[] = FC_RCTL_NAMES_INIT; - char *type_names[] = FC_TYPE_NAMES_INIT; struct fc_vft_header *fc_vft_hdr; uint32_t *header = (uint32_t *) fc_hdr; @@ -14894,8 +14898,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "2538 Received frame rctl:%s (x%x), type:%s (x%x), " "frame Data:%08x %08x %08x %08x %08x %08x %08x\n", - rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, - type_names[fc_hdr->fh_type], fc_hdr->fh_type, + lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, + lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type, be32_to_cpu(header[0]), be32_to_cpu(header[1]), be32_to_cpu(header[2]), be32_to_cpu(header[3]), be32_to_cpu(header[4]), be32_to_cpu(header[5]), @@ -14904,8 +14908,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) drop: lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2539 Dropped frame rctl:%s type:%s\n", - rctl_names[fc_hdr->fh_r_ctl], - type_names[fc_hdr->fh_type]); + lpfc_rctl_names[fc_hdr->fh_r_ctl], + lpfc_type_names[fc_hdr->fh_type]); return 1; } @@ -15726,11 +15730,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, /* Process each received buffer */ fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; + /* check to see if this a valid type of frame */ if (lpfc_fc_frame_check(phba, fc_hdr)) { lpfc_in_buf_free(phba, &dmabuf->dbuf); return; } + if ((bf_get(lpfc_cqe_code, &dmabuf->cq_event.cqe.rcqe_cmpl) == CQE_CODE_RECEIVE_V1)) fcfi = bf_get(lpfc_rcqe_fcf_id_v1, diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 74227a28bd569e..3fad5657514b43 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -97,11 +97,11 @@ struct lpfc_iocbq { struct lpfc_node_rrq *rrq; } context_un; - void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*fabric_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); - void (*wait_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*wait_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); - void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); }; diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index e18bbc66e83b1f..5bbe6af148dd46 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -33,6 +33,7 @@ #include #include #include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" From 07bcd98efba2edd2bd2230e24f6a46a09991e2a4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:28 -0800 Subject: [PATCH 0149/1911] scsi: lpfc: refactor debugfs queue prints Create common wq, cq, eq, rq print functions 
Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_debugfs.c | 564 +++++++++++++------------------ 2 files changed, 228 insertions(+), 337 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 64022d79117b78..8e0d6f43878d5f 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -948,6 +948,7 @@ struct lpfc_hba { struct dentry *idiag_ctl_acc; struct dentry *idiag_mbx_acc; struct dentry *idiag_ext_acc; + uint8_t lpfc_idiag_last_eq; #endif /* Used for deferred freeing of ELS data buffers */ diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index f26eba713c5361..866361d4d63742 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1972,6 +1972,155 @@ lpfc_idiag_baracc_write(struct file *file, const char __user *buf, return -EINVAL; } +static int +__lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\t%s WQ info: ", wqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocCQID[%04d]: WQ-STAT[oflow:x%x posted:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, + (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, + qp->entry_size, qp->host_index, + qp->hba_index); + len += snprintf(pbuffer + len, + LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + return len; +} + +static int +lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer, + int *len, int max_cnt, int cq_id) +{ + struct lpfc_queue *qp; + int qidx; + + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + qp = phba->sli4_hba.fcp_wq[qidx]; + if (qp->assoc_qid != cq_id) + continue; + *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len); + if (*len >= max_cnt) + return 1; + } + return 0; +} + +static int +__lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t%s CQ info: ", cqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocEQID[%02d]: CQ STAT[max:x%x relw:x%x " + "xabt:x%x wq:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, + qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, + qp->entry_size, qp->host_index, + qp->hba_index); + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + +static int +__lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, + char *rqtype, char *pbuffer, int len) +{ + if (!qp || !datqp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\t%s RQ info: ", rqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x " + "trunc:x%x rcv:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, + qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tHQID[%02d], 
QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]\n", + qp->queue_id, qp->entry_count, qp->entry_size, + qp->host_index, qp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]\n", + datqp->queue_id, datqp->entry_count, + datqp->entry_size, datqp->host_index, + datqp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + +static int +lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, + int *len, int max_cnt, int eq_id) +{ + struct lpfc_queue *qp; + int qidx, rc; + + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + qp = phba->sli4_hba.fcp_cq[qidx]; + if (qp->assoc_qid != eq_id) + continue; + + *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + return 1; + } + + return 0; +} + +static int +__lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\n%s EQ info: EQ-STAT[max:x%x noE:x%x " + "bs:x%x proc:x%llx]\n", + eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, + (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, qp->entry_size, + qp->host_index, qp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + /** * lpfc_idiag_queinfo_read - idiag debugfs read queue information * @file: The file pointer to read from. @@ -1982,6 +2131,9 @@ lpfc_idiag_baracc_write(struct file *file, const char __user *buf, * Description: * This routine reads data from the @phba SLI4 PCI function queue information, * and copies to user @buf. + * This routine only returns 1 EQs worth of information. It remembers the last + * EQ read and jumps to the next EQ. Thus subsequent calls to queInfo will + * retrieve all EQs allocated for the phba. 
* * Returns: * This function returns the amount of data that was read (this could be less @@ -1993,19 +2145,16 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, { struct lpfc_debug *debug = file->private_data; struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; - int len = 0; char *pbuffer; - int x, cnt; - int max_cnt; + int max_cnt, rc, x, len = 0; struct lpfc_queue *qp = NULL; - if (!debug->buffer) debug->buffer = kmalloc(LPFC_QUE_INFO_GET_BUF_SIZE, GFP_KERNEL); if (!debug->buffer) return 0; pbuffer = debug->buffer; - max_cnt = LPFC_QUE_INFO_GET_BUF_SIZE - 128; + max_cnt = LPFC_QUE_INFO_GET_BUF_SIZE - 256; if (*ppos) return 0; @@ -2014,374 +2163,115 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, /* Fast-path event queue */ if (phba->sli4_hba.hba_eq && phba->cfg_fcp_io_channel) { - cnt = phba->cfg_fcp_io_channel; - for (x = 0; x < cnt; x++) { + x = phba->lpfc_idiag_last_eq; + if (phba->cfg_fof && (x >= phba->cfg_fcp_io_channel)) { + phba->lpfc_idiag_last_eq = 0; + goto fof; + } + phba->lpfc_idiag_last_eq++; + if (phba->lpfc_idiag_last_eq >= phba->cfg_fcp_io_channel) + if (phba->cfg_fof == 0) + phba->lpfc_idiag_last_eq = 0; - /* Fast-path EQ */ - qp = phba->sli4_hba.hba_eq[x]; - if (!qp) - goto proc_cq; + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "EQ %d out of %d HBA EQs\n", + x, phba->cfg_fcp_io_channel); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\nHBA EQ info: " - "EQ-STAT[max:x%x noE:x%x " - "bs:x%x proc:x%llx]\n", - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "EQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - - /* Reset max counter */ - qp->EQ_max_eqe = 0; + /* Fast-path EQ */ + qp = phba->sli4_hba.hba_eq[x]; + if (!qp) + goto out; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; -proc_cq: - /* Fast-path FCP CQ */ - qp = phba->sli4_hba.fcp_cq[x]; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tFCP CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); + len = __lpfc_idiag_print_eq(qp, "HBA", pbuffer, len); + /* Reset max counter */ + qp->EQ_max_eqe = 0; - /* Reset max counter */ - qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; + rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, + max_cnt, qp->queue_id); + if (rc) + goto too_big; - /* Fast-path FCP WQ */ - qp = phba->sli4_hba.fcp_wq[x]; + /* Only EQ 0 has slow path CQs configured */ + if (x) + goto out; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tFCP WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "WQ-STAT[oflow:x%x posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, (unsigned long 
long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - - if (x) - continue; - - /* Only EQ 0 has slow path CQs configured */ - - /* Slow-path mailbox CQ */ - qp = phba->sli4_hba.mbx_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tMBX CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ-STAT[mbox:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path mailbox CQ */ + qp = phba->sli4_hba.mbx_cq; + len = __lpfc_idiag_print_cq(qp, "MBX", pbuffer, len); + if (len >= max_cnt) + goto too_big; - /* Slow-path MBOX MQ */ - qp = phba->sli4_hba.mbx_wq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tMBX MQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]:\n", - phba->sli4_hba.mbx_wq->assoc_qid); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path MBOX MQ */ + qp = phba->sli4_hba.mbx_wq; + len = __lpfc_idiag_print_wq(qp, "MBX", pbuffer, len); + if (len >= max_cnt) + goto too_big; - /* Slow-path ELS response CQ */ - qp = phba->sli4_hba.els_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tELS CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ-STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID [%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - /* Reset max counter */ - qp->CQ_max_cqe = 0; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path ELS response CQ */ + qp = phba->sli4_hba.els_cq; + len = __lpfc_idiag_print_cq(qp, "ELS", pbuffer, len); + /* Reset max counter */ + if (qp) + qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; - /* Slow-path ELS WQ */ - qp = phba->sli4_hba.els_wq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tELS WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - " WQ-STAT[oflow:x%x " - "posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, - (unsigned long long)qp->q_cnt_4); - len += 
snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path ELS WQ */ + qp = phba->sli4_hba.els_wq; + len = __lpfc_idiag_print_wq(qp, "ELS", pbuffer, len); + if (len >= max_cnt) + goto too_big; - if (phba->sli4_hba.hdr_rq && phba->sli4_hba.dat_rq) { - /* Slow-path RQ header */ - qp = phba->sli4_hba.hdr_rq; + qp = phba->sli4_hba.hdr_rq; + len = __lpfc_idiag_print_rqpair(qp, phba->sli4_hba.dat_rq, + "RQpair", pbuffer, len); + if (len >= max_cnt) + goto too_big; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tRQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "RQ-STAT[nopost:x%x nobuf:x%x " - "trunc:x%x rcv:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tHQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - /* Slow-path RQ data */ - qp = phba->sli4_hba.dat_rq; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tDQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - } - } + goto out; } +fof: if (phba->cfg_fof) { /* FOF EQ */ qp = phba->sli4_hba.fof_eq; - if (!qp) - goto out; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\nFOF EQ info: " - "EQ-STAT[max:x%x noE:x%x " - "bs:x%x proc:x%llx]\n", - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "EQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); + len = __lpfc_idiag_print_eq(qp, "FOF", pbuffer, len); /* Reset max counter */ - qp->EQ_max_eqe = 0; + if (qp) + qp->EQ_max_eqe = 0; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); if (len >= max_cnt) goto too_big; - } - - if (phba->cfg_fof) { /* OAS CQ */ qp = phba->sli4_hba.oas_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tOAS CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - /* Reset max counter */ + len = __lpfc_idiag_print_cq(qp, "OAS", pbuffer, len); + /* Reset max counter */ + if (qp) qp->CQ_max_cqe = 0; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + if (len >= max_cnt) + goto too_big; /* OAS WQ */ qp = phba->sli4_hba.oas_wq; - if (qp) { - len += 
snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tOAS WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "WQ-STAT[oflow:x%x posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + len = __lpfc_idiag_print_wq(qp, "OAS", pbuffer, len); + if (len >= max_cnt) + goto too_big; } -out: + spin_unlock_irq(&phba->hbalock); return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len); too_big: - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "Truncated ...\n"); + len += snprintf(pbuffer + len, + LPFC_QUE_INFO_GET_BUF_SIZE - len, "Truncated ...\n"); +out: spin_unlock_irq(&phba->hbalock); return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len); } From 1d9d5a9879ad493ee7cf75987df1f365c61fefe5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:29 -0800 Subject: [PATCH 0150/1911] scsi: lpfc: refactor debugfs queue dump routines Create common wq, cq, eq, rq dump functions Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 22 ++-- drivers/scsi/lpfc/lpfc_debugfs.h | 214 +++++++++++++------------------ 2 files changed, 101 insertions(+), 135 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 866361d4d63742..93ddda16c63277 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -4555,31 +4555,31 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) void lpfc_debug_dump_all_queues(struct lpfc_hba *phba) { - int fcp_wqidx; + int idx; /* * Dump Work Queues (WQs) */ - lpfc_debug_dump_mbx_wq(phba); - lpfc_debug_dump_els_wq(phba); + lpfc_debug_dump_wq(phba, DUMP_MBX, 0); + lpfc_debug_dump_wq(phba, DUMP_ELS, 0); - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_fcp_wq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_wq(phba, DUMP_FCP, idx); lpfc_debug_dump_hdr_rq(phba); lpfc_debug_dump_dat_rq(phba); /* * Dump Complete Queues (CQs) */ - lpfc_debug_dump_mbx_cq(phba); - lpfc_debug_dump_els_cq(phba); + lpfc_debug_dump_cq(phba, DUMP_MBX, 0); + lpfc_debug_dump_cq(phba, DUMP_ELS, 0); - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_fcp_cq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_cq(phba, DUMP_FCP, idx); /* * Dump Event Queues (EQs) */ - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_hba_eq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_hba_eq(phba, idx); } diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 8b2b6a3bfc25b5..9ae2c4b5fd127e 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -42,6 +42,12 @@ /* hbqinfo output buffer size */ #define LPFC_HBQINFO_SIZE 8192 +enum { + DUMP_FCP, + DUMP_MBX, + DUMP_ELS, +}; + /* * For SLI4 iDiag debugfs diagnostics tool */ @@ -358,58 +364,97 @@ 
lpfc_debug_dump_q(struct lpfc_queue *q) } /** - * lpfc_debug_dump_fcp_wq - dump all entries from a fcp work queue + * lpfc_debug_dump_wq - dump all entries from the fcp work queue * @phba: Pointer to HBA context object. - * @fcp_wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP work queue specified by the - * @fcp_wqidx. + * This function dumps all entries from a FCP work queue specified + * by the wqidx. **/ static inline void -lpfc_debug_dump_fcp_wq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) { - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) + struct lpfc_queue *wq; + char *qtypestr; + + if (qtype == DUMP_FCP) { + wq = phba->sli4_hba.fcp_wq[wqidx]; + qtypestr = "FCP"; + } else if (qtype == DUMP_MBX) { + wq = phba->sli4_hba.mbx_wq; + qtypestr = "MBX"; + } else if (qtype == DUMP_ELS) { + wq = phba->sli4_hba.els_wq; + qtypestr = "ELS"; + } else return; - printk(KERN_ERR "FCP WQ: WQ[Idx:%d|Qid:%d]\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.fcp_wq[fcp_wqidx]); + if (qtype == DUMP_FCP) + pr_err("%s WQ: WQ[Idx:%d|Qid:%d]\n", + qtypestr, wqidx, wq->queue_id); + else + pr_err("%s WQ: WQ[Qid:%d]\n", + qtypestr, wq->queue_id); + + lpfc_debug_dump_q(wq); } /** - * lpfc_debug_dump_fcp_cq - dump all entries from a fcp work queue's cmpl queue + * lpfc_debug_dump_cq - dump all entries from a fcp work queue's + * cmpl queue * @phba: Pointer to HBA context object. - * @fcp_wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP complete queue which is - * associated to the FCP work queue specified by the @fcp_wqidx. + * This function dumps all entries from a FCP completion queue + * which is associated to the work queue specified by the @wqidx. **/ static inline void -lpfc_debug_dump_fcp_cq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) { - int fcp_cqidx, fcp_cqid; - - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) + struct lpfc_queue *wq, *cq, *eq; + char *qtypestr; + int eqidx; + + /* fcp wq and cq are 1:1, thus same indexes */ + + if (qtype == DUMP_FCP) { + wq = phba->sli4_hba.fcp_wq[wqidx]; + cq = phba->sli4_hba.fcp_cq[wqidx]; + qtypestr = "FCP"; + } else if (qtype == DUMP_MBX) { + wq = phba->sli4_hba.mbx_wq; + cq = phba->sli4_hba.mbx_cq; + qtypestr = "MBX"; + } else if (qtype == DUMP_ELS) { + wq = phba->sli4_hba.els_wq; + cq = phba->sli4_hba.els_cq; + qtypestr = "ELS"; + } else return; - fcp_cqid = phba->sli4_hba.fcp_wq[fcp_wqidx]->assoc_qid; - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) - if (phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id == fcp_cqid) + for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) { + eq = phba->sli4_hba.hba_eq[eqidx]; + if (cq->assoc_qid == eq->queue_id) break; - if (phba->intr_type == MSIX) { - if (fcp_cqidx >= phba->cfg_fcp_io_channel) - return; - } else { - if (fcp_cqidx > 0) - return; + } + if (eqidx == phba->cfg_fcp_io_channel) { + pr_err("Couldn't find EQ for CQ. 
Using EQ[0]\n"); + eqidx = 0; + eq = phba->sli4_hba.hba_eq[0]; } - printk(KERN_ERR "FCP CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]:\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cqidx, fcp_cqid); - lpfc_debug_dump_q(phba->sli4_hba.fcp_cq[fcp_cqidx]); + if (qtype == DUMP_FCP) + pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]" + "->EQ[Idx:%d|Qid:%d]:\n", + qtypestr, wqidx, wq->queue_id, wqidx, cq->queue_id, + eqidx, eq->queue_id); + else + pr_err("%s CQ: WQ[Qid:%d]->CQ[Qid:%d]" + "->EQ[Idx:%d|Qid:%d]:\n", + qtypestr, wq->queue_id, cq->queue_id, + eqidx, eq->queue_id); + + lpfc_debug_dump_q(cq); } /** @@ -421,64 +466,15 @@ lpfc_debug_dump_fcp_cq(struct lpfc_hba *phba, int fcp_wqidx) * associated to the FCP work queue specified by the @fcp_wqidx. **/ static inline void -lpfc_debug_dump_hba_eq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_hba_eq(struct lpfc_hba *phba, int qidx) { - struct lpfc_queue *qdesc; - int fcp_eqidx, fcp_eqid; - int fcp_cqidx, fcp_cqid; - - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) - return; - fcp_cqid = phba->sli4_hba.fcp_wq[fcp_wqidx]->assoc_qid; - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) - if (phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id == fcp_cqid) - break; - if (phba->intr_type == MSIX) { - if (fcp_cqidx >= phba->cfg_fcp_io_channel) - return; - } else { - if (fcp_cqidx > 0) - return; - } + struct lpfc_queue *qp; - fcp_eqidx = fcp_cqidx; - fcp_eqid = phba->sli4_hba.hba_eq[fcp_eqidx]->queue_id; - qdesc = phba->sli4_hba.hba_eq[fcp_eqidx]; + qp = phba->sli4_hba.hba_eq[qidx]; - printk(KERN_ERR "FCP EQ: WQ[Idx:%d|Qid:%d]->CQ[Idx:%d|Qid:%d]->" - "EQ[Idx:%d|Qid:%d]\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cqidx, fcp_cqid, fcp_eqidx, fcp_eqid); - lpfc_debug_dump_q(qdesc); -} + pr_err("EQ[Idx:%d|Qid:%d]\n", qidx, qp->queue_id); -/** - * lpfc_debug_dump_els_wq - dump all entries from the els work queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the ELS work queue. - **/ -static inline void -lpfc_debug_dump_els_wq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "ELS WQ: WQ[Qid:%d]:\n", - phba->sli4_hba.els_wq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.els_wq); -} - -/** - * lpfc_debug_dump_mbx_wq - dump all entries from the mbox work queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the MBOX work queue. - **/ -static inline void -lpfc_debug_dump_mbx_wq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "MBX WQ: WQ[Qid:%d]\n", - phba->sli4_hba.mbx_wq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.mbx_wq); + lpfc_debug_dump_q(qp); } /** @@ -509,36 +505,6 @@ lpfc_debug_dump_hdr_rq(struct lpfc_hba *phba) lpfc_debug_dump_q(phba->sli4_hba.hdr_rq); } -/** - * lpfc_debug_dump_els_cq - dump all entries from the els complete queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the els complete queue. - **/ -static inline void -lpfc_debug_dump_els_cq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "ELS CQ: WQ[Qid:%d]->CQ[Qid:%d]\n", - phba->sli4_hba.els_wq->queue_id, - phba->sli4_hba.els_cq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.els_cq); -} - -/** - * lpfc_debug_dump_mbx_cq - dump all entries from the mbox complete queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the mbox complete queue. 
- **/ -static inline void -lpfc_debug_dump_mbx_cq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "MBX CQ: WQ[Qid:%d]->CQ[Qid:%d]\n", - phba->sli4_hba.mbx_wq->queue_id, - phba->sli4_hba.mbx_cq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); -} - /** * lpfc_debug_dump_wq_by_id - dump all entries from a work queue by queue id * @phba: Pointer to HBA context object. @@ -556,14 +522,15 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid) if (phba->sli4_hba.fcp_wq[wq_idx]->queue_id == qid) break; if (wq_idx < phba->cfg_fcp_io_channel) { - printk(KERN_ERR "FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); + pr_err("FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.fcp_wq[wq_idx]); return; } if (phba->sli4_hba.els_wq->queue_id == qid) { - printk(KERN_ERR "ELS WQ[Qid:%d]\n", qid); + pr_err("ELS WQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_wq); + return; } } @@ -617,27 +584,26 @@ lpfc_debug_dump_rq_by_id(struct lpfc_hba *phba, int qid) static inline void lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid) { - int cq_idx = 0; + int cq_idx; - do { + for (cq_idx = 0; cq_idx < phba->cfg_fcp_io_channel; cq_idx++) if (phba->sli4_hba.fcp_cq[cq_idx]->queue_id == qid) break; - } while (++cq_idx < phba->cfg_fcp_io_channel); if (cq_idx < phba->cfg_fcp_io_channel) { - printk(KERN_ERR "FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); + pr_err("FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.fcp_cq[cq_idx]); return; } if (phba->sli4_hba.els_cq->queue_id == qid) { - printk(KERN_ERR "ELS CQ[Qid:%d]\n", qid); + pr_err("ELS CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_cq); return; } if (phba->sli4_hba.mbx_cq->queue_id == qid) { - printk(KERN_ERR "MBX CQ[Qid:%d]\n", qid); + pr_err("MBX CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); } } From 895427bd012ce5814fc9888c7c0ee9de44761833 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:30 -0800 Subject: [PATCH 0151/1911] scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. 
Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy Signed-off-by: James Smart ---- Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 80 +- drivers/scsi/lpfc/lpfc_attr.c | 374 ++++- drivers/scsi/lpfc/lpfc_bsg.c | 27 +- drivers/scsi/lpfc/lpfc_crtn.h | 27 +- drivers/scsi/lpfc/lpfc_debugfs.c | 178 ++- drivers/scsi/lpfc/lpfc_debugfs.h | 72 +- drivers/scsi/lpfc/lpfc_els.c | 38 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 157 +- drivers/scsi/lpfc/lpfc_hw.h | 3 +- drivers/scsi/lpfc/lpfc_hw4.h | 63 +- drivers/scsi/lpfc/lpfc_init.c | 2141 +++++++++++++++------------- drivers/scsi/lpfc/lpfc_logmsg.h | 4 + drivers/scsi/lpfc/lpfc_mbox.c | 23 +- drivers/scsi/lpfc/lpfc_mem.c | 62 +- drivers/scsi/lpfc/lpfc_nportdisc.c | 7 +- drivers/scsi/lpfc/lpfc_nvme.h | 88 ++ drivers/scsi/lpfc/lpfc_scsi.c | 101 +- drivers/scsi/lpfc/lpfc_scsi.h | 6 + drivers/scsi/lpfc/lpfc_sli.c | 1429 ++++++++++++++----- drivers/scsi/lpfc/lpfc_sli.h | 30 +- drivers/scsi/lpfc/lpfc_sli4.h | 75 +- drivers/scsi/lpfc/lpfc_vport.c | 2 + 22 files changed, 3353 insertions(+), 1634 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvme.h diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8e0d6f43878d5f..77ad757ca23186 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -20,6 +20,7 @@ *******************************************************************/ #include +#include #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS) #define CONFIG_SCSI_LPFC_DEBUG_FS @@ -53,6 +54,7 @@ struct lpfc_sli2_slim; #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MAX_SGL_SEG_CNT 512 /* SGL element count per scsi cmnd */ #define LPFC_MAX_BPL_SEG_CNT 4096 /* BPL element count per scsi cmnd */ +#define LPFC_MIN_NVME_SEG_CNT 254 #define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ @@ -114,6 +116,13 @@ enum lpfc_polling_flags { DISABLE_FCP_RING_INT = 0x2 }; +struct perf_prof { + uint16_t cmd_cpu[40]; + uint16_t rsp_cpu[40]; + uint16_t qh_cpu[40]; + uint16_t wqidx[40]; +}; + /* Provide DMA memory definitions the driver uses per port instance. */ struct lpfc_dmabuf { struct list_head list; @@ -131,10 +140,24 @@ struct lpfc_dma_pool { struct hbq_dmabuf { struct lpfc_dmabuf hbuf; struct lpfc_dmabuf dbuf; - uint32_t size; + uint16_t total_size; + uint16_t bytes_recv; uint32_t tag; struct lpfc_cq_event cq_event; unsigned long time_stamp; + void *context; +}; + +struct rqb_dmabuf { + struct lpfc_dmabuf hbuf; + struct lpfc_dmabuf dbuf; + uint16_t total_size; + uint16_t bytes_recv; + void *context; + struct lpfc_iocbq *iocbq; + struct lpfc_sglq *sglq; + struct lpfc_queue *hrq; /* ptr to associated Header RQ */ + struct lpfc_queue *drq; /* ptr to associated Data RQ */ }; /* Priority bit. Set value to exceed low water mark in lpfc_mem. */ @@ -442,6 +465,11 @@ struct lpfc_vport { uint16_t fdmi_num_disc; uint32_t fdmi_hba_mask; uint32_t fdmi_port_mask; + + /* There is a single nvme instance per vport. 
*/ + struct nvme_fc_local_port *localport; + uint8_t nvmei_support; /* driver supports NVME Initiator */ + uint32_t last_fcp_wqidx; }; struct hbq_s { @@ -459,10 +487,9 @@ struct hbq_s { struct hbq_dmabuf *); }; -#define LPFC_MAX_HBQS 4 /* this matches the position in the lpfc_hbq_defs array */ #define LPFC_ELS_HBQ 0 -#define LPFC_EXTRA_HBQ 1 +#define LPFC_MAX_HBQS 1 enum hba_temp_state { HBA_NORMAL_TEMP, @@ -652,6 +679,8 @@ struct lpfc_hba { * Firmware supports Forced Link Speed * capability */ +#define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ + uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; @@ -700,6 +729,8 @@ struct lpfc_hba { uint8_t wwpn[8]; uint32_t RandomData[7]; uint8_t fcp_embed_io; + uint8_t nvme_support; /* Firmware supports NVME */ + uint8_t nvmet_support; /* driver supports NVMET */ uint8_t mds_diags_support; /* HBA Config Parameters */ @@ -725,6 +756,9 @@ struct lpfc_hba { uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; uint32_t cfg_fcp_io_channel; + uint32_t cfg_nvme_oas; + uint32_t cfg_nvme_io_channel; + uint32_t cfg_nvme_enable_fb; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; @@ -770,6 +804,12 @@ struct lpfc_hba { #define LPFC_FDMI_SUPPORT 1 /* FDMI supported? */ uint32_t cfg_enable_SmartSAN; uint32_t cfg_enable_mds_diags; + uint32_t cfg_enable_fc4_type; + uint32_t cfg_xri_split; +#define LPFC_ENABLE_FCP 1 +#define LPFC_ENABLE_NVME 2 +#define LPFC_ENABLE_BOTH 3 + uint32_t io_channel_irqs; /* number of irqs for io channels */ lpfc_vpd_t vpd; /* vital product data */ struct pci_dev *pcidev; @@ -784,11 +824,11 @@ struct lpfc_hba { unsigned long data_flags; uint32_t hbq_in_use; /* HBQs in use flag */ - struct list_head rb_pend_list; /* Received buffers to be processed */ uint32_t hbq_count; /* Count of configured HBQs */ struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */ - atomic_t fcp_qidx; /* next work queue to post work to */ + atomic_t fcp_qidx; /* next FCP WQ (RR Policy) */ + atomic_t nvme_qidx; /* next NVME WQ (RR Policy) */ phys_addr_t pci_bar0_map; /* Physical address for PCI BAR0 */ phys_addr_t pci_bar1_map; /* Physical address for PCI BAR1 */ @@ -843,9 +883,17 @@ struct lpfc_hba { /* * stat counters */ - uint64_t fc4InputRequests; - uint64_t fc4OutputRequests; - uint64_t fc4ControlRequests; + uint64_t fc4ScsiInputRequests; + uint64_t fc4ScsiOutputRequests; + uint64_t fc4ScsiControlRequests; + uint64_t fc4ScsiIoCmpls; + uint64_t fc4NvmeInputRequests; + uint64_t fc4NvmeOutputRequests; + uint64_t fc4NvmeControlRequests; + uint64_t fc4NvmeIoCmpls; + uint64_t fc4NvmeLsRequests; + uint64_t fc4NvmeLsCmpls; + uint64_t bg_guard_err_cnt; uint64_t bg_apptag_err_cnt; uint64_t bg_reftag_err_cnt; @@ -856,17 +904,23 @@ struct lpfc_hba { struct list_head lpfc_scsi_buf_list_get; struct list_head lpfc_scsi_buf_list_put; uint32_t total_scsi_bufs; + spinlock_t nvme_buf_list_get_lock; /* NVME buf alloc list lock */ + spinlock_t nvme_buf_list_put_lock; /* NVME buf free list lock */ + struct list_head lpfc_nvme_buf_list_get; + struct list_head lpfc_nvme_buf_list_put; + uint32_t total_nvme_bufs; struct list_head lpfc_iocb_list; uint32_t total_iocbq_bufs; struct list_head active_rrq_list; spinlock_t hbalock; /* pci_mem_pools */ - struct pci_pool *lpfc_scsi_dma_buf_pool; + struct pci_pool *lpfc_sg_dma_buf_pool; struct pci_pool *lpfc_mbuf_pool; struct pci_pool *lpfc_hrb_pool; /* header receive buffer pool */ struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */ 
struct pci_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ + struct pci_pool *txrdy_payload_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; mempool_t *mbox_mem_pool; @@ -1092,3 +1146,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) return 0; } + +static inline struct lpfc_sli_ring * +lpfc_phba_elsring(struct lpfc_hba *phba) +{ + if (phba->sli_rev == LPFC_SLI_REV4) + return phba->sli4_hba.els_wq->pring; + return &phba->sli.sli3_ring[LPFC_ELS_RING]; +} diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ce41dea476827b..b91b4bb1062df5 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -35,14 +35,17 @@ #include #include +#include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" @@ -129,6 +132,124 @@ lpfc_enable_fip_show(struct device *dev, struct device_attribute *attr, return snprintf(buf, PAGE_SIZE, "0\n"); } +static ssize_t +lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = shost_priv(shost); + struct lpfc_hba *phba = vport->phba; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *nrport; + char *statep; + int len = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + len += snprintf(buf, PAGE_SIZE, "NVME Disabled\n"); + return len; + } + + localport = vport->localport; + if (!localport) { + len = snprintf(buf, PAGE_SIZE, + "NVME Initiator x%llx is not allocated\n", + wwn_to_u64(vport->fc_portname.u.wwn)); + return len; + } + len = snprintf(buf, PAGE_SIZE, "NVME Initiator Enabled\n"); + + spin_lock_irq(shost->host_lock); + lport = (struct lpfc_nvme_lport *)localport->private; + + /* Port state is only one of two values for now. */ + if (localport->port_id) + statep = "ONLINE"; + else + statep = "UNKNOWN "; + + len += snprintf(buf + len, PAGE_SIZE - len, + "%s%d WWPN x%llx WWNN x%llx DID x%06x %s\n", + "NVME LPORT lpfc", + phba->brd_no, + wwn_to_u64(vport->fc_portname.u.wwn), + wwn_to_u64(vport->fc_nodename.u.wwn), + localport->port_id, statep); + + list_for_each_entry(rport, &lport->rport_list, list) { + /* local short-hand pointer. */ + nrport = rport->remoteport; + + /* Port state is only one of two values for now. */ + switch (nrport->port_state) { + case FC_OBJSTATE_ONLINE: + statep = "ONLINE"; + break; + case FC_OBJSTATE_UNKNOWN: + statep = "UNKNOWN "; + break; + default: + statep = "UNSUPPORTED"; + break; + } + + /* Tab in to show lport ownership. 
*/ + len += snprintf(buf + len, PAGE_SIZE - len, + "NVME RPORT "); + if (phba->brd_no >= 10) + len += snprintf(buf + len, PAGE_SIZE - len, " "); + + len += snprintf(buf + len, PAGE_SIZE - len, "WWPN x%llx ", + nrport->port_name); + len += snprintf(buf + len, PAGE_SIZE - len, "WWNN x%llx ", + nrport->node_name); + len += snprintf(buf + len, PAGE_SIZE - len, "DID x%06x ", + nrport->port_id); + + switch (nrport->port_role) { + case FC_PORT_ROLE_NVME_INITIATOR: + len += snprintf(buf + len, PAGE_SIZE - len, + "INITIATOR "); + break; + case FC_PORT_ROLE_NVME_TARGET: + len += snprintf(buf + len, PAGE_SIZE - len, + "TARGET "); + break; + case FC_PORT_ROLE_NVME_DISCOVERY: + len += snprintf(buf + len, PAGE_SIZE - len, + "DISCOVERY "); + break; + default: + len += snprintf(buf + len, PAGE_SIZE - len, + "UNKNOWN_ROLE x%x", + nrport->port_role); + break; + } + len += snprintf(buf + len, PAGE_SIZE - len, "%s ", statep); + /* Terminate the string. */ + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + } + spin_unlock_irq(shost->host_lock); + + len += snprintf(buf + len, PAGE_SIZE, "\nNVME Statistics\n"); + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Xmt %016llx Cmpl %016llx\n", + phba->fc4NvmeLsRequests, + phba->fc4NvmeLsCmpls); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP: Rd %016llx Wr %016llx IO %016llx\n", + phba->fc4NvmeInputRequests, + phba->fc4NvmeOutputRequests, + phba->fc4NvmeControlRequests); + + len += snprintf(buf+len, PAGE_SIZE-len, + " Cmpl %016llx\n", phba->fc4NvmeIoCmpls); + + return len; +} + static ssize_t lpfc_bg_info_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -675,6 +796,28 @@ lpfc_issue_lip(struct Scsi_Host *shost) return 0; } +int +lpfc_emptyq_wait(struct lpfc_hba *phba, struct list_head *q, spinlock_t *lock) +{ + int cnt = 0; + + spin_lock_irq(lock); + while (!list_empty(q)) { + spin_unlock_irq(lock); + msleep(20); + if (cnt++ > 250) { /* 5 secs */ + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "0466 %s %s\n", + "Outstanding IO when ", + "bringing Adapter offline\n"); + return 0; + } + spin_lock_irq(lock); + } + spin_unlock_irq(lock); + return 1; +} + /** * lpfc_do_offline - Issues a mailbox command to bring the link down * @phba: lpfc_hba pointer. @@ -694,10 +837,10 @@ static int lpfc_do_offline(struct lpfc_hba *phba, uint32_t type) { struct completion online_compl; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_sli *psli; int status = 0; - int cnt = 0; int i; int rc; @@ -717,20 +860,24 @@ lpfc_do_offline(struct lpfc_hba *phba, uint32_t type) /* Wait a little for things to settle down, but not * long enough for dev loss timeout to expire. 
*/ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - while (!list_empty(&pring->txcmplq)) { - msleep(10); - if (cnt++ > 500) { /* 5 secs */ - lpfc_printf_log(phba, - KERN_WARNING, LOG_INIT, - "0466 Outstanding IO when " - "bringing Adapter offline\n"); - break; - } + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &phba->hbalock)) + goto out; + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &pring->ring_lock)) + goto out; } } - +out: init_completion(&online_compl); rc = lpfc_workq_post_event(phba, &status, &online_compl, type); if (rc == 0) @@ -1945,6 +2092,7 @@ lpfc_##attr##_store(struct device *dev, struct device_attribute *attr, \ } +static DEVICE_ATTR(nvme_info, 0444, lpfc_nvme_info_show, NULL); static DEVICE_ATTR(bg_info, S_IRUGO, lpfc_bg_info_show, NULL); static DEVICE_ATTR(bg_guard_err, S_IRUGO, lpfc_bg_guard_err_show, NULL); static DEVICE_ATTR(bg_apptag_err, S_IRUGO, lpfc_bg_apptag_err_show, NULL); @@ -2816,9 +2964,9 @@ lpfc_txq_hw_show(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; + struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", - phba->sli.ring[LPFC_ELS_RING].txq_max); + return snprintf(buf, PAGE_SIZE, "%d\n", pring->txq_max); } static DEVICE_ATTR(txq_hw, S_IRUGO, @@ -2829,9 +2977,9 @@ lpfc_txcmplq_hw_show(struct device *dev, struct device_attribute *attr, { struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; + struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", - phba->sli.ring[LPFC_ELS_RING].txcmplq_max); + return snprintf(buf, PAGE_SIZE, "%d\n", pring->txcmplq_max); } static DEVICE_ATTR(txcmplq_hw, S_IRUGO, @@ -3029,6 +3177,31 @@ lpfc_vport_param_store(devloss_tmo) static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); +/* + * lpfc_enable_fc4_type: Defines what FC4 types are supported. + * Supported Values: 1 - register just FCP + * 3 - register both FCP and NVME + * Supported values are [1,3]. Default value is 3 + */ +LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, + LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, + "Define fc4 type to register with fabric."); + +/* + * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME + * This parameter is only used if: + * lpfc_enable_fc4_type is 3 - register both FCP and NVME + * + * ELS/CT always get 10% of XRIs, up to a maximum of 250 + * The remaining XRIs get split up based on lpfc_xri_split per port: + * + * Supported Values are in percentages + * the xri_split value is the percentage the SCSI port will get. The remaining + * percentage will go to NVME. + */ +LPFC_ATTR_R(xri_split, 50, 10, 90, + "Division of XRI resources between SCSI and NVME"); + /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being # deluged with LOTS of information. @@ -4143,13 +4316,14 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, /* * Value range for the HBA is [5000,5000000] * The value for each EQ depends on how many EQs are configured. 
+ * Allow value == 0 */ - if (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX) + if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX)) return -EINVAL; phba->cfg_fcp_imax = (uint32_t)val; - for (i = 0; i < phba->cfg_fcp_io_channel; i += LPFC_MAX_EQ_DELAY) - lpfc_modify_fcp_eq_delay(phba, i); + for (i = 0; i < phba->io_channel_irqs; i++) + lpfc_modify_hba_eq_delay(phba, i); return strlen(buf); } @@ -4187,7 +4361,8 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val) return 0; } - if (val >= LPFC_MIN_IMAX && val <= LPFC_MAX_IMAX) { + if ((val >= LPFC_MIN_IMAX && val <= LPFC_MAX_IMAX) || + (val == 0)) { phba->cfg_fcp_imax = val; return 0; } @@ -4376,6 +4551,17 @@ LPFC_VPORT_ATTR_RW(use_adisc, 0, 0, 1, LPFC_VPORT_ATTR_RW(first_burst_size, 0, 0, 65536, "First burst size for Targets that support first burst"); +/* +* lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. +* For the Initiator (I), enabling this parameter means that an NVME +* PRLI response with FBA enabled and an FB_SIZE set to a nonzero value +* will be processed by the initiator for subsequent NVME FCP IO. +* Parameter supported on physical port only - no NPIV support. +* Value range is [0,1]. Default value is 0 (disabled). +*/ +LPFC_ATTR_RW(nvme_enable_fb, 0, 0, 1, + "Enable First Burst feature on I and T functions."); + /* # lpfc_max_scsicmpl_time: Use scsi command completion time to control I/O queue # depth. Default value is 0. When the value of this parameter is zero the @@ -4423,17 +4609,25 @@ static DEVICE_ATTR(lpfc_max_scsicmpl_time, S_IRUGO | S_IWUSR, LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support"); /* -# lpfc_fcp_io_sched: Determine scheduling algrithmn for issuing FCP cmds -# range is [0,1]. Default value is 0. -# For [0], FCP commands are issued to Work Queues ina round robin fashion. -# For [1], FCP commands are issued to a Work Queue associated with the -# current CPU. -# It would be set to 1 by the driver if it's able to set up cpu affinity -# for FCP I/Os through Work Queue associated with the current CPU. Otherwise, -# roundrobin scheduling of FCP I/Os through WQs will be used. -*/ -LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algorithm for " - "issuing commands [0] - Round Robin, [1] - Current CPU"); + * lpfc_io_sched: Determine scheduling algrithmn for issuing FCP cmds + * range is [0,1]. Default value is 0. + * For [0], FCP commands are issued to Work Queues ina round robin fashion. + * For [1], FCP commands are issued to a Work Queue associated with the + * current CPU. + * + * LPFC_FCP_SCHED_ROUND_ROBIN == 0 + * LPFC_FCP_SCHED_BY_CPU == 1 + * + * The driver dynamically sets this to 1 (BY_CPU) if it's able to set up cpu + * affinity for FCP/NVME I/Os through Work Queues associated with the current + * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os + * through WQs will be used. + */ +LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN, + LPFC_FCP_SCHED_ROUND_ROBIN, + LPFC_FCP_SCHED_BY_CPU, + "Determine scheduling algorithm for " + "issuing commands [0] - Round Robin, [1] - Current CPU"); /* # lpfc_fcp2_no_tgt_reset: Determine bus reset behavior @@ -4560,14 +4754,53 @@ LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or " "MSI-X (2), if possible"); /* -# lpfc_fcp_io_channel: Set the number of FCP EQ/CQ/WQ IO channels -# -# Value range is [1,7]. Default value is 4. 
-*/ -LPFC_ATTR_R(fcp_io_channel, LPFC_FCP_IO_CHAN_DEF, LPFC_FCP_IO_CHAN_MIN, - LPFC_FCP_IO_CHAN_MAX, + * lpfc_nvme_oas: Use the oas bit when sending NVME IOs + * + * 0 = NVME OAS disabled + * 1 = NVME OAS enabled + * + * Value range is [0,1]. Default value is 0. + */ +LPFC_ATTR_RW(nvme_oas, 0, 0, 1, + "Use OAS bit on NVME IOs"); + +/* + * lpfc_fcp_io_channel: Set the number of FCP IO channels the driver + * will advertise it supports to the SCSI layer. This also will map to + * the number of WQs the driver will create. + * + * 0 = Configure the number of io channels to the number of active CPUs. + * 1,32 = Manually specify how many io channels to use. + * + * Value range is [0,32]. Default value is 4. + */ +LPFC_ATTR_R(fcp_io_channel, + LPFC_FCP_IO_CHAN_DEF, + LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX, "Set the number of FCP I/O channels"); +/* + * lpfc_nvme_io_channel: Set the number of IO hardware queues the driver + * will advertise it supports to the NVME layer. This also will map to + * the number of WQs the driver will create. + * + * This module parameter is valid when lpfc_enable_fc4_type is set + * to support NVME. + * + * The NVME Layer will try to create this many, plus 1 administrative + * hardware queue. The administrative queue will always map to WQ 0 + * A hardware IO queue maps (qidx) to a specific driver WQ. + * + * 0 = Configure the number of io channels to the number of active CPUs. + * 1,32 = Manually specify how many io channels to use. + * + * Value range is [0,32]. Default value is 0. + */ +LPFC_ATTR_R(nvme_io_channel, + LPFC_NVME_IO_CHAN_DEF, + LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX, + "Set the number of NVME I/O channels"); + /* # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware. # 0 = HBA resets disabled @@ -4692,6 +4925,7 @@ LPFC_ATTR_R(sg_seg_cnt, LPFC_DEFAULT_SG_SEG_CNT, LPFC_DEFAULT_SG_SEG_CNT, LPFC_ATTR_R(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics"); struct device_attribute *lpfc_hba_attrs[] = { + &dev_attr_nvme_info, &dev_attr_bg_info, &dev_attr_bg_guard_err, &dev_attr_bg_apptag_err, @@ -4718,6 +4952,8 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_peer_port_login, &dev_attr_lpfc_nodev_tmo, &dev_attr_lpfc_devloss_tmo, + &dev_attr_lpfc_enable_fc4_type, + &dev_attr_lpfc_xri_split, &dev_attr_lpfc_fcp_class, &dev_attr_lpfc_use_adisc, &dev_attr_lpfc_first_burst_size, @@ -4752,9 +4988,12 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_poll_tmo, &dev_attr_lpfc_task_mgmt_tmo, &dev_attr_lpfc_use_msi, + &dev_attr_lpfc_nvme_oas, &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_fcp_io_channel, + &dev_attr_lpfc_nvme_io_channel, + &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_enable_bg, &dev_attr_lpfc_soft_wwnn, &dev_attr_lpfc_soft_wwpn, @@ -5764,9 +6003,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_fdmi_on_init(phba, lpfc_fdmi_on); lpfc_enable_SmartSAN_init(phba, lpfc_enable_SmartSAN); lpfc_use_msi_init(phba, lpfc_use_msi); + lpfc_nvme_oas_init(phba, lpfc_nvme_oas); lpfc_fcp_imax_init(phba, lpfc_fcp_imax); lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map); - lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset); lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat); @@ -5789,8 +6028,43 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) else phba->cfg_poll = lpfc_poll; + lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); + + /* Initialize first burst. Target vs Initiator are different. 
*/ + lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); + lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); + lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel); + + if (phba->sli_rev != LPFC_SLI_REV4) { + /* NVME only supported on SLI4 */ + phba->nvmet_support = 0; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } else { + /* We MUST have FCP support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP; + } + + /* A value of 0 means use the number of CPUs found in the system */ + if (phba->cfg_nvme_io_channel == 0) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel == 0) + phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) + phba->cfg_nvme_io_channel = 0; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + phba->cfg_fcp_io_channel = 0; + + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; + phba->cfg_soft_wwnn = 0L; phba->cfg_soft_wwpn = 0L; + lpfc_xri_split_init(phba, lpfc_xri_split); lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt); lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth); lpfc_hba_log_verbose_init(phba, lpfc_log_verbose); @@ -5806,6 +6080,26 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) return; } +/** + * lpfc_nvme_mod_param_dep - Adjust module parameter value based on + * dependencies between protocols and roles. + * @phba: lpfc_hba pointer. + **/ +void +lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) +{ + phba->nvmet_support = 0; + if (phba->cfg_nvme_io_channel > phba->sli4_hba.num_present_cpu) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel > phba->sli4_hba.num_present_cpu) + phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; +} + /** * lpfc_get_vport_cfgparam - Used during port create, init the vport structure * @vport: lpfc_vport pointer. 
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 1ee131f124b9b4..55a2270cadfcfb 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1704,6 +1704,7 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) struct lpfc_vport **vports; struct Scsi_Host *shost; struct lpfc_sli *psli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; int i = 0; @@ -1711,9 +1712,6 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) if (!psli) return -ENODEV; - pring = &psli->ring[LPFC_FCP_RING]; - if (!pring) - return -ENODEV; if ((phba->link_state == LPFC_HBA_ERROR) || (psli->sli_flag & LPFC_BLOCK_MGMT_IO) || @@ -1732,10 +1730,18 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) scsi_block_requests(shost); } - while (!list_empty(&pring->txcmplq)) { - if (i++ > 500) /* wait up to 5 seconds */ + if (phba->sli_rev != LPFC_SLI_REV4) { + pring = &psli->sli3_ring[LPFC_FCP_RING]; + lpfc_emptyq_wait(phba, &pring->txcmplq, &phba->hbalock); + return 0; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring || (pring->ringno != LPFC_FCP_RING)) + continue; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &pring->ring_lock)) break; - msleep(10); } return 0; } @@ -2875,8 +2881,7 @@ diag_cmd_data_alloc(struct lpfc_hba *phba, static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri, size_t len) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *cmdiocbq; IOCB_t *cmd = NULL; struct list_head head, *curr, *next; @@ -2890,6 +2895,8 @@ static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri, int iocb_stat; int i = 0; + pring = lpfc_phba_elsring(phba); + cmdiocbq = lpfc_sli_get_iocbq(phba); rxbmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (rxbmp != NULL) { @@ -5403,13 +5410,15 @@ lpfc_bsg_timeout(struct bsg_job *job) struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct bsg_job_data *dd_data; unsigned long flags; int rc = 0; LIST_HEAD(completions); struct lpfc_iocbq *check_iocb, *next_iocb; + pring = lpfc_phba_elsring(phba); + /* if job's driver data is NULL, the command completed or is in the * the process of completing. In this case, return status to request * so the timeout is retried. 
This avoids double completion issues diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 309643a2c55c7d..2e6345ebd6c548 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -21,6 +21,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; +struct fc_frame_header; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -167,6 +168,8 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); +int lpfc_issue_gidft(struct lpfc_vport *vport); +int lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *iocbq); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int, uint32_t); void lpfc_fdmi_num_disc_check(struct lpfc_vport *); @@ -186,6 +189,8 @@ void lpfc_unblock_mgmt_io(struct lpfc_hba *); void lpfc_offline_prep(struct lpfc_hba *, int); void lpfc_offline(struct lpfc_hba *); void lpfc_reset_hba(struct lpfc_hba *); +int lpfc_emptyq_wait(struct lpfc_hba *phba, struct list_head *hd, + spinlock_t *slock); int lpfc_fof_queue_create(struct lpfc_hba *); int lpfc_fof_queue_setup(struct lpfc_hba *); @@ -193,7 +198,11 @@ int lpfc_fof_queue_destroy(struct lpfc_hba *); irqreturn_t lpfc_sli4_fof_intr_handler(int, void *); int lpfc_sli_setup(struct lpfc_hba *); -int lpfc_sli_queue_setup(struct lpfc_hba *); +int lpfc_sli4_setup(struct lpfc_hba *phba); +void lpfc_sli_queue_init(struct lpfc_hba *phba); +void lpfc_sli4_queue_init(struct lpfc_hba *phba); +struct lpfc_sli_ring *lpfc_sli4_calc_ring(struct lpfc_hba *phba, + struct lpfc_iocbq *iocbq); void lpfc_handle_eratt(struct lpfc_hba *); void lpfc_handle_latt(struct lpfc_hba *); @@ -233,6 +242,11 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); +int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, + struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe); +int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq, + struct lpfc_queue *dq, int count); +int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq); void lpfc_unregister_fcf(struct lpfc_hba *); void lpfc_unregister_fcf_rescan(struct lpfc_hba *); void lpfc_unregister_unused_fcf(struct lpfc_hba *); @@ -287,6 +301,9 @@ void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, uint32_t); +int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t rnum, + struct lpfc_iocbq *iocbq); +struct lpfc_sglq *__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xri); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); void lpfc_sli_bemem_bcopy(void *, void *, uint32_t); void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); @@ -356,6 +373,7 @@ extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; extern struct scsi_host_template lpfc_template_s3; +extern struct scsi_host_template lpfc_template_nvme; extern struct 
scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; extern struct fc_function_template lpfc_vport_transport_functions; @@ -471,7 +489,9 @@ int lpfc_issue_unreg_vfi(struct lpfc_vport *); int lpfc_selective_reset(struct lpfc_hba *); int lpfc_sli4_read_config(struct lpfc_hba *); void lpfc_sli4_node_prep(struct lpfc_hba *); -int lpfc_sli4_xri_sgl_update(struct lpfc_hba *); +int lpfc_sli4_els_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba); void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *); uint32_t lpfc_sli_port_speed_get(struct lpfc_hba *); int lpfc_sli4_request_firmware_update(struct lpfc_hba *, uint8_t); @@ -496,3 +516,6 @@ bool lpfc_find_next_oas_lun(struct lpfc_hba *, struct lpfc_name *, uint32_t *, uint32_t *); int lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox); void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb); + +/* NVME interfaces. */ +void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 93ddda16c63277..d9bbebe8eb37df 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -484,20 +484,23 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) off += (8 * sizeof(uint32_t)); } - for (i = 0; i < 4; i++) { - pgpp = &phba->port_gp[i]; - pring = &psli->ring[i]; - len += snprintf(buf+len, size-len, - "Ring %d: CMD GetInx:%d (Max:%d Next:%d " - "Local:%d flg:x%x) RSP PutInx:%d Max:%d\n", - i, pgpp->cmdGetInx, pring->sli.sli3.numCiocb, - pring->sli.sli3.next_cmdidx, - pring->sli.sli3.local_getidx, - pring->flag, pgpp->rspPutInx, - pring->sli.sli3.numRiocb); - } - if (phba->sli_rev <= LPFC_SLI_REV3) { + for (i = 0; i < 4; i++) { + pgpp = &phba->port_gp[i]; + pring = &psli->sli3_ring[i]; + len += snprintf(buf+len, size-len, + "Ring %d: CMD GetInx:%d " + "(Max:%d Next:%d " + "Local:%d flg:x%x) " + "RSP PutInx:%d Max:%d\n", + i, pgpp->cmdGetInx, + pring->sli.sli3.numCiocb, + pring->sli.sli3.next_cmdidx, + pring->sli.sli3.local_getidx, + pring->flag, pgpp->rspPutInx, + pring->sli.sli3.numRiocb); + } + word0 = readl(phba->HAregaddr); word1 = readl(phba->CAregaddr); word2 = readl(phba->HSregaddr); @@ -535,6 +538,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); + len += snprintf(buf+len, size-len, "\nFCP Nodelist Entries ...\n"); spin_lock_irq(shost->host_lock); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (!cnt) { @@ -2011,6 +2015,14 @@ lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer, if (*len >= max_cnt) return 1; } + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + qp = phba->sli4_hba.nvme_wq[qidx]; + if (qp->assoc_qid != cq_id) + continue; + *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len); + if (*len >= max_cnt) + return 1; + } return 0; } @@ -2096,6 +2108,25 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + qp = phba->sli4_hba.nvme_cq[qidx]; + if (qp->assoc_qid != eq_id) + continue; + + *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + 
return 1; + } + return 0; } @@ -2162,21 +2193,21 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, spin_lock_irq(&phba->hbalock); /* Fast-path event queue */ - if (phba->sli4_hba.hba_eq && phba->cfg_fcp_io_channel) { + if (phba->sli4_hba.hba_eq && phba->io_channel_irqs) { x = phba->lpfc_idiag_last_eq; - if (phba->cfg_fof && (x >= phba->cfg_fcp_io_channel)) { + if (phba->cfg_fof && (x >= phba->io_channel_irqs)) { phba->lpfc_idiag_last_eq = 0; goto fof; } phba->lpfc_idiag_last_eq++; - if (phba->lpfc_idiag_last_eq >= phba->cfg_fcp_io_channel) + if (phba->lpfc_idiag_last_eq >= phba->io_channel_irqs) if (phba->cfg_fof == 0) phba->lpfc_idiag_last_eq = 0; len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "EQ %d out of %d HBA EQs\n", - x, phba->cfg_fcp_io_channel); + x, phba->io_channel_irqs); /* Fast-path EQ */ qp = phba->sli4_hba.hba_eq[x]; @@ -2191,6 +2222,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, if (len >= max_cnt) goto too_big; + /* will dump both fcp and nvme cqs/wqs for the eq */ rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, max_cnt, qp->queue_id); if (rc) @@ -2227,6 +2259,23 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, if (len >= max_cnt) goto too_big; + /* Slow-path NVME LS response CQ */ + qp = phba->sli4_hba.nvmels_cq; + len = __lpfc_idiag_print_cq(qp, "NVME LS", + pbuffer, len); + /* Reset max counter */ + if (qp) + qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; + + /* Slow-path NVME LS WQ */ + qp = phba->sli4_hba.nvmels_wq; + len = __lpfc_idiag_print_wq(qp, "NVME LS", + pbuffer, len); + if (len >= max_cnt) + goto too_big; + qp = phba->sli4_hba.hdr_rq; len = __lpfc_idiag_print_rqpair(qp, phba->sli4_hba.dat_rq, "RQpair", pbuffer, len); @@ -2447,7 +2496,7 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; uint32_t qidx, quetp, queid, index, count, offset, value; uint32_t *pentry; - struct lpfc_queue *pque; + struct lpfc_queue *pque, *qp; int rc; /* This is a user write operation */ @@ -2483,19 +2532,15 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, case LPFC_IDIAG_EQ: /* HBA event queue */ if (phba->sli4_hba.hba_eq) { - for (qidx = 0; qidx < phba->cfg_fcp_io_channel; - qidx++) { - if (phba->sli4_hba.hba_eq[qidx] && - phba->sli4_hba.hba_eq[qidx]->queue_id == - queid) { + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) { + qp = phba->sli4_hba.hba_eq[qidx]; + if (qp && qp->queue_id == queid) { /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.hba_eq[qidx], + rc = lpfc_idiag_que_param_check(qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.hba_eq[qidx]; + idiag.ptr_private = qp; goto pass_check; } } @@ -2525,24 +2570,32 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.els_cq; goto pass_check; } + /* NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq && + phba->sli4_hba.nvmels_cq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_cq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_cq; + goto pass_check; + } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { - qidx = 0; - do { - if (phba->sli4_hba.fcp_cq[qidx] && - phba->sli4_hba.fcp_cq[qidx]->queue_id == - queid) { + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; + qidx++) { + qp = 
phba->sli4_hba.fcp_cq[qidx]; + if (qp && qp->queue_id == queid) { /* Sanity check */ rc = lpfc_idiag_que_param_check( - phba->sli4_hba.fcp_cq[qidx], - index, count); + qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.fcp_cq[qidx]; + idiag.ptr_private = qp; goto pass_check; } - } while (++qidx < phba->cfg_fcp_io_channel); + } } goto error_out; break; @@ -2572,22 +2625,45 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.els_wq; goto pass_check; } + /* NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq && + phba->sli4_hba.nvmels_wq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_wq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_wq; + goto pass_check; + } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; - qidx++) { - if (!phba->sli4_hba.fcp_wq[qidx]) - continue; - if (phba->sli4_hba.fcp_wq[qidx]->queue_id == - queid) { + qidx++) { + qp = phba->sli4_hba.fcp_wq[qidx]; + if (qp && qp->queue_id == queid) { /* Sanity check */ rc = lpfc_idiag_que_param_check( - phba->sli4_hba.fcp_wq[qidx], - index, count); + qp, index, count); + if (rc) + goto error_out; + idiag.ptr_private = qp; + goto pass_check; + } + } + } + /* NVME work queue */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; + qidx++) { + qp = phba->sli4_hba.nvme_wq[qidx]; + if (qp && qp->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.fcp_wq[qidx]; + idiag.ptr_private = qp; goto pass_check; } } @@ -4562,10 +4638,14 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba) */ lpfc_debug_dump_wq(phba, DUMP_MBX, 0); lpfc_debug_dump_wq(phba, DUMP_ELS, 0); + lpfc_debug_dump_wq(phba, DUMP_NVMELS, 0); for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) lpfc_debug_dump_wq(phba, DUMP_FCP, idx); + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + lpfc_debug_dump_wq(phba, DUMP_NVME, idx); + lpfc_debug_dump_hdr_rq(phba); lpfc_debug_dump_dat_rq(phba); /* @@ -4573,13 +4653,17 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba) */ lpfc_debug_dump_cq(phba, DUMP_MBX, 0); lpfc_debug_dump_cq(phba, DUMP_ELS, 0); + lpfc_debug_dump_cq(phba, DUMP_NVMELS, 0); for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) lpfc_debug_dump_cq(phba, DUMP_FCP, idx); + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + lpfc_debug_dump_cq(phba, DUMP_NVME, idx); + /* * Dump Event Queues (EQs) */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + for (idx = 0; idx < phba->io_channel_irqs; idx++) lpfc_debug_dump_hba_eq(phba, idx); } diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 9ae2c4b5fd127e..98814d65123904 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -44,8 +44,10 @@ enum { DUMP_FCP, + DUMP_NVME, DUMP_MBX, DUMP_ELS, + DUMP_NVMELS, }; /* @@ -364,11 +366,11 @@ lpfc_debug_dump_q(struct lpfc_queue *q) } /** - * lpfc_debug_dump_wq - dump all entries from the fcp work queue + * lpfc_debug_dump_wq - dump all entries from the fcp or nvme work queue * @phba: Pointer to HBA context object. - * @wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP or NVME work queue. 
* - * This function dumps all entries from a FCP work queue specified + * This function dumps all entries from a FCP or NVME work queue specified * by the wqidx. **/ static inline void @@ -380,16 +382,22 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) if (qtype == DUMP_FCP) { wq = phba->sli4_hba.fcp_wq[wqidx]; qtypestr = "FCP"; + } else if (qtype == DUMP_NVME) { + wq = phba->sli4_hba.nvme_wq[wqidx]; + qtypestr = "NVME"; } else if (qtype == DUMP_MBX) { wq = phba->sli4_hba.mbx_wq; qtypestr = "MBX"; } else if (qtype == DUMP_ELS) { wq = phba->sli4_hba.els_wq; qtypestr = "ELS"; + } else if (qtype == DUMP_NVMELS) { + wq = phba->sli4_hba.nvmels_wq; + qtypestr = "NVMELS"; } else return; - if (qtype == DUMP_FCP) + if (qtype == DUMP_FCP || qtype == DUMP_NVME) pr_err("%s WQ: WQ[Idx:%d|Qid:%d]\n", qtypestr, wqidx, wq->queue_id); else @@ -400,12 +408,12 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) } /** - * lpfc_debug_dump_cq - dump all entries from a fcp work queue's + * lpfc_debug_dump_cq - dump all entries from a fcp or nvme work queue's * cmpl queue * @phba: Pointer to HBA context object. * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP completion queue + * This function dumps all entries from a FCP or NVME completion queue * which is associated to the work queue specified by the @wqidx. **/ static inline void @@ -415,12 +423,16 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) char *qtypestr; int eqidx; - /* fcp wq and cq are 1:1, thus same indexes */ + /* fcp/nvme wq and cq are 1:1, thus same indexes */ if (qtype == DUMP_FCP) { wq = phba->sli4_hba.fcp_wq[wqidx]; cq = phba->sli4_hba.fcp_cq[wqidx]; qtypestr = "FCP"; + } else if (qtype == DUMP_NVME) { + wq = phba->sli4_hba.nvme_wq[wqidx]; + cq = phba->sli4_hba.nvme_cq[wqidx]; + qtypestr = "NVME"; } else if (qtype == DUMP_MBX) { wq = phba->sli4_hba.mbx_wq; cq = phba->sli4_hba.mbx_cq; @@ -429,21 +441,25 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) wq = phba->sli4_hba.els_wq; cq = phba->sli4_hba.els_cq; qtypestr = "ELS"; + } else if (qtype == DUMP_NVMELS) { + wq = phba->sli4_hba.nvmels_wq; + cq = phba->sli4_hba.nvmels_cq; + qtypestr = "NVMELS"; } else return; - for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) { + for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++) { eq = phba->sli4_hba.hba_eq[eqidx]; if (cq->assoc_qid == eq->queue_id) break; } - if (eqidx == phba->cfg_fcp_io_channel) { + if (eqidx == phba->io_channel_irqs) { pr_err("Couldn't find EQ for CQ. 
Using EQ[0]\n"); eqidx = 0; eq = phba->sli4_hba.hba_eq[0]; } - if (qtype == DUMP_FCP) + if (qtype == DUMP_FCP || qtype == DUMP_NVME) pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]" "->EQ[Idx:%d|Qid:%d]:\n", qtypestr, wqidx, wq->queue_id, wqidx, cq->queue_id, @@ -527,11 +543,25 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid) return; } + for (wq_idx = 0; wq_idx < phba->cfg_nvme_io_channel; wq_idx++) + if (phba->sli4_hba.nvme_wq[wq_idx]->queue_id == qid) + break; + if (wq_idx < phba->cfg_nvme_io_channel) { + pr_err("NVME WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); + lpfc_debug_dump_q(phba->sli4_hba.nvme_wq[wq_idx]); + return; + } + if (phba->sli4_hba.els_wq->queue_id == qid) { pr_err("ELS WQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_wq); return; } + + if (phba->sli4_hba.nvmels_wq->queue_id == qid) { + pr_err("NVME LS WQ[Qid:%d]\n", qid); + lpfc_debug_dump_q(phba->sli4_hba.nvmels_wq); + } } /** @@ -596,12 +626,28 @@ lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid) return; } + for (cq_idx = 0; cq_idx < phba->cfg_nvme_io_channel; cq_idx++) + if (phba->sli4_hba.nvme_cq[cq_idx]->queue_id == qid) + break; + + if (cq_idx < phba->cfg_nvme_io_channel) { + pr_err("NVME CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); + lpfc_debug_dump_q(phba->sli4_hba.nvme_cq[cq_idx]); + return; + } + if (phba->sli4_hba.els_cq->queue_id == qid) { pr_err("ELS CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_cq); return; } + if (phba->sli4_hba.nvmels_cq->queue_id == qid) { + pr_err("NVME LS CQ[Qid:%d]\n", qid); + lpfc_debug_dump_q(phba->sli4_hba.nvmels_cq); + return; + } + if (phba->sli4_hba.mbx_cq->queue_id == qid) { pr_err("MBX CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); @@ -621,17 +667,15 @@ lpfc_debug_dump_eq_by_id(struct lpfc_hba *phba, int qid) { int eq_idx; - for (eq_idx = 0; eq_idx < phba->cfg_fcp_io_channel; eq_idx++) { + for (eq_idx = 0; eq_idx < phba->io_channel_irqs; eq_idx++) if (phba->sli4_hba.hba_eq[eq_idx]->queue_id == qid) break; - } - if (eq_idx < phba->cfg_fcp_io_channel) { + if (eq_idx < phba->io_channel_irqs) { printk(KERN_ERR "FCP EQ[Idx:%d|Qid:%d]\n", eq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.hba_eq[eq_idx]); return; } - } void lpfc_debug_dump_all_queues(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3a1f1a2a2b559a..7be2354575405a 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1323,7 +1323,7 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) "0201 Abort outstanding I/O on NPort x%x\n", Fabric_DID); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); /* * Check the txcmplq for an iocb that matches the nport the driver is @@ -7155,7 +7155,8 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) timeout = (uint32_t)(phba->fc_ratov << 1); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); + if ((phba->pport->load_flag & FC_UNLOADING)) return; spin_lock_irq(&phba->hbalock); @@ -7224,7 +7225,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) spin_unlock_irq(&phba->hbalock); } - if (!list_empty(&phba->sli.ring[LPFC_ELS_RING].txcmplq)) + if (!list_empty(&pring->txcmplq)) if (!(phba->pport->load_flag & FC_UNLOADING)) mod_timer(&vport->els_tmofunc, jiffies + msecs_to_jiffies(1000 * timeout)); @@ -7255,7 +7256,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) { LIST_HEAD(abort_list); struct lpfc_hba *phba = vport->phba; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct 
lpfc_iocbq *tmp_iocb, *piocb; IOCB_t *cmd = NULL; @@ -7267,6 +7268,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) * a working list and release the locks before calling the abort. */ spin_lock_irq(&phba->hbalock); + pring = lpfc_phba_elsring(phba); if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); @@ -9013,7 +9015,9 @@ void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp) LIST_HEAD(completions); struct lpfc_hba *phba = ndlp->phba; struct lpfc_iocbq *tmp_iocb, *piocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; + + pring = lpfc_phba_elsring(phba); spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, @@ -9069,13 +9073,13 @@ lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *vport) unsigned long iflag = 0; spin_lock_irqsave(&phba->hbalock, iflag); - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry_safe(sglq_entry, sglq_next, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) { if (sglq_entry->ndlp && sglq_entry->ndlp->vport == vport) sglq_entry->ndlp = NULL; } - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } @@ -9099,22 +9103,22 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba, struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL; unsigned long iflag = 0; struct lpfc_nodelist *ndlp; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; + + pring = lpfc_phba_elsring(phba); spin_lock_irqsave(&phba->hbalock, iflag); - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry_safe(sglq_entry, sglq_next, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) { if (sglq_entry->sli4_xritag == xri) { list_del(&sglq_entry->list); ndlp = sglq_entry->ndlp; sglq_entry->ndlp = NULL; - spin_lock(&pring->ring_lock); list_add_tail(&sglq_entry->list, - &phba->sli4_hba.lpfc_sgl_list); + &phba->sli4_hba.lpfc_els_sgl_list); sglq_entry->state = SGL_FREED; - spin_unlock(&pring->ring_lock); - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); lpfc_set_rrq_active(phba, ndlp, sglq_entry->sli4_lxritag, @@ -9126,21 +9130,21 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba, return; } } - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); lxri = lpfc_sli4_xri_inrange(phba, xri); if (lxri == NO_XRI) { spin_unlock_irqrestore(&phba->hbalock, iflag); return; } - spin_lock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); sglq_entry = __lpfc_get_active_sglq(phba, lxri); if (!sglq_entry || (sglq_entry->sli4_xritag != xri)) { - spin_unlock(&pring->ring_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } sglq_entry->state = SGL_XRI_ABORTED; - spin_unlock(&pring->ring_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 92627df35a58af..ede831d1f46761 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -93,7 +93,7 @@ lpfc_terminate_rport_io(struct fc_rport *rport) if (ndlp->nlp_sid != NLP_NO_SID) { lpfc_sli_abort_iocb(ndlp->vport, - &phba->sli.ring[phba->sli.fcp_ring], + 
&phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); } } @@ -247,8 +247,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) if (ndlp->nlp_sid != NLP_NO_SID) { /* flush the target */ lpfc_sli_abort_iocb(vport, - &phba->sli.ring[phba->sli.fcp_ring], - ndlp->nlp_sid, 0, LPFC_CTX_TGT); + &phba->sli.sli3_ring[LPFC_FCP_RING], + ndlp->nlp_sid, 0, LPFC_CTX_TGT); } put_node = rdata->pnode != NULL; rdata->pnode = NULL; @@ -283,7 +283,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) if (ndlp->nlp_sid != NLP_NO_SID) { warn_on = 1; - lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring], + lpfc_sli_abort_iocb(vport, &phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); } @@ -495,11 +495,12 @@ lpfc_send_fastpath_evt(struct lpfc_hba *phba, return; } - fc_host_post_vendor_event(shost, - fc_get_event_number(), - evt_data_size, - evt_data, - LPFC_NL_VENDOR_ID); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_vendor_event(shost, + fc_get_event_number(), + evt_data_size, + evt_data, + LPFC_NL_VENDOR_ID); lpfc_free_fast_evt(phba, fast_evt_data); return; @@ -682,7 +683,7 @@ lpfc_work_done(struct lpfc_hba *phba) } lpfc_destroy_vport_work_array(phba, vports); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); status >>= (4*LPFC_ELS_RING); if ((status & HA_RXMASK) || @@ -894,11 +895,16 @@ lpfc_linkdown(struct lpfc_hba *phba) spin_unlock_irq(shost->host_lock); } vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) + if (vports != NULL) { for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { /* Issue a LINK DOWN event to all nodes */ lpfc_linkdown_port(vports[i]); + + vports[i]->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ } + } lpfc_destroy_vport_work_array(phba, vports); /* Clean up any firmware default rpi's */ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -914,7 +920,6 @@ lpfc_linkdown(struct lpfc_hba *phba) /* Setup myDID for link up if we are in pt2pt mode */ if (phba->pport->fc_flag & FC_PT2PT) { - phba->pport->fc_myDID = 0; mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mb) { lpfc_config_link(phba, mb); @@ -929,7 +934,6 @@ lpfc_linkdown(struct lpfc_hba *phba) phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); spin_unlock_irq(shost->host_lock); } - return 0; } @@ -1016,7 +1020,7 @@ lpfc_linkup(struct lpfc_hba *phba) * This routine handles processing a CLEAR_LA mailbox * command upon completion. It is setup in the LPFC_MBOXQ * as the completion routine when the command is - * handed off to the SLI layer. + * handed off to the SLI layer. SLI3 only. */ static void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) @@ -1028,9 +1032,8 @@ lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) uint32_t control; /* Since we don't do discovery right now, turn these off here */ - psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[psli->next_ring].flag &= ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_EXTRA_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_FCP_RING].flag &= ~LPFC_STOP_IOCB_EVENT; /* Check for error */ if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) { @@ -3277,7 +3280,7 @@ lpfc_mbx_issue_link_down(struct lpfc_hba *phba) * This routine handles processing a READ_TOPOLOGY mailbox * command upon completion. 
It is setup in the LPFC_MBOXQ * as the completion routine when the command is - * handed off to the SLI layer. + * handed off to the SLI layer. SLI4 only. */ void lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) @@ -3285,11 +3288,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_mbx_read_top *la; + struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); /* Unblock ELS traffic */ - phba->sli.ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + pring = lpfc_phba_elsring(phba); + pring->flag &= ~LPFC_STOP_IOCB_EVENT; + /* Check for error */ if (mb->mbxStatus) { lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, @@ -3458,6 +3464,14 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_IGNR_REG_CMPL; spin_unlock_irq(shost->host_lock); + + /* + * We cannot leave the RPI registered because + * if we go thru discovery again for this ndlp + * a subsequent REG_RPI will fail. + */ + ndlp->nlp_flag |= NLP_RPI_REGISTERED; + lpfc_unreg_rpi(vport, ndlp); } /* Call state machine */ @@ -3903,6 +3917,9 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct fc_rport_identifiers rport_ids; struct lpfc_hba *phba = vport->phba; + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + return; + /* Remote port has reappeared. Re-register w/ FC transport */ rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); rport_ids.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); @@ -4395,7 +4412,6 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) { - struct lpfc_sli *psli = &phba->sli; IOCB_t *icmd = &iocb->iocb; struct lpfc_vport *vport = ndlp->vport; @@ -4414,9 +4430,7 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, if (iocb->context1 == (uint8_t *) ndlp) return 1; } - } else if (pring->ringno == psli->extra_ring) { - - } else if (pring->ringno == psli->fcp_ring) { + } else if (pring->ringno == LPFC_FCP_RING) { /* Skip match check if waiting to relogin to FCP target */ if ((ndlp->nlp_type & NLP_FCP_TARGET) && (ndlp->nlp_flag & NLP_DELAY_TMO)) { @@ -4429,6 +4443,54 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, return 0; } +static void +__lpfc_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct lpfc_sli_ring *pring, + struct list_head *dequeue_list) +{ + struct lpfc_iocbq *iocb, *next_iocb; + + list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { + /* Check to see if iocb matches the nport */ + if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) + /* match, dequeue */ + list_move_tail(&iocb->list, dequeue_list); + } +} + +static void +lpfc_sli3_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct list_head *dequeue_list) +{ + struct lpfc_sli *psli = &phba->sli; + uint32_t i; + + spin_lock_irq(&phba->hbalock); + for (i = 0; i < psli->num_rings; i++) + __lpfc_dequeue_nport_iocbs(phba, ndlp, &psli->sli3_ring[i], + dequeue_list); + spin_unlock_irq(&phba->hbalock); +} + +static void +lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct list_head *dequeue_list) +{ + struct lpfc_sli_ring *pring; + struct lpfc_queue *qp = NULL; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + 
continue; + spin_lock_irq(&pring->ring_lock); + __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list); + spin_unlock_irq(&pring->ring_lock); + } + spin_unlock_irq(&phba->hbalock); +} + /* * Free resources / clean up outstanding I/Os * associated with nlp_rpi in the LPFC_NODELIST entry. @@ -4437,10 +4499,6 @@ static int lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(completions); - struct lpfc_sli *psli; - struct lpfc_sli_ring *pring; - struct lpfc_iocbq *iocb, *next_iocb; - uint32_t i; lpfc_fabric_abort_nport(ndlp); @@ -4448,29 +4506,11 @@ lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) * Everything that matches on txcmplq will be returned * by firmware with a no rpi error. */ - psli = &phba->sli; if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { - /* Now process each ring */ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - - spin_lock_irq(&phba->hbalock); - list_for_each_entry_safe(iocb, next_iocb, &pring->txq, - list) { - /* - * Check to see if iocb matches the nport we are - * looking for - */ - if ((lpfc_check_sli_ndlp(phba, pring, iocb, - ndlp))) { - /* It matches, so deque and call compl - with an error */ - list_move_tail(&iocb->list, - &completions); - } - } - spin_unlock_irq(&phba->hbalock); - } + if (phba->sli_rev != LPFC_SLI_REV4) + lpfc_sli3_dequeue_nport_iocbs(phba, ndlp, &completions); + else + lpfc_sli4_dequeue_nport_iocbs(phba, ndlp, &completions); } /* Cancel all the IOCBs from the completions list */ @@ -5039,14 +5079,14 @@ lpfc_disc_list_loopmap(struct lpfc_vport *vport) return; } +/* SLI3 only */ void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) { LPFC_MBOXQ_t *mbox; struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring]; - struct lpfc_sli_ring *fcp_ring = &psli->ring[psli->fcp_ring]; - struct lpfc_sli_ring *next_ring = &psli->ring[psli->next_ring]; + struct lpfc_sli_ring *extra_ring = &psli->sli3_ring[LPFC_EXTRA_RING]; + struct lpfc_sli_ring *fcp_ring = &psli->sli3_ring[LPFC_FCP_RING]; int rc; /* @@ -5070,7 +5110,6 @@ lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) lpfc_disc_flush_list(vport); extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT; fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT; - next_ring->flag &= ~LPFC_STOP_IOCB_EVENT; phba->link_state = LPFC_HBA_ERROR; } } @@ -5206,7 +5245,7 @@ lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) struct lpfc_sli_ring *pring; psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); /* Error matching iocb on txq or txcmplq * First check the txq. 
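Several hunks in this file, and in lpfc_bsg.c and lpfc_els.c above, replace the open-coded &phba->sli.ring[LPFC_ELS_RING] lookup with lpfc_phba_elsring(). The helper itself is introduced elsewhere in this patch (lpfc.h) and is not visible in these hunks; the sketch below only shows the behavior the callers rely on - an SLI4 port takes the ELS ring from the ELS work queue, an SLI3 port keeps using the static sli3_ring array - and the real definition may differ in detail.

/* Illustrative sketch only; the actual helper lives in lpfc.h. */
static inline struct lpfc_sli_ring *
lpfc_phba_elsring(struct lpfc_hba *phba)
{
	if (phba->sli_rev == LPFC_SLI_REV4)
		return phba->sli4_hba.els_wq->pring;	/* ring hangs off the ELS WQ */
	return &phba->sli.sli3_ring[LPFC_ELS_RING];	/* legacy SLI3 ring array */
}
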
@@ -5522,12 +5561,14 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) if (clrlaerr) { lpfc_disc_flush_list(vport); - psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; + if (phba->sli_rev != LPFC_SLI_REV4) { + psli->sli3_ring[(LPFC_EXTRA_RING)].flag &= + ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_FCP_RING].flag &= + ~LPFC_STOP_IOCB_EVENT; + } vport->port_state = LPFC_VPORT_READY; } - return; } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 3b970d3706008c..28247c99b4f220 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -44,8 +44,6 @@ #define LPFC_FCP_RING 0 /* ring 0 for FCP initiator commands */ #define LPFC_EXTRA_RING 1 /* ring 1 for other protocols */ #define LPFC_ELS_RING 2 /* ring 2 for ELS commands */ -#define LPFC_FCP_NEXT_RING 3 -#define LPFC_FCP_OAS_RING 3 #define SLI2_IOCB_CMD_R0_ENTRIES 172 /* SLI-2 FCP command ring entries */ #define SLI2_IOCB_RSP_R0_ENTRIES 134 /* SLI-2 FCP response ring entries */ @@ -1791,6 +1789,7 @@ typedef struct { /* FireFly BIU registers */ #define MBX_INIT_VFI 0xA3 #define MBX_INIT_VPI 0xA4 #define MBX_ACCESS_VDATA 0xA5 +#define MBX_REG_FCFI_MRQ 0xAF #define MBX_AUTH_PORT 0xF8 #define MBX_SECURITY_MGMT 0xF9 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 964a1fdb076bba..c3277c5312c94c 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -108,6 +108,7 @@ struct lpfc_sli_intf { #define LPFC_MAX_MQ_PAGE 8 #define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 +#define LPFC_MAX_RQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -198,7 +199,7 @@ struct lpfc_sli_intf { /* Configuration of Interrupts / sec for entire HBA port */ #define LPFC_MIN_IMAX 5000 #define LPFC_MAX_IMAX 5000000 -#define LPFC_DEF_IMAX 50000 +#define LPFC_DEF_IMAX 150000 #define LPFC_MIN_CPU_MAP 0 #define LPFC_MAX_CPU_MAP 2 @@ -348,6 +349,7 @@ struct lpfc_cqe { #define CQE_CODE_RECEIVE 0x4 #define CQE_CODE_XRI_ABORTED 0x5 #define CQE_CODE_RECEIVE_V1 0x9 +#define CQE_CODE_NVME_ERSP 0xd /* * Define mask value for xri_aborted and wcqe completed CQE extended status. 
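The lpfc_hw4.h hunks that follow add a number of new _SHIFT/_MASK/_WORD triples (lpfc_wcqe_c_ersp0, lpfc_wcqe_c_sqhead, cfg_wqpcnt, cfg_nvme, cfg_xib, wqe_nvme, and so on). They follow the driver's existing bitfield convention, where the bf_get()/bf_set() helpers in this header turn each triple into a field accessor; the standalone snippet below, using values copied from the defines added below, is only an illustration of the shift-and-mask a read boils down to.

#include <stdio.h>

/* Values copied from the defines added below (illustration only). */
#define cfg_nvme_SHIFT	3
#define cfg_nvme_MASK	0x00000001

int main(void)
{
	unsigned int word19 = 0x00000008;	/* example: bit 3 set */

	/* this shift-and-mask is what a bf_get(cfg_nvme, params) read amounts to */
	printf("nvme capable: %u\n", (word19 >> cfg_nvme_SHIFT) & cfg_nvme_MASK);
	return 0;
}
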
@@ -367,6 +369,9 @@ struct lpfc_wcqe_complete { #define lpfc_wcqe_c_hw_status_SHIFT 0 #define lpfc_wcqe_c_hw_status_MASK 0x000000FF #define lpfc_wcqe_c_hw_status_WORD word0 +#define lpfc_wcqe_c_ersp0_SHIFT 0 +#define lpfc_wcqe_c_ersp0_MASK 0x0000FFFF +#define lpfc_wcqe_c_ersp0_WORD word0 uint32_t total_data_placed; uint32_t parameter; #define lpfc_wcqe_c_bg_edir_SHIFT 5 @@ -400,6 +405,9 @@ struct lpfc_wcqe_complete { #define lpfc_wcqe_c_code_SHIFT lpfc_cqe_code_SHIFT #define lpfc_wcqe_c_code_MASK lpfc_cqe_code_MASK #define lpfc_wcqe_c_code_WORD lpfc_cqe_code_WORD +#define lpfc_wcqe_c_sqhead_SHIFT 0 +#define lpfc_wcqe_c_sqhead_MASK 0x0000FFFF +#define lpfc_wcqe_c_sqhead_WORD word3 }; /* completion queue entry for wqe release */ @@ -2841,12 +2849,18 @@ struct lpfc_sli4_parameters { #define cfg_mqv_WORD word6 uint32_t word7; uint32_t word8; +#define cfg_wqpcnt_SHIFT 0 +#define cfg_wqpcnt_MASK 0x0000000f +#define cfg_wqpcnt_WORD word8 #define cfg_wqsize_SHIFT 8 #define cfg_wqsize_MASK 0x0000000f #define cfg_wqsize_WORD word8 #define cfg_wqv_SHIFT 14 #define cfg_wqv_MASK 0x00000003 #define cfg_wqv_WORD word8 +#define cfg_wqpsize_SHIFT 16 +#define cfg_wqpsize_MASK 0x000000ff +#define cfg_wqpsize_WORD word8 uint32_t word9; uint32_t word10; #define cfg_rqv_SHIFT 14 @@ -2897,6 +2911,12 @@ struct lpfc_sli4_parameters { #define cfg_mds_diags_SHIFT 1 #define cfg_mds_diags_MASK 0x00000001 #define cfg_mds_diags_WORD word19 +#define cfg_nvme_SHIFT 3 +#define cfg_nvme_MASK 0x00000001 +#define cfg_nvme_WORD word19 +#define cfg_xib_SHIFT 4 +#define cfg_xib_MASK 0x00000001 +#define cfg_xib_WORD word19 }; #define LPFC_SET_UE_RECOVERY 0x10 @@ -3659,6 +3679,9 @@ struct wqe_common { #define wqe_ebde_cnt_SHIFT 0 #define wqe_ebde_cnt_MASK 0x0000000f #define wqe_ebde_cnt_WORD word10 +#define wqe_nvme_SHIFT 4 +#define wqe_nvme_MASK 0x00000001 +#define wqe_nvme_WORD word10 #define wqe_oas_SHIFT 6 #define wqe_oas_MASK 0x00000001 #define wqe_oas_WORD word10 @@ -4017,11 +4040,39 @@ struct lpfc_grp_hdr { uint8_t revision[32]; }; -#define FCP_COMMAND 0x0 -#define FCP_COMMAND_DATA_OUT 0x1 -#define ELS_COMMAND_NON_FIP 0xC -#define ELS_COMMAND_FIP 0xD -#define OTHER_COMMAND 0x8 +/* Defines for WQE command type */ +#define FCP_COMMAND 0x0 +#define NVME_READ_CMD 0x0 +#define FCP_COMMAND_DATA_OUT 0x1 +#define NVME_WRITE_CMD 0x1 +#define FCP_COMMAND_TRECEIVE 0x2 +#define FCP_COMMAND_TRSP 0x3 +#define FCP_COMMAND_TSEND 0x7 +#define OTHER_COMMAND 0x8 +#define ELS_COMMAND_NON_FIP 0xC +#define ELS_COMMAND_FIP 0xD + +#define LPFC_NVME_EMBED_CMD 0x0 +#define LPFC_NVME_EMBED_WRITE 0x1 +#define LPFC_NVME_EMBED_READ 0x2 + +/* WQE Commands */ +#define CMD_ABORT_XRI_WQE 0x0F +#define CMD_XMIT_SEQUENCE64_WQE 0x82 +#define CMD_XMIT_BCAST64_WQE 0x84 +#define CMD_ELS_REQUEST64_WQE 0x8A +#define CMD_XMIT_ELS_RSP64_WQE 0x95 +#define CMD_XMIT_BLS_RSP64_WQE 0x97 +#define CMD_FCP_IWRITE64_WQE 0x98 +#define CMD_FCP_IREAD64_WQE 0x9A +#define CMD_FCP_ICMND64_WQE 0x9C +#define CMD_FCP_TSEND64_WQE 0x9F +#define CMD_FCP_TRECEIVE64_WQE 0xA1 +#define CMD_FCP_TRSP64_WQE 0xA3 +#define CMD_GEN_REQUEST64_WQE 0xC2 + +#define CMD_WQE_MASK 0xff + #define LPFC_FW_DUMP 1 #define LPFC_FW_RESET 2 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index ec23388ac4f590..57087ba4834fa1 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -46,8 +47,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" 
#include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -499,12 +501,10 @@ lpfc_config_port_post(struct lpfc_hba *phba) phba->link_state = LPFC_LINK_DOWN; /* Only process IOCBs on ELS ring till hba_state is READY */ - if (psli->ring[psli->extra_ring].sli.sli3.cmdringaddr) - psli->ring[psli->extra_ring].flag |= LPFC_STOP_IOCB_EVENT; - if (psli->ring[psli->fcp_ring].sli.sli3.cmdringaddr) - psli->ring[psli->fcp_ring].flag |= LPFC_STOP_IOCB_EVENT; - if (psli->ring[psli->next_ring].sli.sli3.cmdringaddr) - psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT; + if (psli->sli3_ring[LPFC_EXTRA_RING].sli.sli3.cmdringaddr) + psli->sli3_ring[LPFC_EXTRA_RING].flag |= LPFC_STOP_IOCB_EVENT; + if (psli->sli3_ring[LPFC_FCP_RING].sli.sli3.cmdringaddr) + psli->sli3_ring[LPFC_FCP_RING].flag |= LPFC_STOP_IOCB_EVENT; /* Post receive buffers for desired rings */ if (phba->sli_rev != 3) @@ -892,7 +892,7 @@ lpfc_hba_free_post_buf(struct lpfc_hba *phba) lpfc_sli_hbqbuf_free_all(phba); else { /* Cleanup preposted buffers on the ELS ring */ - pring = &psli->ring[LPFC_ELS_RING]; + pring = &psli->sli3_ring[LPFC_ELS_RING]; spin_lock_irq(&phba->hbalock); list_splice_init(&pring->postbufq, &buflist); spin_unlock_irq(&phba->hbalock); @@ -925,32 +925,43 @@ static void lpfc_hba_clean_txcmplq(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; LIST_HEAD(completions); int i; - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - if (phba->sli_rev >= LPFC_SLI_REV4) - spin_lock_irq(&pring->ring_lock); - else + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; spin_lock_irq(&phba->hbalock); - /* At this point in time the HBA is either reset or DOA. Either - * way, nothing should be on txcmplq as it will NEVER complete. - */ - list_splice_init(&pring->txcmplq, &completions); - pring->txcmplq_cnt = 0; - - if (phba->sli_rev >= LPFC_SLI_REV4) - spin_unlock_irq(&pring->ring_lock); - else + /* At this point in time the HBA is either reset or DOA + * Nothing should be on txcmplq as it will + * NEVER complete. 
+ */ + list_splice_init(&pring->txcmplq, &completions); + pring->txcmplq_cnt = 0; spin_unlock_irq(&phba->hbalock); + lpfc_sli_abort_iocb_ring(phba, pring); + } /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, - IOERR_SLI_ABORTED); + lpfc_sli_cancel_iocbs(phba, &completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); + return; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txcmplq, &completions); + pring->txcmplq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); lpfc_sli_abort_iocb_ring(phba, pring); } + /* Cancel all the IOCBs from the completions list */ + lpfc_sli_cancel_iocbs(phba, &completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); } /** @@ -989,43 +1000,51 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; LIST_HEAD(aborts); + LIST_HEAD(nvme_aborts); unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - lpfc_hba_free_post_buf(phba); + + lpfc_sli_hbqbuf_free_all(phba); lpfc_hba_clean_txcmplq(phba); - pring = &psli->ring[LPFC_ELS_RING]; /* At this point in time the HBA is either reset or DOA. Either * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be - * on the lpfc_sgl_list so that it can either be freed if the + * on the lpfc_els_sgl_list so that it can either be freed if the * driver is unloading or reposted if the driver is restarting * the port. */ - spin_lock_irq(&phba->hbalock); /* required for lpfc_sgl_list and */ + spin_lock_irq(&phba->hbalock); /* required for lpfc_els_sgl_list and */ /* scsl_buf_list */ - /* abts_sgl_list_lock required because worker thread uses this + /* sgl_list_lock required because worker thread uses this * list. */ - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; - spin_lock(&pring->ring_lock); list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + &phba->sli4_hba.lpfc_els_sgl_list); + + spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this * list. 
*/ - spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); - list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list, - &aborts); - spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list, + &aborts); + spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock); + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, + &nvme_aborts); + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + } + spin_unlock_irq(&phba->hbalock); list_for_each_entry_safe(psb, psb_next, &aborts, list) { @@ -1036,6 +1055,14 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + } + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + lpfc_sli4_free_sp_events(phba); return 0; } @@ -1829,7 +1856,7 @@ lpfc_handle_eratt(struct lpfc_hba *phba) * @phba: pointer to lpfc hba data structure. * * This routine is invoked from the worker thread to handle a HBA host - * attention link event. + * attention link event. SLI3 only. **/ void lpfc_handle_latt(struct lpfc_hba *phba) @@ -1867,7 +1894,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = vport; /* Block ELS IOCBs until we have processed this mbox command */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli.sli3_ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; rc = lpfc_sli_issue_mbox (phba, pmb, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { rc = 4; @@ -1883,7 +1910,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) return; lpfc_handle_latt_free_mbuf: - phba->sli.ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + phba->sli.sli3_ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; lpfc_mbuf_free(phba, mp->virt, mp->phys); lpfc_handle_latt_free_mp: kfree(mp); @@ -2441,7 +2468,7 @@ lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt) * * This routine posts initial receive IOCB buffers to the ELS ring. The * current number of initial IOCB buffers specified by LPFC_BUF_RING0 is - * set to 64 IOCBs. + * set to 64 IOCBs. SLI3 only. 
* * Return codes * 0 - success (currently always success) @@ -2452,7 +2479,7 @@ lpfc_post_rcv_buf(struct lpfc_hba *phba) struct lpfc_sli *psli = &phba->sli; /* Ring 0, ELS / CT buffers */ - lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], LPFC_BUF_RING0); + lpfc_post_buffer(phba, &psli->sli3_ring[LPFC_ELS_RING], LPFC_BUF_RING0); /* Ring 2 - FCP no buffers needed */ return 0; @@ -2894,11 +2921,6 @@ lpfc_online(struct lpfc_hba *phba) lpfc_block_mgmt_io(phba, LPFC_MBX_WAIT); - if (!lpfc_sli_queue_setup(phba)) { - lpfc_unblock_mgmt_io(phba); - return 1; - } - if (phba->sli_rev == LPFC_SLI_REV4) { if (lpfc_sli4_hba_setup(phba)) { /* Initialize SLI4 HBA */ lpfc_unblock_mgmt_io(phba); @@ -2909,6 +2931,7 @@ lpfc_online(struct lpfc_hba *phba) vpis_cleared = true; spin_unlock_irq(&phba->hbalock); } else { + lpfc_sli_queue_init(phba); if (lpfc_sli_hba_setup(phba)) { /* Initialize SLI2/SLI3 HBA */ lpfc_unblock_mgmt_io(phba); return 1; @@ -3100,6 +3123,9 @@ lpfc_scsi_free(struct lpfc_hba *phba) struct lpfc_scsi_buf *sb, *sb_next; struct lpfc_iocbq *io, *io_next; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; + spin_lock_irq(&phba->hbalock); /* Release all the lpfc_scsi_bufs maintained by this host. */ @@ -3108,7 +3134,7 @@ lpfc_scsi_free(struct lpfc_hba *phba) list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_put, list) { list_del(&sb->list); - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, sb->data, sb->dma_handle); kfree(sb); phba->total_scsi_bufs--; @@ -3119,7 +3145,7 @@ lpfc_scsi_free(struct lpfc_hba *phba) list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_get, list) { list_del(&sb->list); - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, sb->data, sb->dma_handle); kfree(sb); phba->total_scsi_bufs--; @@ -3135,9 +3161,59 @@ lpfc_scsi_free(struct lpfc_hba *phba) spin_unlock_irq(&phba->hbalock); } +/** + * lpfc_nvme_free - Free all the NVME buffers and IOCBs from driver lists + * @phba: pointer to lpfc hba data structure. + * + * This routine is to free all the NVME buffers and IOCBs from the driver + * list back to kernel. It is called from lpfc_pci_remove_one to free + * the internal resources before the device is removed from the system. + **/ +static void +lpfc_nvme_free(struct lpfc_hba *phba) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + struct lpfc_iocbq *io, *io_next; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + + spin_lock_irq(&phba->hbalock); + + /* Release all the lpfc_nvme_bufs maintained by this host. */ + spin_lock(&phba->nvme_buf_list_put_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_put, list) { + list_del(&lpfc_ncmd->list); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + phba->total_nvme_bufs--; + } + spin_unlock(&phba->nvme_buf_list_put_lock); + + spin_lock(&phba->nvme_buf_list_get_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + list_del(&lpfc_ncmd->list); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + phba->total_nvme_bufs--; + } + spin_unlock(&phba->nvme_buf_list_get_lock); + /* Release all the lpfc_iocbq entries maintained by this host. 
*/ + list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { + list_del(&io->list); + kfree(io); + phba->total_iocbq_bufs--; + } + + spin_unlock_irq(&phba->hbalock); +} /** - * lpfc_sli4_xri_sgl_update - update xri-sgl sizing and mapping + * lpfc_sli4_els_sgl_update - update ELS xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. * * This routine first calculates the sizes of the current els and allocated @@ -3149,20 +3225,18 @@ lpfc_scsi_free(struct lpfc_hba *phba) * 0 - successful (for now, it always returns 0) **/ int -lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) +lpfc_sli4_els_sgl_update(struct lpfc_hba *phba) { struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; - struct lpfc_scsi_buf *psb = NULL, *psb_next = NULL; - uint16_t i, lxri, xri_cnt, els_xri_cnt, scsi_xri_cnt; + uint16_t i, lxri, xri_cnt, els_xri_cnt; LIST_HEAD(els_sgl_list); - LIST_HEAD(scsi_sgl_list); int rc; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; /* * update on pci function's els xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + if (els_xri_cnt > phba->sli4_hba.els_xri_cnt) { /* els xri-sgl expanded */ xri_cnt = els_xri_cnt - phba->sli4_hba.els_xri_cnt; @@ -3198,9 +3272,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) list_add_tail(&sglq_entry->list, &els_sgl_list); } spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&els_sgl_list, &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&els_sgl_list, + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else if (els_xri_cnt < phba->sli4_hba.els_xri_cnt) { /* els xri-sgl shrinked */ @@ -3210,24 +3285,22 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) "%d to %d\n", phba->sli4_hba.els_xri_cnt, els_xri_cnt); spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &els_sgl_list); - spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_els_sgl_list, + &els_sgl_list); /* release extra els sgls from list */ for (i = 0; i < xri_cnt; i++) { list_remove_head(&els_sgl_list, sglq_entry, struct lpfc_sglq, list); if (sglq_entry) { - lpfc_mbuf_free(phba, sglq_entry->virt, - sglq_entry->phys); + __lpfc_mbuf_free(phba, sglq_entry->virt, + sglq_entry->phys); kfree(sglq_entry); } } - spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&els_sgl_list, &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + list_splice_init(&els_sgl_list, + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else lpfc_printf_log(phba, KERN_INFO, LOG_SLI, @@ -3239,7 +3312,7 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) sglq_entry = NULL; sglq_entry_next = NULL; list_for_each_entry_safe(sglq_entry, sglq_entry_next, - &phba->sli4_hba.lpfc_sgl_list, list) { + &phba->sli4_hba.lpfc_els_sgl_list, list) { lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -3251,21 +3324,53 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) sglq_entry->sli4_lxritag = lxri; sglq_entry->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; } + return 0; + +out_free_mem: + lpfc_free_els_sgl_list(phba); + return rc; +} + +/** + * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing 
and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. + * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_scsi_buf *psb, *psb_next; + uint16_t i, lxri, els_xri_cnt, scsi_xri_cnt; + LIST_HEAD(scsi_sgl_list); + int rc; /* - * update on pci function's allocated scsi xri-sgl list + * update on pci function's els xri-sgl list */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); phba->total_scsi_bufs = 0; + /* + * update on pci function's allocated scsi xri-sgl list + */ /* maximum number of xris available for scsi buffers */ phba->sli4_hba.scsi_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "2401 Current allocated SCSI xri-sgl count:%d, " - "maximum SCSI xri count:%d\n", - phba->sli4_hba.scsi_xri_cnt, - phba->sli4_hba.scsi_xri_max); + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return 0; + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + phba->sli4_hba.scsi_xri_max = /* Split them up */ + (phba->sli4_hba.scsi_xri_max * + phba->cfg_xri_split) / 100; spin_lock_irq(&phba->scsi_buf_list_get_lock); spin_lock(&phba->scsi_buf_list_put_lock); @@ -3283,7 +3388,7 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) list_remove_head(&scsi_sgl_list, psb, struct lpfc_scsi_buf, list); if (psb) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, psb->data, psb->dma_handle); kfree(psb); } @@ -3314,15 +3419,112 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); - return 0; out_free_mem: - lpfc_free_els_sgl_list(phba); lpfc_scsi_free(phba); return rc; } +/** + * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. 
+ * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_nvme_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL; + uint16_t i, lxri, els_xri_cnt; + uint16_t nvme_xri_cnt, nvme_xri_max; + LIST_HEAD(nvme_sgl_list); + int rc; + + phba->total_nvme_bufs = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return 0; + /* + * update on pci function's allocated nvme xri-sgl list + */ + + /* maximum number of xris available for nvme buffers */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + nvme_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + phba->sli4_hba.nvme_xri_max = nvme_xri_max; + phba->sli4_hba.nvme_xri_max -= phba->sli4_hba.scsi_xri_max; + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6074 Current allocated NVME xri-sgl count:%d, " + "maximum NVME xri count:%d\n", + phba->sli4_hba.nvme_xri_cnt, + phba->sli4_hba.nvme_xri_max); + + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&phba->lpfc_nvme_buf_list_get, &nvme_sgl_list); + list_splice(&phba->lpfc_nvme_buf_list_put, &nvme_sgl_list); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + + if (phba->sli4_hba.nvme_xri_cnt > phba->sli4_hba.nvme_xri_max) { + /* max nvme xri shrunk below the allocated nvme buffers */ + spin_lock_irq(&phba->nvme_buf_list_get_lock); + nvme_xri_cnt = phba->sli4_hba.nvme_xri_cnt - + phba->sli4_hba.nvme_xri_max; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + /* release the extra allocated nvme buffers */ + for (i = 0; i < nvme_xri_cnt; i++) { + list_remove_head(&nvme_sgl_list, lpfc_ncmd, + struct lpfc_nvme_buf, list); + if (lpfc_ncmd) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + } + } + spin_lock_irq(&phba->nvme_buf_list_get_lock); + phba->sli4_hba.nvme_xri_cnt -= nvme_xri_cnt; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + } + + /* update xris associated to remaining allocated nvme buffers */ + lpfc_ncmd = NULL; + lpfc_ncmd_next = NULL; + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &nvme_sgl_list, list) { + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6075 Failed to allocate xri for " + "nvme buffer\n"); + rc = -ENOMEM; + goto out_free_mem; + } + lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri; + lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + } + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&nvme_sgl_list, &phba->lpfc_nvme_buf_list_get); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + return 0; + +out_free_mem: + lpfc_nvme_free(phba); + return rc; +} + /** * lpfc_create_port - Create an FC port * @phba: pointer to lpfc hba data structure. 
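The three sgl-update routines above carve the port's XRI pool in a fixed order: ELS keeps its reserved count, the remainder is split between FCP and NVME by the cfg_xri_split percentage, and NVME takes whatever FCP does not. A standalone sketch of that arithmetic follows; max_xri, els_xri_cnt and the split percentage are invented placeholder values, not numbers from any real adapter.

#include <stdio.h>

int main(void)
{
	unsigned int max_xri = 4096;	/* whole XRI pool (invented) */
	unsigned int els_xri_cnt = 256;	/* reserved for ELS sgls (invented) */
	unsigned int xri_split = 50;	/* cfg_xri_split: % of the rest for FCP */

	unsigned int io_xri = max_xri - els_xri_cnt;
	unsigned int scsi_xri_max = io_xri * xri_split / 100;	/* FCP share */
	unsigned int nvme_xri_max = io_xri - scsi_xri_max;	/* NVME keeps the rest */

	printf("ELS %u, FCP %u, NVME %u\n",
	       els_xri_cnt, scsi_xri_max, nvme_xri_max);
	return 0;
}

With the 50/50 split shown, the example prints ELS 256, FCP 1920, NVME 1920.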
@@ -3343,18 +3545,23 @@ struct lpfc_vport * lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) { struct lpfc_vport *vport; - struct Scsi_Host *shost; + struct Scsi_Host *shost = NULL; int error = 0; - if (dev != &phba->pcidev->dev) { - shost = scsi_host_alloc(&lpfc_vport_template, - sizeof(struct lpfc_vport)); - } else { - if (phba->sli_rev == LPFC_SLI_REV4) - shost = scsi_host_alloc(&lpfc_template, - sizeof(struct lpfc_vport)); - else - shost = scsi_host_alloc(&lpfc_template_s3, + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + if (dev != &phba->pcidev->dev) { + shost = scsi_host_alloc(&lpfc_vport_template, + sizeof(struct lpfc_vport)); + } else { + if (phba->sli_rev == LPFC_SLI_REV4) + shost = scsi_host_alloc(&lpfc_template, + sizeof(struct lpfc_vport)); + else + shost = scsi_host_alloc(&lpfc_template_s3, + sizeof(struct lpfc_vport)); + } + } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + shost = scsi_host_alloc(&lpfc_template_nvme, sizeof(struct lpfc_vport)); } if (!shost) @@ -3365,8 +3572,8 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) vport->load_flag |= FC_LOADING; vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; vport->fc_rscn_flush = 0; - lpfc_get_vport_cfgparam(vport); + shost->unique_id = instance; shost->max_id = LPFC_MAX_TARGET; shost->max_lun = vport->cfg_max_luns; @@ -3944,7 +4151,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, lpfc_els_flush_all_cmd(phba); /* Block ELS IOCBs until we have done process link event */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli4_hba.els_wq->pring->flag |= LPFC_STOP_IOCB_EVENT; /* Update link event statistics */ phba->sli.slistat.link_event++; @@ -4103,7 +4310,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) lpfc_els_flush_all_cmd(phba); /* Block ELS IOCBs until we have done process link event */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli4_hba.els_wq->pring->flag |= LPFC_STOP_IOCB_EVENT; /* Update link event statistics */ phba->sli.slistat.link_event++; @@ -5000,40 +5207,78 @@ lpfc_sli_probe_sriov_nr_virtfn(struct lpfc_hba *phba, int nr_vfn) } /** - * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev. + * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources. * @phba: pointer to lpfc hba data structure. * - * This routine is invoked to set up the driver internal resources specific to - * support the SLI-3 HBA device it attached to. + * This routine is invoked to set up the driver internal resources before the + * device specific resource setup to support the HBA device it attached to. 
* * Return codes - * 0 - successful - * other values - error + * 0 - successful + * other values - error **/ static int -lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) +lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) { - struct lpfc_sli *psli; - int rc; + struct lpfc_sli *psli = &phba->sli; /* - * Initialize timers used by driver + * Driver resources common to all SLI revisions */ + atomic_set(&phba->fast_event_count, 0); + spin_lock_init(&phba->hbalock); - /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; + /* Initialize ndlp management spinlock */ + spin_lock_init(&phba->ndlp_lock); + + INIT_LIST_HEAD(&phba->port_list); + INIT_LIST_HEAD(&phba->work_list); + init_waitqueue_head(&phba->wait_4_mlo_m_q); + + /* Initialize the wait queue head for the kernel thread */ + init_waitqueue_head(&phba->work_waitq); + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1403 Protocols supported %s %s\n", + ((phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ? + "SCSI" : " "), + ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) ? + "NVME" : " ")); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* Initialize the scsi buffer list used by driver for scsi IO */ + spin_lock_init(&phba->scsi_buf_list_get_lock); + INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); + spin_lock_init(&phba->scsi_buf_list_put_lock); + INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); + } + + if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && + (phba->nvmet_support == 0)) { + /* Initialize the NVME buffer list used by driver for NVME IO */ + spin_lock_init(&phba->nvme_buf_list_get_lock); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_get); + spin_lock_init(&phba->nvme_buf_list_put_lock); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + } + + /* Initialize the fabric iocb list */ + INIT_LIST_HEAD(&phba->fabric_iocb_list); + + /* Initialize list to save ELS buffers */ + INIT_LIST_HEAD(&phba->elsbuf); + + /* Initialize FCF connection rec list */ + INIT_LIST_HEAD(&phba->fcf_conn_rec_list); + + /* Initialize OAS configuration list */ + spin_lock_init(&phba->devicelock); + INIT_LIST_HEAD(&phba->luns); - psli = &phba->sli; /* MBOX heartbeat timer */ init_timer(&psli->mbox_tmo); psli->mbox_tmo.function = lpfc_mbox_timeout; psli->mbox_tmo.data = (unsigned long) phba; - /* FCP polling mode timer */ - init_timer(&phba->fcp_poll_timer); - phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long) phba; /* Fabric block timer */ init_timer(&phba->fabric_block_timer); phba->fabric_block_timer.function = lpfc_fabric_block_timeout; @@ -5042,6 +5287,38 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) init_timer(&phba->eratt_poll); phba->eratt_poll.function = lpfc_poll_eratt; phba->eratt_poll.data = (unsigned long) phba; + /* Heartbeat timer */ + init_timer(&phba->hb_tmofunc); + phba->hb_tmofunc.function = lpfc_hb_timeout; + phba->hb_tmofunc.data = (unsigned long)phba; + + return 0; +} + +/** + * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked to set up the driver internal resources specific to + * support the SLI-3 HBA device it attached to. 
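The new lpfc_setup_driver_resource_phase1() gathers everything the SLI-3 and SLI-4 paths used to duplicate (locks, list heads, the mailbox/fabric/eratt/heartbeat timers), and each revision-specific setup routine now keeps only its own extras. A rough userspace model of that call structure, with printf standing in for the real initializers and sli_rev4 as a made-up selector:

#include <stdio.h>

struct hba { int fcp; int nvme; };

/* common phase-1 work shared by every SLI revision (simplified) */
static int phase1(struct hba *h)
{
	if (h->fcp)
		printf("init FCP buffer lists and locks\n");
	if (h->nvme)
		printf("init NVME buffer lists and locks\n");
	printf("init mbox, fabric, eratt and heartbeat timers\n");
	return 0;
}

static int sli3_setup(struct hba *h)
{
	if (phase1(h))
		return -1;
	printf("SLI-3 only: FCP polling timer, sli3_ring array\n");
	return 0;
}

static int sli4_setup(struct hba *h)
{
	if (phase1(h))
		return -1;
	printf("SLI-4 only: rrq and FCF rediscover timers\n");
	return 0;
}

int main(void)
{
	struct hba h = { .fcp = 1, .nvme = 1 };
	int sli_rev4 = 1;	/* hypothetical: pretend this is an SLI-4 port */

	return sli_rev4 ? sli4_setup(&h) : sli3_setup(&h);
}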
+ * + * Return codes + * 0 - successful + * other values - error + **/ +static int +lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) +{ + int rc; + + /* + * Initialize timers used by driver + */ + + /* FCP polling mode timer */ + init_timer(&phba->fcp_poll_timer); + phba->fcp_poll_timer.function = lpfc_poll_timeout; + phba->fcp_poll_timer.data = (unsigned long) phba; /* Host attention work mask setup */ phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT); @@ -5049,6 +5326,12 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); + /* Set up phase-1 common device driver resources */ + + rc = lpfc_setup_driver_resource_phase1(phba); + if (rc) + return -ENODEV; + if (phba->pcidev->device == PCI_DEVICE_ID_HORNET) { phba->menlo_flag |= HBA_MENLO_SUPPORT; /* check for menlo minimum sg count */ @@ -5056,10 +5339,10 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT; } - if (!phba->sli.ring) - phba->sli.ring = kzalloc(LPFC_SLI3_MAX_RING * + if (!phba->sli.sli3_ring) + phba->sli.sli3_ring = kzalloc(LPFC_SLI3_MAX_RING * sizeof(struct lpfc_sli_ring), GFP_KERNEL); - if (!phba->sli.ring) + if (!phba->sli.sli3_ring) return -ENOMEM; /* @@ -5118,7 +5401,7 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) * Initialize the SLI Layer to run with lpfc HBAs. */ lpfc_sli_setup(phba); - lpfc_sli_queue_setup(phba); + lpfc_sli_queue_init(phba); /* Allocate device driver memory */ if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ)) @@ -5174,18 +5457,25 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba) static int lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) { - struct lpfc_vector_map_info *cpup; - struct lpfc_sli *psli; LPFC_MBOXQ_t *mboxq; - int rc, i, hbq_count, max_buf_size; + int rc, i, max_buf_size; uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0}; struct lpfc_mqe *mqe; int longs; int fof_vectors = 0; + phba->sli4_hba.num_online_cpu = num_online_cpus(); + phba->sli4_hba.num_present_cpu = lpfc_present_cpu; + phba->sli4_hba.curr_disp_cpu = 0; + /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); + /* Set up phase-1 common device driver resources */ + rc = lpfc_setup_driver_resource_phase1(phba); + if (rc) + return -ENODEV; + /* Before proceed, wait for POST done and device ready */ rc = lpfc_sli4_post_status_check(phba); if (rc) @@ -5195,27 +5485,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * Initialize timers used by driver */ - /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; init_timer(&phba->rrq_tmr); phba->rrq_tmr.function = lpfc_rrq_timeout; phba->rrq_tmr.data = (unsigned long)phba; - psli = &phba->sli; - /* MBOX heartbeat timer */ - init_timer(&psli->mbox_tmo); - psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long) phba; - /* Fabric block timer */ - init_timer(&phba->fabric_block_timer); - phba->fabric_block_timer.function = lpfc_fabric_block_timeout; - phba->fabric_block_timer.data = (unsigned long) phba; - /* EA polling mode timer */ - init_timer(&phba->eratt_poll); - phba->eratt_poll.function = lpfc_poll_eratt; - phba->eratt_poll.data = (unsigned long) phba; /* FCF rediscover timer */ init_timer(&phba->fcf.redisc_wait); phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo; @@ -5242,14 +5515,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* * For 
SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands - * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple. + * we will associate a new ring, for each EQ/CQ/WQ tuple. + * The WQ create will allocate the ring. */ - if (!phba->sli.ring) - phba->sli.ring = kzalloc( - (LPFC_SLI3_MAX_RING + phba->cfg_fcp_io_channel) * - sizeof(struct lpfc_sli_ring), GFP_KERNEL); - if (!phba->sli.ring) - return -ENOMEM; /* * It doesn't matter what family our adapter is in, we are @@ -5261,43 +5529,45 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_sg_seg_cnt = LPFC_MAX_SGL_SEG_CNT - 2; /* - * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size - * used to create the sg_dma_buf_pool must be dynamically calculated. + * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size + * used to create the sg_dma_buf_pool must be calculated. */ - if (phba->cfg_enable_bg) { /* - * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd, - * the FCP rsp, and a SGE for each. Sice we have no control - * over how many protection data segments the SCSI Layer + * The scsi_buf for a T10-DIF I/O holds the FCP cmnd, + * the FCP rsp, and a SGE. Sice we have no control + * over how many protection segments the SCSI Layer * will hand us (ie: there could be one for every block - * in the IO), we just allocate enough SGEs to accomidate - * our max amount and we need to limit lpfc_sg_seg_cnt to - * minimize the risk of running out. + * in the IO), just allocate enough SGEs to accomidate + * our max amount and we need to limit lpfc_sg_seg_cnt + * to minimize the risk of running out. */ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + max_buf_size; + sizeof(struct fcp_rsp) + max_buf_size; /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */ phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT; if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SLI4_SEG_CNT_DIF) - phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SLI4_SEG_CNT_DIF; + phba->cfg_sg_seg_cnt = + LPFC_MAX_SG_SLI4_SEG_CNT_DIF; } else { /* - * The scsi_buf for a regular I/O will hold the FCP cmnd, + * The scsi_buf for a regular I/O holds the FCP cmnd, * the FCP rsp, a SGE for each, and a SGE for up to * cfg_sg_seg_cnt data segments. */ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + - ((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge)); + sizeof(struct fcp_rsp) + + ((phba->cfg_sg_seg_cnt + 2) * + sizeof(struct sli4_sge)); /* Total SGEs for scsi_sg_list */ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2; + /* - * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only need - * to post 1 page for the SGL. + * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only + * need to post 1 page for the SGL. */ } @@ -5317,21 +5587,27 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_total_seg_cnt); /* Initialize buffer queue management fields */ - hbq_count = lpfc_sli_hbq_count(); - for (i = 0; i < hbq_count; ++i) - INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list); - INIT_LIST_HEAD(&phba->rb_pend_list); + INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list); phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc; phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_sli4_rb_free; /* * Initialize the SLI Layer to run with lpfc SLI4 HBAs. 
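The reworked comments above keep the same sizing rule: for a regular I/O the per-command DMA buffer holds the FCP cmnd, the FCP rsp, one SGE for each of those, and one SGE per data segment up to cfg_sg_seg_cnt. A compilable example of that arithmetic with invented structure sizes (the real sizes come from the SLI-4 headers, so treat these as placeholders):

#include <stdio.h>

int main(void)
{
	unsigned int fcp_cmnd = 32, fcp_rsp = 24, sge = 16;	/* invented sizes */
	unsigned int sg_seg_cnt = 64;				/* module parameter */

	/* cmnd + rsp + one SGE for each of them + sg_seg_cnt data SGEs */
	unsigned int sg_dma_buf_size = fcp_cmnd + fcp_rsp + (sg_seg_cnt + 2) * sge;
	unsigned int total_seg_cnt = sg_seg_cnt + 2;

	printf("sg_dma_buf_size=%u total_seg_cnt=%u\n",
	       sg_dma_buf_size, total_seg_cnt);
	return 0;
}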
*/ - /* Initialize the Abort scsi buffer list used by driver */ - spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* Initialize the Abort scsi buffer list used by driver */ + spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Initialize the Abort nvme buffer list used by driver */ + spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + } + /* This abort list used by worker thread */ - spin_lock_init(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock_init(&phba->sli4_hba.sgl_list_lock); /* * Initialize driver internal slow-path work queues @@ -5359,10 +5635,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* initialize optic_state to 0xFF */ phba->sli4_hba.lnk_info.optic_state = 0xff; - /* Initialize the driver internal SLI layer lists. */ - lpfc_sli_setup(phba); - lpfc_sli_queue_setup(phba); - /* Allocate device driver memory */ rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ); if (rc) @@ -5372,8 +5644,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_2) { rc = lpfc_pci_function_reset(phba); - if (unlikely(rc)) - return -ENODEV; + if (unlikely(rc)) { + rc = -ENODEV; + goto out_free_mem; + } phba->temp_sensor_support = 1; } @@ -5410,6 +5684,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_bsmbx; } + phba->nvmet_support = 0; + + lpfc_nvme_mod_param_dep(phba); + /* Get the Supported Pages if PORT_CAPABILITIES is supported by port. 
*/ lpfc_supported_pages(mboxq); rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); @@ -5448,9 +5726,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2999 Unsupported SLI4 Parameters " "Extents and RPI headers enabled.\n"); - goto out_free_bsmbx; } + mempool_free(mboxq, phba->mbox_mem_pool); + goto out_free_bsmbx; } + mempool_free(mboxq, phba->mbox_mem_pool); /* Verify OAS is supported */ @@ -5497,11 +5777,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_remove_rpi_hdrs; } - phba->sli4_hba.fcp_eq_hdl = - kzalloc((sizeof(struct lpfc_fcp_eq_hdl) * - (fof_vectors + phba->cfg_fcp_io_channel)), - GFP_KERNEL); - if (!phba->sli4_hba.fcp_eq_hdl) { + phba->sli4_hba.hba_eq_hdl = kcalloc(fof_vectors + phba->io_channel_irqs, + sizeof(struct lpfc_hba_eq_hdl), + GFP_KERNEL); + if (!phba->sli4_hba.hba_eq_hdl) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2572 Failed allocate memory for " "fast-path per-EQ handle array\n"); @@ -5509,41 +5788,31 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_fcf_rr_bmask; } - phba->sli4_hba.cpu_map = kzalloc((sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_present_cpu), - GFP_KERNEL); + phba->sli4_hba.cpu_map = kcalloc(phba->sli4_hba.num_present_cpu, + sizeof(struct lpfc_vector_map_info), + GFP_KERNEL); if (!phba->sli4_hba.cpu_map) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3327 Failed allocate memory for msi-x " "interrupt vector mapping\n"); rc = -ENOMEM; - goto out_free_fcp_eq_hdl; + goto out_free_hba_eq_hdl; } if (lpfc_used_cpu == NULL) { - lpfc_used_cpu = kzalloc((sizeof(uint16_t) * lpfc_present_cpu), - GFP_KERNEL); + lpfc_used_cpu = kcalloc(lpfc_present_cpu, sizeof(uint16_t), + GFP_KERNEL); if (!lpfc_used_cpu) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3335 Failed allocate memory for msi-x " "interrupt vector mapping\n"); kfree(phba->sli4_hba.cpu_map); rc = -ENOMEM; - goto out_free_fcp_eq_hdl; + goto out_free_hba_eq_hdl; } for (i = 0; i < lpfc_present_cpu; i++) lpfc_used_cpu[i] = LPFC_VECTOR_MAP_EMPTY; } - /* Initialize io channels for round robin */ - cpup = phba->sli4_hba.cpu_map; - rc = 0; - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { - cpup->channel_id = rc; - rc++; - if (rc >= phba->cfg_fcp_io_channel) - rc = 0; - } - /* * Enable sr-iov virtual functions if supported and configured * through the module parameter. @@ -5563,8 +5832,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; -out_free_fcp_eq_hdl: - kfree(phba->sli4_hba.fcp_eq_hdl); +out_free_hba_eq_hdl: + kfree(phba->sli4_hba.hba_eq_hdl); out_free_fcf_rr_bmask: kfree(phba->fcf.fcf_rr_bmask); out_remove_rpi_hdrs: @@ -5599,7 +5868,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.curr_disp_cpu = 0; /* Free memory allocated for fast-path work queue handles */ - kfree(phba->sli4_hba.fcp_eq_hdl); + kfree(phba->sli4_hba.hba_eq_hdl); /* Free the allocated rpi headers. */ lpfc_sli4_remove_rpi_hdrs(phba); @@ -5672,58 +5941,6 @@ lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) return 0; } -/** - * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to set up the driver internal resources before the - * device specific resource setup to support the HBA device it attached to. 
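Several allocations in these hunks move from kzalloc(count * size, ...) to kcalloc(count, size, ...), which returns zeroed memory and lets the allocator reject a count/size product that would overflow. The same idea in plain userspace C with calloc(); eq_hdl and nvec here are stand-ins, not the driver's types or counts.

#include <stdio.h>
#include <stdlib.h>

struct eq_hdl {
	int idx;
	void *phba;
};

int main(void)
{
	size_t nvec = 16;	/* hypothetical fof_vectors + io_channel_irqs */
	struct eq_hdl *hdl = calloc(nvec, sizeof(*hdl));	/* zeroed, overflow-checked */

	if (!hdl)
		return 1;
	printf("allocated %zu zeroed handles\n", nvec);
	free(hdl);
	return 0;
}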
- * - * Return codes - * 0 - successful - * other values - error - **/ -static int -lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) -{ - /* - * Driver resources common to all SLI revisions - */ - atomic_set(&phba->fast_event_count, 0); - spin_lock_init(&phba->hbalock); - - /* Initialize ndlp management spinlock */ - spin_lock_init(&phba->ndlp_lock); - - INIT_LIST_HEAD(&phba->port_list); - INIT_LIST_HEAD(&phba->work_list); - init_waitqueue_head(&phba->wait_4_mlo_m_q); - - /* Initialize the wait queue head for the kernel thread */ - init_waitqueue_head(&phba->work_waitq); - - /* Initialize the scsi buffer list used by driver for scsi IO */ - spin_lock_init(&phba->scsi_buf_list_get_lock); - INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); - spin_lock_init(&phba->scsi_buf_list_put_lock); - INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); - - /* Initialize the fabric iocb list */ - INIT_LIST_HEAD(&phba->fabric_iocb_list); - - /* Initialize list to save ELS buffers */ - INIT_LIST_HEAD(&phba->elsbuf); - - /* Initialize FCF connection rec list */ - INIT_LIST_HEAD(&phba->fcf_conn_rec_list); - - /* Initialize OAS configuration list */ - spin_lock_init(&phba->devicelock); - INIT_LIST_HEAD(&phba->luns); - - return 0; -} - /** * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources. * @phba: pointer to lpfc hba data structure. @@ -5871,13 +6088,12 @@ static void lpfc_free_els_sgl_list(struct lpfc_hba *phba) { LIST_HEAD(sglq_list); - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; /* Retrieve all els sgls from driver list */ spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &sglq_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_els_sgl_list, &sglq_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); /* Now free the sgl list */ @@ -5931,7 +6147,7 @@ static void lpfc_init_sgl_list(struct lpfc_hba *phba) { /* Initialize and populate the sglq list per host/VF. */ - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); /* els xri-sgl book keeping */ @@ -5939,6 +6155,9 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) /* scsi xri-buffer book keeping */ phba->sli4_hba.scsi_xri_cnt = 0; + + /* nvme xri-buffer book keeping */ + phba->sli4_hba.nvme_xri_cnt = 0; } /** @@ -6169,9 +6388,9 @@ lpfc_hba_free(struct lpfc_hba *phba) /* Release the driver assigned board number */ idr_remove(&lpfc_hba_index, phba->brd_no); - /* Free memory allocated with sli rings */ - kfree(phba->sli.ring); - phba->sli.ring = NULL; + /* Free memory allocated with sli3 rings */ + kfree(phba->sli.sli3_ring); + phba->sli.sli3_ring = NULL; kfree(phba); return; @@ -6489,8 +6708,6 @@ lpfc_sli_pci_mem_setup(struct lpfc_hba *phba) memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size()); - INIT_LIST_HEAD(&phba->rb_pend_list); - phba->MBslimaddr = phba->slim_memmap_p; phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; @@ -7200,11 +7417,11 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) } /** - * lpfc_sli4_queue_verify - Verify and update EQ and CQ counts + * lpfc_sli4_queue_verify - Verify and update EQ counts * @phba: pointer to lpfc hba data structure. * - * This routine is invoked to check the user settable queue counts for EQs and - * CQs. 
after this routine is called the counts will be set to valid values that + * This routine is invoked to check the user settable queue counts for EQs. + * After this routine is called the counts will be set to valid values that * adhere to the constraints of the system's interrupt vectors and the port's * queue resources. * @@ -7215,9 +7432,7 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) static int lpfc_sli4_queue_verify(struct lpfc_hba *phba) { - int cfg_fcp_io_channel; - uint32_t cpu; - uint32_t i = 0; + int io_channel; int fof_vectors = phba->cfg_fof ? 1 : 0; /* @@ -7226,49 +7441,38 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) */ /* Sanity check on HBA EQ parameters */ - cfg_fcp_io_channel = phba->cfg_fcp_io_channel; + io_channel = phba->io_channel_irqs; - /* It doesn't make sense to have more io channels then online CPUs */ - for_each_present_cpu(cpu) { - if (cpu_online(cpu)) - i++; - } - phba->sli4_hba.num_online_cpu = i; - phba->sli4_hba.num_present_cpu = lpfc_present_cpu; - phba->sli4_hba.curr_disp_cpu = 0; - - if (i < cfg_fcp_io_channel) { + if (phba->sli4_hba.num_online_cpu < io_channel) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3188 Reducing IO channels to match number of " "online CPUs: from %d to %d\n", - cfg_fcp_io_channel, i); - cfg_fcp_io_channel = i; + io_channel, phba->sli4_hba.num_online_cpu); + io_channel = phba->sli4_hba.num_online_cpu; } - if (cfg_fcp_io_channel + fof_vectors > - phba->sli4_hba.max_cfg_param.max_eq) { - if (phba->sli4_hba.max_cfg_param.max_eq < - LPFC_FCP_IO_CHAN_MIN) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 Not enough EQs (%d) from the " - "pci function for supporting FCP " - "EQs (%d)\n", - phba->sli4_hba.max_cfg_param.max_eq, - phba->cfg_fcp_io_channel); - goto out_error; - } + if (io_channel + fof_vectors > phba->sli4_hba.max_cfg_param.max_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2575 Reducing IO channels to match number of " "available EQs: from %d to %d\n", - cfg_fcp_io_channel, + io_channel, phba->sli4_hba.max_cfg_param.max_eq); - cfg_fcp_io_channel = phba->sli4_hba.max_cfg_param.max_eq - - fof_vectors; + io_channel = phba->sli4_hba.max_cfg_param.max_eq - fof_vectors; } - /* The actual number of FCP event queues adopted */ - phba->cfg_fcp_io_channel = cfg_fcp_io_channel; + /* The actual number of FCP / NVME event queues adopted */ + if (io_channel != phba->io_channel_irqs) + phba->io_channel_irqs = io_channel; + if (phba->cfg_fcp_io_channel > io_channel) + phba->cfg_fcp_io_channel = io_channel; + if (phba->cfg_nvme_io_channel > io_channel) + phba->cfg_nvme_io_channel = io_channel; + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2574 IRQs: %d, IO Channels: fcp %d nvme %d\n", + phba->io_channel_irqs, phba->cfg_fcp_io_channel, + phba->cfg_nvme_io_channel); /* Get EQ depth from module parameter, fake the default for now */ phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; @@ -7277,10 +7481,67 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) /* Get CQ depth from module parameter, fake the default for now */ phba->sli4_hba.cq_esize = LPFC_CQE_SIZE; phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT; + return 0; +} + +static int +lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) +{ + struct lpfc_queue *qdesc; + int cnt; + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0508 Failed allocate fast-path NVME CQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.nvme_cq[wqidx] = qdesc; + + cnt = LPFC_NVME_WQSIZE; + qdesc = 
lpfc_sli4_queue_alloc(phba, LPFC_WQE128_SIZE, cnt); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0509 Failed allocate fast-path NVME WQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.nvme_wq[wqidx] = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + return 0; +} + +static int +lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) +{ + struct lpfc_queue *qdesc; + uint32_t wqesize; + + /* Create Fast Path FCP CQs */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx); + return 1; + } + phba->sli4_hba.fcp_cq[wqidx] = qdesc; + + /* Create Fast Path FCP WQs */ + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0503 Failed allocate fast-path FCP WQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.fcp_wq[wqidx] = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); return 0; -out_error: - return -ENOMEM; } /** @@ -7301,13 +7562,14 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - uint32_t wqesize; - int idx; + int idx, io_channel; /* * Create HBA Record arrays. + * Both NVME and FCP will share that same vectors / EQs */ - if (!phba->cfg_fcp_io_channel) + io_channel = phba->io_channel_irqs; + if (!io_channel) return -ERANGE; phba->sli4_hba.mq_esize = LPFC_MQE_SIZE; @@ -7316,9 +7578,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) phba->sli4_hba.wq_ecount = LPFC_WQE_DEF_COUNT; phba->sli4_hba.rq_esize = LPFC_RQE_SIZE; phba->sli4_hba.rq_ecount = LPFC_RQE_DEF_COUNT; + phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; + phba->sli4_hba.eq_ecount = LPFC_EQE_DEF_COUNT; + phba->sli4_hba.cq_esize = LPFC_CQE_SIZE; + phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT; - phba->sli4_hba.hba_eq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); + phba->sli4_hba.hba_eq = kcalloc(io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); if (!phba->sli4_hba.hba_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2576 Failed allocate memory for " @@ -7326,44 +7593,83 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } - phba->sli4_hba.fcp_cq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2577 Failed allocate memory for fast-path " - "CQ record array\n"); - goto out_error; + if (phba->cfg_fcp_io_channel) { + phba->sli4_hba.fcp_cq = kcalloc(phba->cfg_fcp_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_cq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2577 Failed allocate memory for " + "fast-path CQ record array\n"); + goto out_error; + } + phba->sli4_hba.fcp_wq = kcalloc(phba->cfg_fcp_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2578 Failed allocate memory for " + "fast-path FCP WQ record array\n"); + goto out_error; + } + /* + * Since the first EQ can have multiple CQs associated with it, + * this array is used to quickly see if we have a FCP fast-path + * CQ match. 
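The reworked lpfc_sli4_queue_verify() above clamps a single io_channel_irqs value instead of the old FCP-only count: first to the number of online CPUs, then to the EQs the port can provide (minus one when the FOF queue is enabled), and finally the FCP and NVME channel counts are capped by the result. A small worked example with invented counts:

#include <stdio.h>

int main(void)
{
	int io_channel = 16;		/* requested IRQ/EQ count (invented) */
	int online_cpus = 8;
	int max_eq = 12, fof_vectors = 1;
	int fcp_ch = 16, nvme_ch = 16;

	if (online_cpus < io_channel)
		io_channel = online_cpus;		/* no more EQs than CPUs */
	if (io_channel + fof_vectors > max_eq)
		io_channel = max_eq - fof_vectors;	/* stay within the port's EQs */

	if (fcp_ch > io_channel)
		fcp_ch = io_channel;			/* FCP shares the EQ set */
	if (nvme_ch > io_channel)
		nvme_ch = io_channel;			/* and so does NVME */

	printf("irqs=%d fcp=%d nvme=%d\n", io_channel, fcp_ch, nvme_ch);
	return 0;
}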
+ */ + phba->sli4_hba.fcp_cq_map = kcalloc(phba->cfg_fcp_io_channel, + sizeof(uint16_t), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_cq_map) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2545 Failed allocate memory for " + "fast-path CQ map\n"); + goto out_error; + } } - phba->sli4_hba.fcp_wq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2578 Failed allocate memory for fast-path " - "WQ record array\n"); - goto out_error; - } + if (phba->cfg_nvme_io_channel) { + phba->sli4_hba.nvme_cq = kcalloc(phba->cfg_nvme_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_cq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6077 Failed allocate memory for " + "fast-path CQ record array\n"); + goto out_error; + } - /* - * Since the first EQ can have multiple CQs associated with it, - * this array is used to quickly see if we have a FCP fast-path - * CQ match. - */ - phba->sli4_hba.fcp_cq_map = kzalloc((sizeof(uint16_t) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_cq_map) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2545 Failed allocate memory for fast-path " - "CQ map\n"); - goto out_error; + + phba->sli4_hba.nvme_wq = kcalloc(phba->cfg_nvme_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2581 Failed allocate memory for " + "fast-path NVME WQ record array\n"); + goto out_error; + } + + /* + * Since the first EQ can have multiple CQs associated with it, + * this array is used to quickly see if we have a NVME fast-path + * CQ match. + */ + phba->sli4_hba.nvme_cq_map = kcalloc(phba->cfg_nvme_io_channel, + sizeof(uint16_t), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_cq_map) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6078 Failed allocate memory for " + "fast-path CQ map\n"); + goto out_error; + } } - /* - * Create HBA Event Queues (EQs). The cfg_fcp_io_channel specifies - * how many EQs to create. - */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); + /* Create HBA Event Queues (EQs) */ + for (idx = 0; idx < io_channel; idx++) { /* Create EQs */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize, phba->sli4_hba.eq_ecount); @@ -7373,32 +7679,17 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } phba->sli4_hba.hba_eq[idx] = qdesc; + } - /* Create Fast Path FCP CQs */ - qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0499 Failed allocate fast-path FCP " - "CQ (%d)\n", idx); - goto out_error; - } - phba->sli4_hba.fcp_cq[idx] = qdesc; + /* FCP and NVME io channels are not required to be balanced */ - /* Create Fast Path FCP WQs */ - wqesize = (phba->fcp_embed_io) ? 
- LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; - qdesc = lpfc_sli4_queue_alloc(phba, wqesize, - phba->sli4_hba.wq_ecount); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0503 Failed allocate fast-path FCP " - "WQ (%d)\n", idx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + if (lpfc_alloc_fcp_wq_cq(phba, idx)) goto out_error; - } - phba->sli4_hba.fcp_wq[idx] = qdesc; - } + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + if (lpfc_alloc_nvme_wq_cq(phba, idx)) + goto out_error; /* * Create Slow Path Completion Queues (CQs) @@ -7453,6 +7744,30 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } phba->sli4_hba.els_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Create NVME LS Complete Queue */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6079 Failed allocate NVME LS CQ\n"); + goto out_error; + } + phba->sli4_hba.nvmels_cq = qdesc; + + /* Create NVME LS Work Queue */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize, + phba->sli4_hba.wq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6080 Failed allocate NVME LS WQ\n"); + goto out_error; + } + phba->sli4_hba.nvmels_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + } /* * Create Receive Queue (RQ) @@ -7488,6 +7803,39 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) return -ENOMEM; } +static inline void +__lpfc_sli4_release_queue(struct lpfc_queue **qp) +{ + if (*qp != NULL) { + lpfc_sli4_queue_free(*qp); + *qp = NULL; + } +} + +static inline void +lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max) +{ + int idx; + + if (*qs == NULL) + return; + + for (idx = 0; idx < max; idx++) + __lpfc_sli4_release_queue(&(*qs)[idx]); + + kfree(*qs); + *qs = NULL; +} + +static inline void +lpfc_sli4_release_queue_map(uint16_t **qmap) +{ + if (*qmap != NULL) { + kfree(*qmap); + *qmap = NULL; + } +} + /** * lpfc_sli4_queue_destroy - Destroy all the SLI4 queues * @phba: pointer to lpfc hba data structure. 
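The new __lpfc_sli4_release_queue()/lpfc_sli4_release_queues() helpers shrink lpfc_sli4_queue_destroy() to a list of one-liners by always freeing and then NULLing the caller's pointer, so a repeated destroy pass is harmless. A userspace sketch of that pattern, using malloc/free in place of lpfc_sli4_queue_free(); struct queue is a stand-in, not the driver's lpfc_queue.

#include <stdlib.h>

struct queue {
	int id;
};

static void release_one(struct queue **qp)
{
	if (*qp != NULL) {
		free(*qp);
		*qp = NULL;	/* a later release sees NULL and skips it */
	}
}

static void release_many(struct queue ***qs, int max)
{
	int idx;

	if (*qs == NULL)
		return;
	for (idx = 0; idx < max; idx++)
		release_one(&(*qs)[idx]);
	free(*qs);
	*qs = NULL;		/* whole array gone, pointer cleared */
}

int main(void)
{
	struct queue **arr = calloc(4, sizeof(*arr));
	int i;

	if (!arr)
		return 1;
	for (i = 0; i < 4; i++)
		arr[i] = calloc(1, sizeof(**arr));
	release_many(&arr, 4);
	release_many(&arr, 4);	/* second call is a harmless no-op */
	return 0;
}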
@@ -7503,91 +7851,188 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) void lpfc_sli4_queue_destroy(struct lpfc_hba *phba) { - int idx; - if (phba->cfg_fof) lpfc_fof_queue_destroy(phba); - if (phba->sli4_hba.hba_eq != NULL) { - /* Release HBA event queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.hba_eq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.hba_eq[idx]); - phba->sli4_hba.hba_eq[idx] = NULL; - } + /* Release HBA eqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.hba_eq, phba->io_channel_irqs); + + /* Release FCP cqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq, + phba->cfg_fcp_io_channel); + + /* Release FCP wqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq, + phba->cfg_fcp_io_channel); + + /* Release FCP CQ mapping array */ + lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map); + + /* Release NVME cqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.nvme_cq, + phba->cfg_nvme_io_channel); + + /* Release NVME wqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.nvme_wq, + phba->cfg_nvme_io_channel); + + /* Release NVME CQ mapping array */ + lpfc_sli4_release_queue_map(&phba->sli4_hba.nvme_cq_map); + + /* Release mailbox command work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_wq); + + /* Release ELS work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.els_wq); + + /* Release ELS work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.nvmels_wq); + + /* Release unsolicited receive queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.hdr_rq); + __lpfc_sli4_release_queue(&phba->sli4_hba.dat_rq); + + /* Release ELS complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.els_cq); + + /* Release NVME LS complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.nvmels_cq); + + /* Release mailbox command complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_cq); + + /* Everything on this list has been freed */ + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); +} + +int +lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count) +{ + int rc, i; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct lpfc_rqb *rqbp; + struct rqb_dmabuf *rqb_buffer; + LIST_HEAD(rqb_buf_list); + + rqbp = hrq->rqbp; + for (i = 0; i < count; i++) { + rqb_buffer = (rqbp->rqb_alloc_buffer)(phba); + if (!rqb_buffer) + break; + rqb_buffer->hrq = hrq; + rqb_buffer->drq = drq; + list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); + } + while (!list_empty(&rqb_buf_list)) { + list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, + hbuf.list); + + hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); + if (rc < 0) { + (rqbp->rqb_free_buffer)(phba, rqb_buffer); + } else { + list_add_tail(&rqb_buffer->hbuf.list, + &rqbp->rqb_buffer_list); + rqbp->buffer_count++; } - kfree(phba->sli4_hba.hba_eq); - phba->sli4_hba.hba_eq = NULL; } + return 1; +} - if (phba->sli4_hba.fcp_cq != NULL) { - /* Release FCP completion queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.fcp_cq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.fcp_cq[idx]); - phba->sli4_hba.fcp_cq[idx] = NULL; - } - } - kfree(phba->sli4_hba.fcp_cq); - phba->sli4_hba.fcp_cq = NULL; +int +lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq) +{ + 
struct lpfc_rqb *rqbp; + struct lpfc_dmabuf *h_buf; + struct rqb_dmabuf *rqb_buffer; + + rqbp = rq->rqbp; + while (!list_empty(&rqbp->rqb_buffer_list)) { + list_remove_head(&rqbp->rqb_buffer_list, h_buf, + struct lpfc_dmabuf, list); + + rqb_buffer = container_of(h_buf, struct rqb_dmabuf, hbuf); + (rqbp->rqb_free_buffer)(phba, rqb_buffer); + rqbp->buffer_count--; } + return 1; +} - if (phba->sli4_hba.fcp_wq != NULL) { - /* Release FCP work queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.fcp_wq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.fcp_wq[idx]); - phba->sli4_hba.fcp_wq[idx] = NULL; - } - } - kfree(phba->sli4_hba.fcp_wq); - phba->sli4_hba.fcp_wq = NULL; +static int +lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq, + struct lpfc_queue *cq, struct lpfc_queue *wq, uint16_t *cq_map, + int qidx, uint32_t qtype) +{ + struct lpfc_sli_ring *pring; + int rc; + + if (!eq || !cq || !wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6085 Fast-path %s (%d) not allocated\n", + ((eq) ? ((cq) ? "WQ" : "CQ") : "EQ"), qidx); + return -ENOMEM; + } + + /* create the Cq first */ + rc = lpfc_cq_create(phba, cq, eq, + (qtype == LPFC_MBOX) ? LPFC_MCQ : LPFC_WCQ, qtype); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6086 Failed setup of CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + return rc; } - /* Release FCP CQ mapping array */ - if (phba->sli4_hba.fcp_cq_map != NULL) { - kfree(phba->sli4_hba.fcp_cq_map); - phba->sli4_hba.fcp_cq_map = NULL; - } + if (qtype != LPFC_MBOX) { + /* Setup nvme_cq_map for fast lookup */ + if (cq_map) + *cq_map = cq->queue_id; - /* Release mailbox command work queue */ - if (phba->sli4_hba.mbx_wq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq); - phba->sli4_hba.mbx_wq = NULL; - } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6087 CQ setup: cq[%d]-id=%d, parent eq[%d]-id=%d\n", + qidx, cq->queue_id, qidx, eq->queue_id); - /* Release ELS work queue */ - if (phba->sli4_hba.els_wq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.els_wq); - phba->sli4_hba.els_wq = NULL; - } + /* create the wq */ + rc = lpfc_wq_create(phba, wq, cq, qtype); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6123 Fail setup fastpath WQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + /* no need to tear down cq - caller will do so */ + return rc; + } - /* Release unsolicited receive queue */ - if (phba->sli4_hba.hdr_rq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq); - phba->sli4_hba.hdr_rq = NULL; - } - if (phba->sli4_hba.dat_rq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.dat_rq); - phba->sli4_hba.dat_rq = NULL; - } + /* Bind this CQ/WQ to the NVME ring */ + pring = wq->pring; + pring->sli.sli4.wqp = (void *)wq; + cq->pring = pring; - /* Release ELS complete queue */ - if (phba->sli4_hba.els_cq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.els_cq); - phba->sli4_hba.els_cq = NULL; - } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2593 WQ setup: wq[%d]-id=%d assoc=%d, cq[%d]-id=%d\n", + qidx, wq->queue_id, wq->assoc_qid, qidx, cq->queue_id); + } else { + rc = lpfc_mq_create(phba, wq, cq, LPFC_MBOX); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0539 Failed setup of slow-path MQ: " + "rc = 0x%x\n", rc); + /* no need to tear down cq - caller will do so */ + return rc; + } - /* Release mailbox command complete queue */ - if (phba->sli4_hba.mbx_cq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq); - phba->sli4_hba.mbx_cq = NULL; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + 
"2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n", + phba->sli4_hba.mbx_wq->queue_id, + phba->sli4_hba.mbx_cq->queue_id); } - return; + return 0; } /** @@ -7605,15 +8050,12 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) int lpfc_sli4_queue_setup(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - int rc = -ENOMEM; - int fcp_eqidx, fcp_cqidx, fcp_wqidx; - int fcp_cq_index = 0; uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; LPFC_MBOXQ_t *mboxq; - uint32_t length; + int qidx; + uint32_t length, io_channel; + int rc = -ENOMEM; /* Check for dual-ULP support */ mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -7663,220 +8105,173 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) /* * Set up HBA Event Queues (EQs) */ + io_channel = phba->io_channel_irqs; /* Set up HBA event queue */ - if (phba->cfg_fcp_io_channel && !phba->sli4_hba.hba_eq) { + if (io_channel && !phba->sli4_hba.hba_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3147 Fast-path EQs not allocated\n"); rc = -ENOMEM; goto out_error; } - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; fcp_eqidx++) { - if (!phba->sli4_hba.hba_eq[fcp_eqidx]) { + for (qidx = 0; qidx < io_channel; qidx++) { + if (!phba->sli4_hba.hba_eq[qidx]) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0522 Fast-path EQ (%d) not " - "allocated\n", fcp_eqidx); + "allocated\n", qidx); rc = -ENOMEM; - goto out_destroy_hba_eq; + goto out_destroy; } - rc = lpfc_eq_create(phba, phba->sli4_hba.hba_eq[fcp_eqidx], - (phba->cfg_fcp_imax / phba->cfg_fcp_io_channel)); + rc = lpfc_eq_create(phba, phba->sli4_hba.hba_eq[qidx], + phba->cfg_fcp_imax); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0523 Failed setup of fast-path EQ " - "(%d), rc = 0x%x\n", fcp_eqidx, + "(%d), rc = 0x%x\n", qidx, (uint32_t)rc); - goto out_destroy_hba_eq; + goto out_destroy; } lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2584 HBA EQ setup: " - "queue[%d]-id=%d\n", fcp_eqidx, - phba->sli4_hba.hba_eq[fcp_eqidx]->queue_id); - } - - /* Set up fast-path FCP Response Complete Queue */ - if (!phba->sli4_hba.fcp_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3148 Fast-path FCP CQ array not " - "allocated\n"); - rc = -ENOMEM; - goto out_destroy_hba_eq; + "2584 HBA EQ setup: queue[%d]-id=%d\n", + qidx, phba->sli4_hba.hba_eq[qidx]->queue_id); } - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) { - if (!phba->sli4_hba.fcp_cq[fcp_cqidx]) { + if (phba->cfg_nvme_io_channel) { + if (!phba->sli4_hba.nvme_cq || !phba->sli4_hba.nvme_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0526 Fast-path FCP CQ (%d) not " - "allocated\n", fcp_cqidx); + "6084 Fast-path NVME %s array not allocated\n", + (phba->sli4_hba.nvme_cq) ? 
"CQ" : "WQ"); rc = -ENOMEM; - goto out_destroy_fcp_cq; - } - rc = lpfc_cq_create(phba, phba->sli4_hba.fcp_cq[fcp_cqidx], - phba->sli4_hba.hba_eq[fcp_cqidx], LPFC_WCQ, LPFC_FCP); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0527 Failed setup of fast-path FCP " - "CQ (%d), rc = 0x%x\n", fcp_cqidx, - (uint32_t)rc); - goto out_destroy_fcp_cq; + goto out_destroy; } - /* Setup fcp_cq_map for fast lookup */ - phba->sli4_hba.fcp_cq_map[fcp_cqidx] = - phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2588 FCP CQ setup: cq[%d]-id=%d, " - "parent seq[%d]-id=%d\n", - fcp_cqidx, - phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id, - fcp_cqidx, - phba->sli4_hba.hba_eq[fcp_cqidx]->queue_id); - } - - /* Set up fast-path FCP Work Queue */ - if (!phba->sli4_hba.fcp_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3149 Fast-path FCP WQ array not " - "allocated\n"); - rc = -ENOMEM; - goto out_destroy_fcp_cq; + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + rc = lpfc_create_wq_cq(phba, + phba->sli4_hba.hba_eq[ + qidx % io_channel], + phba->sli4_hba.nvme_cq[qidx], + phba->sli4_hba.nvme_wq[qidx], + &phba->sli4_hba.nvme_cq_map[qidx], + qidx, LPFC_NVME); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6123 Failed to setup fastpath " + "NVME WQ/CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + goto out_destroy; + } + } } - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) { - if (!phba->sli4_hba.fcp_wq[fcp_wqidx]) { + if (phba->cfg_fcp_io_channel) { + /* Set up fast-path FCP Response Complete Queue */ + if (!phba->sli4_hba.fcp_cq || !phba->sli4_hba.fcp_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0534 Fast-path FCP WQ (%d) not " - "allocated\n", fcp_wqidx); + "3148 Fast-path FCP %s array not allocated\n", + phba->sli4_hba.fcp_cq ? 
"WQ" : "CQ"); rc = -ENOMEM; - goto out_destroy_fcp_wq; - } - rc = lpfc_wq_create(phba, phba->sli4_hba.fcp_wq[fcp_wqidx], - phba->sli4_hba.fcp_cq[fcp_wqidx], - LPFC_FCP); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0535 Failed setup of fast-path FCP " - "WQ (%d), rc = 0x%x\n", fcp_wqidx, - (uint32_t)rc); - goto out_destroy_fcp_wq; + goto out_destroy; } - /* Bind this WQ to the next FCP ring */ - pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx]; - pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx]; - phba->sli4_hba.fcp_cq[fcp_wqidx]->pring = pring; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2591 FCP WQ setup: wq[%d]-id=%d, " - "parent cq[%d]-id=%d\n", - fcp_wqidx, - phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cq_index, - phba->sli4_hba.fcp_cq[fcp_wqidx]->queue_id); + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + rc = lpfc_create_wq_cq(phba, + phba->sli4_hba.hba_eq[ + qidx % io_channel], + phba->sli4_hba.fcp_cq[qidx], + phba->sli4_hba.fcp_wq[qidx], + &phba->sli4_hba.fcp_cq_map[qidx], + qidx, LPFC_FCP); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0535 Failed to setup fastpath " + "FCP WQ/CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + goto out_destroy; + } + } } + /* - * Set up Complete Queues (CQs) + * Set up Slow Path Complete Queues (CQs) */ - /* Set up slow-path MBOX Complete Queue as the first CQ */ - if (!phba->sli4_hba.mbx_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0528 Mailbox CQ not allocated\n"); - rc = -ENOMEM; - goto out_destroy_fcp_wq; - } - rc = lpfc_cq_create(phba, phba->sli4_hba.mbx_cq, - phba->sli4_hba.hba_eq[0], LPFC_MCQ, LPFC_MBOX); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0529 Failed setup of slow-path mailbox CQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_fcp_wq; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2585 MBX CQ setup: cq-id=%d, parent eq-id=%d\n", - phba->sli4_hba.mbx_cq->queue_id, - phba->sli4_hba.hba_eq[0]->queue_id); + /* Set up slow-path MBOX CQ/MQ */ - /* Set up slow-path ELS Complete Queue */ - if (!phba->sli4_hba.els_cq) { + if (!phba->sli4_hba.mbx_cq || !phba->sli4_hba.mbx_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0530 ELS CQ not allocated\n"); + "0528 %s not allocated\n", + phba->sli4_hba.mbx_cq ? 
+ "Mailbox WQ" : "Mailbox CQ"); rc = -ENOMEM; - goto out_destroy_mbx_cq; + goto out_destroy; } - rc = lpfc_cq_create(phba, phba->sli4_hba.els_cq, - phba->sli4_hba.hba_eq[0], LPFC_WCQ, LPFC_ELS); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0531 Failed setup of slow-path ELS CQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_mbx_cq; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2586 ELS CQ setup: cq-id=%d, parent eq-id=%d\n", - phba->sli4_hba.els_cq->queue_id, - phba->sli4_hba.hba_eq[0]->queue_id); - - /* - * Set up all the Work Queues (WQs) - */ - /* Set up Mailbox Command Queue */ - if (!phba->sli4_hba.mbx_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0538 Slow-path MQ not allocated\n"); - rc = -ENOMEM; - goto out_destroy_els_cq; - } - rc = lpfc_mq_create(phba, phba->sli4_hba.mbx_wq, - phba->sli4_hba.mbx_cq, LPFC_MBOX); + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.mbx_cq, + phba->sli4_hba.mbx_wq, + NULL, 0, LPFC_MBOX); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0539 Failed setup of slow-path MQ: " - "rc = 0x%x\n", rc); - goto out_destroy_els_cq; + "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n", - phba->sli4_hba.mbx_wq->queue_id, - phba->sli4_hba.mbx_cq->queue_id); - /* Set up slow-path ELS Work Queue */ - if (!phba->sli4_hba.els_wq) { + /* Set up slow-path ELS WQ/CQ */ + if (!phba->sli4_hba.els_cq || !phba->sli4_hba.els_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0536 Slow-path ELS WQ not allocated\n"); + "0530 ELS %s not allocated\n", + phba->sli4_hba.els_cq ? "WQ" : "CQ"); rc = -ENOMEM; - goto out_destroy_mbx_wq; + goto out_destroy; } - rc = lpfc_wq_create(phba, phba->sli4_hba.els_wq, - phba->sli4_hba.els_cq, LPFC_ELS); + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.els_cq, + phba->sli4_hba.els_wq, + NULL, 0, LPFC_ELS); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0537 Failed setup of slow-path ELS WQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_mbx_wq; + "0529 Failed setup of ELS WQ/CQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; } - - /* Bind this WQ to the ELS ring */ - pring = &psli->ring[LPFC_ELS_RING]; - pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq; - phba->sli4_hba.els_cq->pring = pring; - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n", phba->sli4_hba.els_wq->queue_id, phba->sli4_hba.els_cq->queue_id); - /* - * Create Receive Queue (RQ) - */ + if (phba->cfg_nvme_io_channel) { + /* Set up NVME LS Complete Queue */ + if (!phba->sli4_hba.nvmels_cq || !phba->sli4_hba.nvmels_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6091 LS %s not allocated\n", + phba->sli4_hba.nvmels_cq ? 
"WQ" : "CQ"); + rc = -ENOMEM; + goto out_destroy; + } + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.nvmels_cq, + phba->sli4_hba.nvmels_wq, + NULL, 0, LPFC_NVME_LS); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0529 Failed setup of NVVME LS WQ/CQ: " + "rc = 0x%x\n", (uint32_t)rc); + goto out_destroy; + } + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6096 ELS WQ setup: wq-id=%d, " + "parent cq-id=%d\n", + phba->sli4_hba.nvmels_wq->queue_id, + phba->sli4_hba.nvmels_cq->queue_id); + } + if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0540 Receive Queue not allocated\n"); rc = -ENOMEM; - goto out_destroy_els_wq; + goto out_destroy; } lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ); @@ -7888,7 +8283,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0541 Failed setup of Receive Queue: " "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_fcp_wq; + goto out_destroy; } lpfc_printf_log(phba, KERN_INFO, LOG_INIT, @@ -7904,7 +8299,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0549 Failed setup of FOF Queues: " "rc = 0x%x\n", rc); - goto out_destroy_els_rq; + goto out_destroy; } } @@ -7912,30 +8307,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) * Configure EQ delay multipier for interrupt coalescing using * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. */ - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx += LPFC_MAX_EQ_DELAY) - lpfc_modify_fcp_eq_delay(phba, fcp_eqidx); + for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY) + lpfc_modify_hba_eq_delay(phba, qidx); return 0; -out_destroy_els_rq: - lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq); -out_destroy_els_wq: - lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); -out_destroy_mbx_wq: - lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); -out_destroy_els_cq: - lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); -out_destroy_mbx_cq: - lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); -out_destroy_fcp_wq: - for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--) - lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_wqidx]); -out_destroy_fcp_cq: - for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--) - lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_cqidx]); -out_destroy_hba_eq: - for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--) - lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[fcp_eqidx]); +out_destroy: + lpfc_sli4_queue_unset(phba); out_error: return rc; } @@ -7955,39 +8332,66 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) void lpfc_sli4_queue_unset(struct lpfc_hba *phba) { - int fcp_qidx; + int qidx; /* Unset the queues created for Flash Optimized Fabric operations */ if (phba->cfg_fof) lpfc_fof_queue_destroy(phba); + /* Unset mailbox command work queue */ - lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); + if (phba->sli4_hba.mbx_wq) + lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); + + /* Unset NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq) + lpfc_wq_destroy(phba, phba->sli4_hba.nvmels_wq); + /* Unset ELS work queue */ - lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); + if (phba->sli4_hba.els_cq) + lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); + /* Unset unsolicited receive queue */ - lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq); + if (phba->sli4_hba.hdr_rq) + lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, + phba->sli4_hba.dat_rq); + /* Unset FCP work queue */ - if (phba->sli4_hba.fcp_wq) { - for 
(fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_qidx]); + if (phba->sli4_hba.fcp_wq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[qidx]); + + /* Unset NVME work queue */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_wq_destroy(phba, phba->sli4_hba.nvme_wq[qidx]); } + /* Unset mailbox command complete queue */ - lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); + if (phba->sli4_hba.mbx_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); + /* Unset ELS complete queue */ - lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); + if (phba->sli4_hba.els_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); + + /* Unset NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.nvmels_cq); + + /* Unset NVME response complete queue */ + if (phba->sli4_hba.nvme_cq) + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_cq_destroy(phba, phba->sli4_hba.nvme_cq[qidx]); + /* Unset FCP response complete queue */ - if (phba->sli4_hba.fcp_cq) { - for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_qidx]); - } + if (phba->sli4_hba.fcp_cq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[qidx]); + /* Unset fast-path event queue */ - if (phba->sli4_hba.hba_eq) { - for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[fcp_qidx]); - } + if (phba->sli4_hba.hba_eq) + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) + lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[qidx]); } /** @@ -8697,91 +9101,33 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) } /** - * lpfc_find_next_cpu - Find next available CPU that matches the phys_id + * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. - * - * Find next available CPU to use for IRQ to CPU affinity. + * @vectors: number of msix vectors allocated. + * + * The routine will figure out the CPU affinity assignment for every + * MSI-X vector allocated for the HBA. The hba_eq_hdl will be updated + * with a pointer to the CPU mask that defines ALL the CPUs this vector + * can be associated with. If the vector can be unquely associated with + * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu. + * In addition, the CPU to IO channel mapping will be calculated + * and the phba->sli4_hba.cpu_map array will reflect this. */ -static int -lpfc_find_next_cpu(struct lpfc_hba *phba, uint32_t phys_id) +static void +lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) { struct lpfc_vector_map_info *cpup; + int index = 0; + int vec = 0; int cpu; - - cpup = phba->sli4_hba.cpu_map; - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - /* CPU must be online */ - if (cpu_online(cpu)) { - if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) && - (lpfc_used_cpu[cpu] == LPFC_VECTOR_MAP_EMPTY) && - (cpup->phys_id == phys_id)) { - return cpu; - } - } - cpup++; - } - - /* - * If we get here, we have used ALL CPUs for the specific - * phys_id. Now we need to clear out lpfc_used_cpu and start - * reusing CPUs. 
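The queue setup path above now funnels every failure to a single out_destroy label and relies on lpfc_sli4_queue_unset() checking each queue pointer before destroying it. A minimal userspace sketch of that NULL-guarded, single-exit teardown shape (the resource names and the malloc/free stand-ins are illustrative, not the driver's API):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for the driver's queues. */
struct queues {
	int *eq;
	int *cq;
	int *wq;
};

/* One teardown routine, safe to call no matter how far setup got,
 * because every destroy is guarded by a NULL check. */
static void queues_unset(struct queues *q)
{
	if (q->wq) { free(q->wq); q->wq = NULL; }
	if (q->cq) { free(q->cq); q->cq = NULL; }
	if (q->eq) { free(q->eq); q->eq = NULL; }
}

static int queues_setup(struct queues *q)
{
	q->eq = malloc(sizeof(*q->eq));
	if (!q->eq)
		goto out_destroy;
	q->cq = malloc(sizeof(*q->cq));
	if (!q->cq)
		goto out_destroy;
	q->wq = malloc(sizeof(*q->wq));
	if (!q->wq)
		goto out_destroy;
	return 0;

out_destroy:			/* single label replaces the per-stage labels */
	queues_unset(q);
	return -1;
}

int main(void)
{
	struct queues q = { NULL, NULL, NULL };
	int rc = queues_setup(&q);

	printf("setup %s\n", rc ? "failed" : "ok");
	queues_unset(&q);
	return 0;
}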
- */ - - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - if (lpfc_used_cpu[cpu] == phys_id) - lpfc_used_cpu[cpu] = LPFC_VECTOR_MAP_EMPTY; - } - - cpup = phba->sli4_hba.cpu_map; - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - /* CPU must be online */ - if (cpu_online(cpu)) { - if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) && - (cpup->phys_id == phys_id)) { - return cpu; - } - } - cpup++; - } - return LPFC_VECTOR_MAP_EMPTY; -} - -/** - * lpfc_sli4_set_affinity - Set affinity for HBA IRQ vectors - * @phba: pointer to lpfc hba data structure. - * @vectors: number of HBA vectors - * - * Affinitize MSIX IRQ vectors to CPUs. Try to equally spread vector - * affinization across multple physical CPUs (numa nodes). - * In addition, this routine will assign an IO channel for each CPU - * to use when issuing I/Os. - */ -static int -lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) -{ - int i, idx, saved_chann, used_chann, cpu, phys_id; - int max_phys_id, min_phys_id; - int num_io_channel, first_cpu, chan; - struct lpfc_vector_map_info *cpup; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif - uint8_t chann[LPFC_FCP_IO_CHAN_MAX+1]; - - /* If there is no mapping, just return */ - if (!phba->cfg_fcp_cpu_map) - return 1; /* Init cpu_map array */ memset(phba->sli4_hba.cpu_map, 0xff, (sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_present_cpu)); - - max_phys_id = 0; - min_phys_id = 0xff; - phys_id = 0; - num_io_channel = 0; - first_cpu = LPFC_VECTOR_MAP_EMPTY; + phba->sli4_hba.num_present_cpu)); /* Update CPU map with physical id and core id of each CPU */ cpup = phba->sli4_hba.cpu_map; @@ -8795,184 +9141,16 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) cpup->phys_id = 0; cpup->core_id = 0; #endif - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3328 CPU physid %d coreid %d\n", - cpup->phys_id, cpup->core_id); - - if (cpup->phys_id > max_phys_id) - max_phys_id = cpup->phys_id; - if (cpup->phys_id < min_phys_id) - min_phys_id = cpup->phys_id; + cpup->channel_id = index; /* For now round robin */ + cpup->irq = pci_irq_vector(phba->pcidev, vec); + vec++; + if (vec >= vectors) + vec = 0; + index++; + if (index >= phba->cfg_fcp_io_channel) + index = 0; cpup++; } - - phys_id = min_phys_id; - /* Now associate the HBA vectors with specific CPUs */ - for (idx = 0; idx < vectors; idx++) { - cpup = phba->sli4_hba.cpu_map; - cpu = lpfc_find_next_cpu(phba, phys_id); - if (cpu == LPFC_VECTOR_MAP_EMPTY) { - - /* Try for all phys_id's */ - for (i = 1; i < max_phys_id; i++) { - phys_id++; - if (phys_id > max_phys_id) - phys_id = min_phys_id; - cpu = lpfc_find_next_cpu(phba, phys_id); - if (cpu == LPFC_VECTOR_MAP_EMPTY) - continue; - goto found; - } - - /* Use round robin for scheduling */ - phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_ROUND_ROBIN; - chan = 0; - cpup = phba->sli4_hba.cpu_map; - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { - cpup->channel_id = chan; - cpup++; - chan++; - if (chan >= phba->cfg_fcp_io_channel) - chan = 0; - } - - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3329 Cannot set affinity:" - "Error mapping vector %d (%d)\n", - idx, vectors); - return 0; - } -found: - cpup += cpu; - if (phba->cfg_fcp_cpu_map == LPFC_DRIVER_CPU_MAP) - lpfc_used_cpu[cpu] = phys_id; - - /* Associate vector with selected CPU */ - cpup->irq = pci_irq_vector(phba->pcidev, idx); - - /* Associate IO channel with selected CPU */ - cpup->channel_id = idx; - num_io_channel++; - - if (first_cpu == LPFC_VECTOR_MAP_EMPTY) - first_cpu = cpu; - - /* Now 
affinitize to the selected CPU */ - i = irq_set_affinity_hint(pci_irq_vector(phba->pcidev, idx), - get_cpu_mask(cpu)); - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3330 Set Affinity: CPU %d channel %d " - "irq %d (%x)\n", - cpu, cpup->channel_id, - pci_irq_vector(phba->pcidev, idx), i); - - /* Spread vector mapping across multple physical CPU nodes */ - phys_id++; - if (phys_id > max_phys_id) - phys_id = min_phys_id; - } - - /* - * Finally fill in the IO channel for any remaining CPUs. - * At this point, all IO channels have been assigned to a specific - * MSIx vector, mapped to a specific CPU. - * Base the remaining IO channel assigned, to IO channels already - * assigned to other CPUs on the same phys_id. - */ - for (i = min_phys_id; i <= max_phys_id; i++) { - /* - * If there are no io channels already mapped to - * this phys_id, just round robin thru the io_channels. - * Setup chann[] for round robin. - */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) - chann[idx] = idx; - - saved_chann = 0; - used_chann = 0; - - /* - * First build a list of IO channels already assigned - * to this phys_id before reassigning the same IO - * channels to the remaining CPUs. - */ - cpup = phba->sli4_hba.cpu_map; - cpu = first_cpu; - cpup += cpu; - for (idx = 0; idx < phba->sli4_hba.num_present_cpu; - idx++) { - if (cpup->phys_id == i) { - /* - * Save any IO channels that are - * already mapped to this phys_id. - */ - if (cpup->irq != LPFC_VECTOR_MAP_EMPTY) { - if (saved_chann <= - LPFC_FCP_IO_CHAN_MAX) { - chann[saved_chann] = - cpup->channel_id; - saved_chann++; - } - goto out; - } - - /* See if we are using round-robin */ - if (saved_chann == 0) - saved_chann = - phba->cfg_fcp_io_channel; - - /* Associate next IO channel with CPU */ - cpup->channel_id = chann[used_chann]; - num_io_channel++; - used_chann++; - if (used_chann == saved_chann) - used_chann = 0; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3331 Set IO_CHANN " - "CPU %d channel %d\n", - idx, cpup->channel_id); - } -out: - cpu++; - if (cpu >= phba->sli4_hba.num_present_cpu) { - cpup = phba->sli4_hba.cpu_map; - cpu = 0; - } else { - cpup++; - } - } - } - - if (phba->sli4_hba.num_online_cpu != phba->sli4_hba.num_present_cpu) { - cpup = phba->sli4_hba.cpu_map; - for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) { - if (cpup->channel_id == LPFC_VECTOR_MAP_EMPTY) { - cpup->channel_id = 0; - num_io_channel++; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3332 Assign IO_CHANN " - "CPU %d channel %d\n", - idx, cpup->channel_id); - } - cpup++; - } - } - - /* Sanity check */ - if (num_io_channel != phba->sli4_hba.num_present_cpu) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3333 Set affinity mismatch:" - "%d chann != %d cpus: %d vectors\n", - num_io_channel, phba->sli4_hba.num_present_cpu, - vectors); - - /* Enable using cpu affinity for scheduling */ - phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_BY_CPU; - return 1; } @@ -8993,11 +9171,12 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) int vectors, rc, index; /* Set up MSI-X multi-message vectors */ - vectors = phba->cfg_fcp_io_channel; + vectors = phba->io_channel_irqs; if (phba->cfg_fof) vectors++; - rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, PCI_IRQ_MSIX); + rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9012,19 +9191,19 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) LPFC_SLI4_HANDLER_NAME_SZ, 
LPFC_DRIVER_HANDLER_NAME"%d", index); - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index].fcp_eq_in_use, 1); + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; + atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1); if (phba->cfg_fof && (index == (vectors - 1))) rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_fof_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], - &phba->sli4_hba.fcp_eq_hdl[index]); + &phba->sli4_hba.hba_eq_hdl[index]); else rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], - &phba->sli4_hba.fcp_eq_hdl[index]); + &phba->sli4_hba.hba_eq_hdl[index]); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " @@ -9036,26 +9215,29 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) if (phba->cfg_fof) vectors--; - if (vectors != phba->cfg_fcp_io_channel) { + if (vectors != phba->io_channel_irqs) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3238 Reducing IO channels to match number of " "MSI-X vectors, requested %d got %d\n", - phba->cfg_fcp_io_channel, vectors); - phba->cfg_fcp_io_channel = vectors; + phba->io_channel_irqs, vectors); + if (phba->cfg_fcp_io_channel > vectors) + phba->cfg_fcp_io_channel = vectors; + if (phba->cfg_nvme_io_channel > vectors) + phba->cfg_nvme_io_channel = vectors; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; } + lpfc_cpu_affinity_check(phba, vectors); - if (!shost_use_blk_mq(lpfc_shost_from_vport(phba->pport))) - lpfc_sli4_set_affinity(phba, vectors); return rc; cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) { - int irq = pci_irq_vector(phba->pcidev, index); - - irq_set_affinity_hint(irq, NULL); - free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); - } + for (--index; index >= 0; index--) + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); /* Unconfigure MSI-X capability structure */ pci_free_irq_vectors(phba->pcidev); @@ -9102,14 +9284,14 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return rc; } - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; + for (index = 0; index < phba->io_channel_irqs; index++) { + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; } if (phba->cfg_fof) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; } return 0; } @@ -9134,7 +9316,7 @@ static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) { uint32_t intr_mode = LPFC_INTR_ERROR; - int retval, index; + int retval, idx; if (cfg_mode == 2) { /* Preparation before conf_msi mbox cmd */ @@ -9165,21 +9347,23 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) retval = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler, IRQF_SHARED, LPFC_DRIVER_NAME, phba); if (!retval) { + struct lpfc_hba_eq_hdl *eqhdl; + /* Indicate initialization to INTx mode */ phba->intr_type = INTx; intr_mode = 0; - for (index = 0; index < phba->cfg_fcp_io_channel; - index++) { - 
phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index]. - fcp_eq_in_use, 1); + + for (idx = 0; idx < phba->io_channel_irqs; idx++) { + eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl->idx = idx; + eqhdl->phba = phba; + atomic_set(&eqhdl->hba_eq_in_use, 1); } if (phba->cfg_fof) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index]. - fcp_eq_in_use, 1); + eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl->idx = idx; + eqhdl->phba = phba; + atomic_set(&eqhdl->hba_eq_in_use, 1); } } } @@ -9203,15 +9387,13 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) int index; /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - int irq = pci_irq_vector(phba->pcidev, index); - - irq_set_affinity_hint(irq, NULL); - free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); - } + for (index = 0; index < phba->io_channel_irqs; index++) + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); if (phba->cfg_fof) - free_irq(pci_irq_vector(phba->pcidev, index), phba); + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); } else { free_irq(phba->pcidev->irq, phba); } @@ -9273,11 +9455,24 @@ static void lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { int wait_time = 0; - int fcp_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + int nvme_xri_cmpl = 1; + int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - while (!fcp_xri_cmpl || !els_xri_cmpl) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + fcp_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + nvme_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + + while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl) { if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) { + if (!nvme_xri_cmpl) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6100 NVME XRI exchange busy " + "wait time: %d seconds.\n", + wait_time/1000); if (!fcp_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2877 FCP XRI exchange busy " @@ -9294,8 +9489,14 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1); wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1; } - fcp_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + nvme_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_nvme_buf_list); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + fcp_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_scsi_buf_list); + els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); } @@ -9509,10 +9710,31 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters); sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt, mbx_sli4_parameters); + sli4_params->wqpcnt = bf_get(cfg_wqpcnt, mbx_sli4_parameters); sli4_params->sgl_pp_align = bf_get(cfg_sgl_pp_align, mbx_sli4_parameters); phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); + phba->nvme_support = (bf_get(cfg_nvme, mbx_sli4_parameters) && + bf_get(cfg_xib, mbx_sli4_parameters)); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) || + !phba->nvme_support) { + 
phba->nvme_support = 0; + phba->nvmet_support = 0; + phba->cfg_nvme_io_channel = 0; + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, + "6101 Disabling NVME support: " + "Not supported by firmware: %d %d\n", + bf_get(cfg_nvme, mbx_sli4_parameters), + bf_get(cfg_xib, mbx_sli4_parameters)); + + /* If firmware doesn't support NVME, just use SCSI support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return -ENODEV; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } /* Make sure that sge_supp_len can be handled by the driver */ if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE) @@ -9587,14 +9809,6 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_disable_pci_dev; } - /* Set up phase-1 common device driver resources */ - error = lpfc_setup_driver_resource_phase1(phba); - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1403 Failed to set up driver resource.\n"); - goto out_unset_pci_mem_s3; - } - /* Set up SLI-3 specific device driver resources */ error = lpfc_sli_driver_resource_setup(phba); if (error) { @@ -10169,6 +10383,21 @@ lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba) return 0; } +/** + * lpfc_sli4_get_iocb_cnt - Calculate the # of total IOCBs to reserve + * @phba: pointer to lpfc hba data structure. + * + * returns the number of ELS/CT + **/ +int +lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) +{ + int max_xri = lpfc_sli4_get_els_iocb_cnt(phba); + + return max_xri; +} + + /** * lpfc_write_firmware - attempt to write a firmware image to the port * @fw: pointer to firmware image returned from request_firmware. @@ -10333,7 +10562,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error; uint32_t cfg_mode, intr_mode; - int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ phba = lpfc_hba_alloc(pdev); @@ -10358,14 +10586,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_disable_pci_dev; } - /* Set up phase-1 common device driver resources */ - error = lpfc_setup_driver_resource_phase1(phba); - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1411 Failed to set up driver resource.\n"); - goto out_unset_pci_mem_s4; - } - /* Set up SLI-4 Specific device driver resources */ error = lpfc_sli4_driver_resource_setup(phba); if (error) { @@ -10424,6 +10644,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Put device to a known state before enabling interrupt */ lpfc_stop_port(phba); + /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -10433,11 +10654,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_free_sysfs_attr; } /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + if (phba->intr_type != MSIX) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + phba->cfg_fcp_io_channel = 1; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + phba->cfg_nvme_io_channel = 1; + phba->io_channel_irqs = 1; + } + + /* Set up SLI-4 HBA */ if (lpfc_sli4_hba_setup(phba)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -10453,6 +10678,8 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Perform post initialization setup 
*/ lpfc_post_init_setup(phba); + /* todo: init: register port with nvme */ + /* check for firmware upgrade or downgrade */ if (phba->cfg_request_firmware_upgrade) lpfc_sli4_request_firmware_update(phba, INT_FW_UPGRADE); @@ -10524,8 +10751,11 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) fc_remove_host(shost); scsi_remove_host(shost); - /* Perform cleanup on the physical port */ + /* Perform ndlp cleanup on the physical port. The nvme localport + * is destroyed after to ensure all rports are io-disabled. + */ lpfc_cleanup(vport); + /* todo: init: unregister port with nvme */ /* * Bring down the SLI Layer. This step disables all interrupts, @@ -10543,6 +10773,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) * buffers are released to their corresponding pools here. */ lpfc_scsi_free(phba); + lpfc_nvme_free(phba); lpfc_sli4_driver_resource_unset(phba); @@ -11188,7 +11419,7 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba) int lpfc_fof_queue_setup(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; int rc; rc = lpfc_eq_create(phba, phba->sli4_hba.fof_eq, LPFC_MAX_IMAX); @@ -11207,8 +11438,11 @@ lpfc_fof_queue_setup(struct lpfc_hba *phba) if (rc) goto out_oas_wq; - phba->sli4_hba.oas_cq->pring = &psli->ring[LPFC_FCP_OAS_RING]; - phba->sli4_hba.oas_ring = &psli->ring[LPFC_FCP_OAS_RING]; + /* Bind this CQ/WQ to the NVME ring */ + pring = phba->sli4_hba.oas_wq->pring; + pring->sli.sli4.wqp = + (void *)phba->sli4_hba.oas_wq; + phba->sli4_hba.oas_cq->pring = pring; } return 0; @@ -11265,6 +11499,7 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) goto out_error; phba->sli4_hba.oas_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); } return 0; diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index 2a4e5d21eab2ad..3faf7a07bfd416 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -38,6 +38,10 @@ #define LOG_FIP 0x00020000 /* FIP events */ #define LOG_FCP_UNDER 0x00040000 /* FCP underruns errors */ #define LOG_SCSI_CMD 0x00080000 /* ALL SCSI commands */ +#define LOG_NVME 0x00100000 /* NVME general events. */ +#define LOG_NVME_DISC 0x00200000 /* NVME Discovery/Connect events. */ +#define LOG_NVME_ABTS 0x00400000 /* NVME ABTS events. */ +#define LOG_NVME_IOERR 0x00800000 /* NVME IO Error events. */ #define LOG_ALL_MSG 0xffffffff /* LOG all messages */ #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \ diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index b234c50c255feb..c20dc25a1e79a6 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -954,7 +954,7 @@ lpfc_config_pcb_setup(struct lpfc_hba * phba) pcbp->maxRing = (psli->num_rings - 1); for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; pring->sli.sli3.sizeCiocb = phba->sli_rev == 3 ? 
SLI3_IOCB_CMD_SIZE : @@ -1217,7 +1217,7 @@ lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb) mb->un.varCfgRing.recvNotify = 1; psli = &phba->sli; - pring = &psli->ring[ring]; + pring = &psli->sli3_ring[ring]; mb->un.varCfgRing.numMask = pring->num_mask; mb->mbxCommand = MBX_CONFIG_RING; mb->mbxOwner = OWN_HOST; @@ -2434,14 +2434,25 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) memset(mbox, 0, sizeof(*mbox)); reg_fcfi = &mbox->u.mqe.un.reg_fcfi; bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI); - bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, phba->sli4_hba.hdr_rq->queue_id); - bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID); + if (phba->nvmet_support == 0) { + bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything - rq_id0 */ + bf_set(lpfc_reg_fcfi_type_match0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_type_mask0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_match0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_mask0, reg_fcfi, 0); + + bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID); + + /* addr mode is bit wise inverted value of fcf addr_mode */ + bf_set(lpfc_reg_fcfi_mam, reg_fcfi, + (~phba->fcf.addr_mode) & 0x3); + } bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_info_index, reg_fcfi, phba->fcf.current_rec.fcf_indx); - /* reg_fcf addr mode is bit wise inverted value of fcf addr_mode */ - bf_set(lpfc_reg_fcfi_mam, reg_fcfi, (~phba->fcf.addr_mode) & 0x3); if (phba->fcf.current_rec.vlan_id != LPFC_FCOE_NULL_VID) { bf_set(lpfc_reg_fcfi_vv, reg_fcfi, 1); bf_set(lpfc_reg_fcfi_vlan_tag, reg_fcfi, diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 3fa65338d3f556..c65a1ec3d2e48f 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -24,10 +24,12 @@ #include #include +#include #include #include +#include -#include +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -35,8 +37,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" @@ -66,7 +69,7 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) { * lpfc_mem_alloc - create and allocate all PCI and memory pools * @phba: HBA to allocate pools for * - * Description: Creates and allocates PCI pools lpfc_scsi_dma_buf_pool, + * Description: Creates and allocates PCI pools lpfc_sg_dma_buf_pool, * lpfc_mbuf_pool, lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools * for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask. 
* @@ -90,21 +93,23 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) else i = SLI4_PAGE_SIZE; - phba->lpfc_scsi_dma_buf_pool = - pci_pool_create("lpfc_scsi_dma_buf_pool", - phba->pcidev, - phba->cfg_sg_dma_buf_size, - i, - 0); + phba->lpfc_sg_dma_buf_pool = + pci_pool_create("lpfc_sg_dma_buf_pool", + phba->pcidev, + phba->cfg_sg_dma_buf_size, + i, 0); + if (!phba->lpfc_sg_dma_buf_pool) + goto fail; + } else { - phba->lpfc_scsi_dma_buf_pool = - pci_pool_create("lpfc_scsi_dma_buf_pool", - phba->pcidev, phba->cfg_sg_dma_buf_size, - align, 0); - } + phba->lpfc_sg_dma_buf_pool = + pci_pool_create("lpfc_sg_dma_buf_pool", + phba->pcidev, phba->cfg_sg_dma_buf_size, + align, 0); - if (!phba->lpfc_scsi_dma_buf_pool) - goto fail; + if (!phba->lpfc_sg_dma_buf_pool) + goto fail; + } phba->lpfc_mbuf_pool = pci_pool_create("lpfc_mbuf_pool", phba->pcidev, LPFC_BPL_SIZE, @@ -170,12 +175,15 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) LPFC_DEVICE_DATA_POOL_SIZE, sizeof(struct lpfc_device_data)); if (!phba->device_data_mem_pool) - goto fail_free_hrb_pool; + goto fail_free_drb_pool; } else { phba->device_data_mem_pool = NULL; } return 0; +fail_free_drb_pool: + pci_pool_destroy(phba->lpfc_drb_pool); + phba->lpfc_drb_pool = NULL; fail_free_hrb_pool: pci_pool_destroy(phba->lpfc_hrb_pool); phba->lpfc_hrb_pool = NULL; @@ -197,8 +205,8 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) pci_pool_destroy(phba->lpfc_mbuf_pool); phba->lpfc_mbuf_pool = NULL; fail_free_dma_buf_pool: - pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); - phba->lpfc_scsi_dma_buf_pool = NULL; + pci_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; fail: return -ENOMEM; } @@ -227,6 +235,9 @@ lpfc_mem_free(struct lpfc_hba *phba) if (phba->lpfc_hrb_pool) pci_pool_destroy(phba->lpfc_hrb_pool); phba->lpfc_hrb_pool = NULL; + if (phba->txrdy_payload_pool) + pci_pool_destroy(phba->txrdy_payload_pool); + phba->txrdy_payload_pool = NULL; if (phba->lpfc_hbq_pool) pci_pool_destroy(phba->lpfc_hbq_pool); @@ -258,8 +269,8 @@ lpfc_mem_free(struct lpfc_hba *phba) phba->lpfc_mbuf_pool = NULL; /* Free DMA buffer memory pool */ - pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); - phba->lpfc_scsi_dma_buf_pool = NULL; + pci_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; /* Free Device Data memory pool */ if (phba->device_data_mem_pool) { @@ -282,7 +293,7 @@ lpfc_mem_free(struct lpfc_hba *phba) * @phba: HBA to free memory for * * Description: Free memory from PCI and driver memory pools and also those - * used : lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees + * used : lpfc_sg_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees * kmalloc-backed mempools for LPFC_MBOXQ_t and lpfc_nodelist. Also frees * the VPI bitmask. 
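The lpfc_mem_alloc() hunk above adds a fail_free_drb_pool label so the unwind path frees exactly what has been allocated so far, in reverse order. A small standalone sketch of that stacked-label pattern (the pool names here are placeholders, not the driver's):

#include <stdio.h>
#include <stdlib.h>

/* Allocate three "pools"; on failure, fall through labels that undo the
 * allocations in reverse order, so each label only frees what already
 * exists at that point. */
static int alloc_pools(void **a, void **b, void **c)
{
	*a = malloc(16);
	if (!*a)
		goto fail;
	*b = malloc(16);
	if (!*b)
		goto fail_free_a;
	*c = malloc(16);
	if (!*c)
		goto fail_free_b;
	return 0;

fail_free_b:
	free(*b);
	*b = NULL;
fail_free_a:
	free(*a);
	*a = NULL;
fail:
	return -1;
}

int main(void)
{
	void *a, *b, *c;

	if (alloc_pools(&a, &b, &c) == 0) {
		printf("pools allocated\n");
		free(c);
		free(b);
		free(a);
	}
	return 0;
}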
* @@ -458,7 +469,7 @@ lpfc_els_hbq_alloc(struct lpfc_hba *phba) kfree(hbqbp); return NULL; } - hbqbp->size = LPFC_BPL_SIZE; + hbqbp->total_size = LPFC_BPL_SIZE; return hbqbp; } @@ -518,7 +529,7 @@ lpfc_sli4_rb_alloc(struct lpfc_hba *phba) kfree(dma_buf); return NULL; } - dma_buf->size = LPFC_BPL_SIZE; + dma_buf->total_size = LPFC_DATA_BUF_SIZE; return dma_buf; } @@ -540,7 +551,6 @@ lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab) pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); kfree(dmab); - return; } /** @@ -565,13 +575,13 @@ lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) return; if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); /* Check whether HBQ is still in use */ spin_lock_irqsave(&phba->hbalock, flags); if (!phba->hbq_in_use) { spin_unlock_irqrestore(&phba->hbalock, flags); return; } - hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); list_del(&hbq_entry->dbuf.list); if (hbq_entry->tag == -1) { (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 56a3df4fddb05e..835ea9f78219c3 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -204,10 +204,11 @@ int lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(abort_list); - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *iocb, *next_iocb; + pring = lpfc_phba_elsring(phba); + /* Abort outstanding I/O on NPort */ lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_DISCOVERY, "2819 Abort outstanding I/O on NPort x%x " @@ -2104,7 +2105,7 @@ lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* flush the target */ - lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring], + lpfc_sli_abort_iocb(vport, &phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); /* Treat like rcv logo */ diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h new file mode 100644 index 00000000000000..2b167933fd2602 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -0,0 +1,88 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + ********************************************************************/ + +#define LPFC_NVME_MIN_SEGS 16 +#define LPFC_NVME_DEFAULT_SEGS 66 /* 256K IOs - 64 + 2 */ +#define LPFC_NVME_MAX_SEGS 510 +#define LPFC_NVMET_MIN_POSTBUF 16 +#define LPFC_NVMET_DEFAULT_POSTBUF 1024 +#define LPFC_NVMET_MAX_POSTBUF 4096 +#define LPFC_NVME_WQSIZE 256 + +#define LPFC_NVME_ERSP_LEN 0x20 + +/* Declare nvme-based local and remote port definitions. */ +struct lpfc_nvme_lport { + struct lpfc_vport *vport; + struct list_head rport_list; + struct completion lport_unreg_done; + /* Add sttats counters here */ +}; + +struct lpfc_nvme_rport { + struct list_head list; + struct lpfc_nvme_lport *lport; + struct nvme_fc_remote_port *remoteport; + struct lpfc_nodelist *ndlp; + struct completion rport_unreg_done; +}; + +struct lpfc_nvme_buf { + struct list_head list; + struct nvmefc_fcp_req *nvmeCmd; + struct lpfc_nvme_rport *nrport; + + uint32_t timeout; + + uint16_t flags; /* TBD convert exch_busy to flags */ +#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */ + uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */ + uint16_t status; /* From IOCB Word 7- ulpStatus */ + uint16_t cpu; + uint16_t qidx; + uint16_t sqid; + uint32_t result; /* From IOCB Word 4. */ + + uint32_t seg_cnt; /* Number of scatter-gather segments returned by + * dma_map_sg. The driver needs this for calls + * to dma_unmap_sg. + */ + dma_addr_t nonsg_phys; /* Non scatter-gather physical address. */ + + /* + * data and dma_handle are the kernel virtual and bus address of the + * dma-able buffer containing the fcp_cmd, fcp_rsp and a scatter + * gather bde list that supports the sg_tablesize value. + */ + void *data; + dma_addr_t dma_handle; + + struct sli4_sge *nvme_sgl; + dma_addr_t dma_phys_sgl; + + /* cur_iocbq has phys of the dma-able buffer. + * Iotag is in here + */ + struct lpfc_iocbq cur_iocbq; + + wait_queue_head_t *waitq; + unsigned long start_time; +}; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index bd7b75dbb97e4c..c327fc7b1a5455 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -413,7 +413,7 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc) * struct fcp_cmnd, struct fcp_rsp and the number of bde's * necessary to support the sg_tablesize. */ - psb->data = pci_pool_zalloc(phba->lpfc_scsi_dma_buf_pool, + psb->data = pci_pool_zalloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, &psb->dma_handle); if (!psb->data) { kfree(psb); @@ -424,8 +424,8 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc) /* Allocate iotag for psb->cur_iocbq. 
*/ iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq); if (iotag == 0) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); break; } @@ -522,6 +522,8 @@ lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport) struct lpfc_scsi_buf *psb, *next_psb; unsigned long iflag = 0; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; spin_lock_irqsave(&phba->hbalock, iflag); spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); list_for_each_entry_safe(psb, next_psb, @@ -554,8 +556,10 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba, int i; struct lpfc_nodelist *ndlp; int rrq_empty = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; spin_lock_irqsave(&phba->hbalock, iflag); spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); list_for_each_entry_safe(psb, next_psb, @@ -819,7 +823,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) * for the struct fcp_cmnd, struct fcp_rsp and the number * of bde's necessary to support the sg_tablesize. */ - psb->data = pci_pool_zalloc(phba->lpfc_scsi_dma_buf_pool, + psb->data = pci_pool_zalloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, &psb->dma_handle); if (!psb->data) { kfree(psb); @@ -832,7 +836,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) */ if (phba->cfg_enable_bg && (((unsigned long)(psb->data) & (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, psb->data, psb->dma_handle); kfree(psb); break; @@ -841,8 +845,8 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); break; } @@ -850,8 +854,8 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) /* Allocate iotag for psb->cur_iocbq. 
*/ iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq); if (iotag == 0) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); lpfc_printf_log(phba, KERN_ERR, LOG_FCP, "3368 Failed to allocate IOTAG for" @@ -920,7 +924,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) phba->sli4_hba.scsi_xri_cnt++; spin_unlock_irq(&phba->scsi_buf_list_get_lock); } - lpfc_printf_log(phba, KERN_INFO, LOG_BG, + lpfc_printf_log(phba, KERN_INFO, LOG_BG | LOG_FCP, "3021 Allocate %d out of %d requested new SCSI " "buffers\n", bcnt, num_to_alloc); @@ -3925,6 +3929,8 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, struct Scsi_Host *shost; uint32_t logit = LOG_FCP; + phba->fc4ScsiIoCmpls++; + /* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; if (!cmd) @@ -4242,19 +4248,19 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, vport->cfg_first_burst_size; } fcp_cmnd->fcpCntl3 = WRITE_DATA; - phba->fc4OutputRequests++; + phba->fc4ScsiOutputRequests++; } else { iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; iocb_cmd->ulpPU = PARM_READ_CHECK; fcp_cmnd->fcpCntl3 = READ_DATA; - phba->fc4InputRequests++; + phba->fc4ScsiInputRequests++; } } else { iocb_cmd->ulpCommand = CMD_FCP_ICMND64_CR; iocb_cmd->un.fcpi.fcpi_parm = 0; iocb_cmd->ulpPU = 0; fcp_cmnd->fcpCntl3 = 0; - phba->fc4ControlRequests++; + phba->fc4ScsiControlRequests++; } if (phba->sli_rev == 3 && !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) @@ -4468,7 +4474,7 @@ static __inline__ void lpfc_poll_rearm_timer(struct lpfc_hba * phba) unsigned long poll_tmo_expires = (jiffies + msecs_to_jiffies(phba->cfg_poll_tmo)); - if (!list_empty(&phba->sli.ring[LPFC_FCP_RING].txcmplq)) + if (!list_empty(&phba->sli.sli3_ring[LPFC_FCP_RING].txcmplq)) mod_timer(&phba->fcp_poll_timer, poll_tmo_expires); } @@ -4498,7 +4504,7 @@ void lpfc_poll_timeout(unsigned long ptr) if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); @@ -4562,7 +4568,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) if (lpfc_cmd == NULL) { lpfc_rampdown_queue_depth(phba); - lpfc_printf_vlog(vport, KERN_INFO, LOG_MISC, + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP_ERROR, "0707 driver's buffer pool is empty, " "IO busied\n"); goto out_host_busy; @@ -4637,7 +4643,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); @@ -4682,7 +4688,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) IOCB_t *cmd, *icmd; int ret = SUCCESS, status = 0; struct lpfc_sli_ring *pring_s4; - int ring_number, ret_val; + int ret_val; unsigned long flags, iflags; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); @@ -4770,7 +4776,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) icmd->ulpClass = cmd->ulpClass; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocb->fcp_wqidx = iocb->fcp_wqidx; + abtsiocb->hba_wqidx = iocb->hba_wqidx; abtsiocb->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocb->iocb_flag & LPFC_IO_FOF) abtsiocb->iocb_flag |= LPFC_IO_FOF; 
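The abort handler above copies iocb->hba_wqidx into the abort request and looks the ring up via lpfc_sli4_calc_ring(), failing cleanly if no ring is found, because the ABTS must be issued on the same work queue as the command it aborts. A simplified sketch of that record-at-submit / reuse-at-abort idea (the structures below are invented for illustration):

#include <stdio.h>

#define NUM_WQS 4

/* Invented, simplified command/queue types for illustration only. */
struct work_queue {
	int id;
};

struct io_cmd {
	int tag;
	int hba_wqidx;	/* queue chosen at submit time, reused for the abort */
};

static struct work_queue wqs[NUM_WQS] = { {0}, {1}, {2}, {3} };

static struct work_queue *calc_ring(const struct io_cmd *cmd)
{
	if (cmd->hba_wqidx < 0 || cmd->hba_wqidx >= NUM_WQS)
		return NULL;	/* abort path must cope with a failed lookup */
	return &wqs[cmd->hba_wqidx];
}

static void submit(struct io_cmd *cmd, int cpu)
{
	cmd->hba_wqidx = cpu % NUM_WQS;	/* pick a queue and remember it */
	printf("tag %d submitted on wq %d\n", cmd->tag, cmd->hba_wqidx);
}

static int abort_cmd(const struct io_cmd *cmd)
{
	struct work_queue *wq = calc_ring(cmd);

	if (!wq)
		return -1;	/* FAILED, as in the driver's error path */
	printf("abort for tag %d issued on the same wq %d\n", cmd->tag, wq->id);
	return 0;
}

int main(void)
{
	struct io_cmd cmd = { .tag = 7, .hba_wqidx = -1 };

	submit(&cmd, 5);
	return abort_cmd(&cmd);
}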
@@ -4783,8 +4789,11 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; abtsiocb->vport = vport; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx; - pring_s4 = &phba->sli.ring[ring_number]; + pring_s4 = lpfc_sli4_calc_ring(phba, iocb); + if (pring_s4 == NULL) { + ret = FAILED; + goto out_unlock; + } /* Note: both hbalock and ring_lock must be set here */ spin_lock_irqsave(&pring_s4->ring_lock, iflags); ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, @@ -4806,7 +4815,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); wait_for_cmpl: lpfc_cmd->waitq = &waitq; @@ -5106,7 +5115,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); if (cnt) lpfc_sli_abort_taskmgmt(vport, - &phba->sli.ring[phba->sli.fcp_ring], + &phba->sli.sli3_ring[LPFC_FCP_RING], tgt_id, lun_id, context); later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; while (time_after(later, jiffies) && cnt) { @@ -5535,7 +5544,7 @@ lpfc_slave_configure(struct scsi_device *sdev) if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); } @@ -5899,6 +5908,48 @@ lpfc_disable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, return false; } +static int +lpfc_no_command(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) +{ + return SCSI_MLQUEUE_HOST_BUSY; +} + +static int +lpfc_no_handler(struct scsi_cmnd *cmnd) +{ + return FAILED; +} + +static int +lpfc_no_slave(struct scsi_device *sdev) +{ + return -ENODEV; +} + +struct scsi_host_template lpfc_template_nvme = { + .module = THIS_MODULE, + .name = LPFC_DRIVER_NAME, + .proc_name = LPFC_DRIVER_NAME, + .info = lpfc_info, + .queuecommand = lpfc_no_command, + .eh_abort_handler = lpfc_no_handler, + .eh_device_reset_handler = lpfc_no_handler, + .eh_target_reset_handler = lpfc_no_handler, + .eh_bus_reset_handler = lpfc_no_handler, + .eh_host_reset_handler = lpfc_no_handler, + .slave_alloc = lpfc_no_slave, + .slave_configure = lpfc_no_slave, + .scan_finished = lpfc_scan_finished, + .this_id = -1, + .sg_tablesize = 1, + .cmd_per_lun = 1, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = lpfc_hba_attrs, + .max_sectors = 0xFFFF, + .vendor_id = LPFC_NL_VENDOR_ID, + .track_queue_depth = 0, +}; + struct scsi_host_template lpfc_template_s3 = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 4d7062258cf8a6..def0c0a5b17e10 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -135,6 +135,8 @@ struct lpfc_scsi_buf { uint32_t timeout; + uint16_t flags; /* TBD convert exch_busy to flags */ +#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */ uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */ uint16_t status; /* From IOCB Word 7- ulpStatus */ uint32_t result; /* From IOCB Word 4. 
*/ @@ -164,6 +166,8 @@ struct lpfc_scsi_buf { * Iotag is in here */ struct lpfc_iocbq cur_iocbq; + uint16_t cpu; + wait_queue_head_t *waitq; unsigned long start_time; @@ -186,5 +190,7 @@ struct lpfc_scsi_buf { #define NO_MORE_OAS_LUN -1 #define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN +#define TXRDY_PAYLOAD_LEN 12 + int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 19543142c94ca9..52fa5c77f3bcfc 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -34,14 +34,17 @@ #include #include +#include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" @@ -67,14 +70,17 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *, struct lpfc_iocbq *); static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *, struct hbq_dmabuf *); -static int lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *, struct lpfc_queue *, +static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_cqe *); -static int lpfc_sli4_post_els_sgl_list(struct lpfc_hba *, struct list_head *, +static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *, int); static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *, struct lpfc_eqe *, uint32_t); static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba); static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba); +static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct lpfc_iocbq *cmdiocb); static IOCB_t * lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) @@ -456,7 +462,7 @@ lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm) * on @q then this function will return -ENOMEM. * The caller is expected to hold the hbalock when calling this routine. **/ -static int +int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe) { @@ -602,7 +608,7 @@ __lpfc_sli_get_iocbq(struct lpfc_hba *phba) * * Returns sglq ponter = success, NULL = Failure. **/ -static struct lpfc_sglq * +struct lpfc_sglq * __lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xritag) { struct lpfc_sglq *sglq; @@ -902,7 +908,7 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, } /** - * __lpfc_sli_get_sglq - Allocates an iocb object from sgl pool + * __lpfc_sli_get_els_sglq - Allocates an iocb object from sgl pool * @phba: Pointer to HBA context object. * @piocb: Pointer to the iocbq. * @@ -912,9 +918,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, * allocated sglq object else it returns NULL. 
**/ static struct lpfc_sglq * -__lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) +__lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) { - struct list_head *lpfc_sgl_list = &phba->sli4_hba.lpfc_sgl_list; + struct list_head *lpfc_els_sgl_list = &phba->sli4_hba.lpfc_els_sgl_list; struct lpfc_sglq *sglq = NULL; struct lpfc_sglq *start_sglq = NULL; struct lpfc_scsi_buf *lpfc_cmd; @@ -938,18 +944,21 @@ __lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) ndlp = piocbq->context1; } - list_remove_head(lpfc_sgl_list, sglq, struct lpfc_sglq, list); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_remove_head(lpfc_els_sgl_list, sglq, struct lpfc_sglq, list); start_sglq = sglq; while (!found) { if (!sglq) return NULL; - if (lpfc_test_rrq_active(phba, ndlp, sglq->sli4_lxritag)) { + if (ndlp && ndlp->active_rrqs_xri_bitmap && + test_bit(sglq->sli4_lxritag, + ndlp->active_rrqs_xri_bitmap)) { /* This xri has an rrq outstanding for this DID. * put it back in the list and get another xri. */ - list_add_tail(&sglq->list, lpfc_sgl_list); + list_add_tail(&sglq->list, lpfc_els_sgl_list); sglq = NULL; - list_remove_head(lpfc_sgl_list, sglq, + list_remove_head(lpfc_els_sgl_list, sglq, struct lpfc_sglq, list); if (sglq == start_sglq) { sglq = NULL; @@ -962,6 +971,7 @@ __lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) phba->sli4_hba.lpfc_sglq_active_list[sglq->sli4_lxritag] = sglq; sglq->state = SGL_ALLOCATED; } + spin_unlock(&phba->sli4_hba.sgl_list_lock); return sglq; } @@ -1002,7 +1012,7 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba) * this IO was aborted then the sglq entry it put on the * lpfc_abts_els_sgl_list until the CQ_ABORTED_XRI is received. If the * IO has good status or fails for any other reason then the sglq - * entry is added to the free list (lpfc_sgl_list). + * entry is added to the free list (lpfc_els_sgl_list). 
**/ static void __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) @@ -1010,7 +1020,7 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) struct lpfc_sglq *sglq; size_t start_clean = offsetof(struct lpfc_iocbq, iocb); unsigned long iflag = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; lockdep_assert_held(&phba->hbalock); @@ -1021,21 +1031,24 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) if (sglq) { + pring = phba->sli4_hba.els_wq->pring; if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && (sglq->state != SGL_XRI_ABORTED)) { - spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock, - iflag); + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); list_add(&sglq->list, - &phba->sli4_hba.lpfc_abts_els_sgl_list); + &phba->sli4_hba.lpfc_abts_els_sgl_list); spin_unlock_irqrestore( - &phba->sli4_hba.abts_sgl_list_lock, iflag); + &phba->sli4_hba.sgl_list_lock, iflag); } else { - spin_lock_irqsave(&pring->ring_lock, iflag); + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); sglq->state = SGL_FREED; sglq->ndlp = NULL; list_add_tail(&sglq->list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock_irqrestore(&pring->ring_lock, iflag); + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock_irqrestore( + &phba->sli4_hba.sgl_list_lock, iflag); /* Check if TXQ queue needs to be serviced */ if (!list_empty(&pring->txq)) @@ -1043,13 +1056,13 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) } } - /* * Clean all volatile data fields, preserve iotag and node struct. */ memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); iocbq->sli4_lxritag = NO_XRI; iocbq->sli4_xritag = NO_XRI; + iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVME_LS); list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } @@ -1639,7 +1652,7 @@ lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) if (lpfc_is_link_up(phba) && (!list_empty(&pring->txq)) && - (pring->ringno != phba->sli.fcp_ring || + (pring->ringno != LPFC_FCP_RING || phba->sli.sli_flag & LPFC_PROCESS_LA)) { while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) && @@ -1718,7 +1731,6 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) struct hbq_dmabuf *hbq_buf; unsigned long flags; int i, hbq_count; - uint32_t hbqno; hbq_count = lpfc_sli_hbq_count(); /* Return all memory used by all HBQs */ @@ -1732,24 +1744,6 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) } phba->hbqs[i].buffer_count = 0; } - /* Return all HBQ buffer that are in-fly */ - list_for_each_entry_safe(dmabuf, next_dmabuf, &phba->rb_pend_list, - list) { - hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf); - list_del(&hbq_buf->dbuf.list); - if (hbq_buf->tag == -1) { - (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) - (phba, hbq_buf); - } else { - hbqno = hbq_buf->tag >> 16; - if (hbqno >= LPFC_MAX_HBQS) - (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) - (phba, hbq_buf); - else - (phba->hbqs[hbqno].hbq_free_buffer)(phba, - hbq_buf); - } - } /* Mark the HBQs not in use */ phba->hbq_in_use = 0; @@ -1802,7 +1796,7 @@ lpfc_sli_hbq_to_firmware_s3(struct lpfc_hba *phba, uint32_t hbqno, hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); hbqe->bde.addrLow = le32_to_cpu(putPaddrLow(physaddr)); - hbqe->bde.tus.f.bdeSize = hbq_buf->size; + hbqe->bde.tus.f.bdeSize = hbq_buf->total_size; hbqe->bde.tus.f.bdeFlags = 0; hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w); hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag); @@ -1834,17 
+1828,23 @@ lpfc_sli_hbq_to_firmware_s4(struct lpfc_hba *phba, uint32_t hbqno, int rc; struct lpfc_rqe hrqe; struct lpfc_rqe drqe; + struct lpfc_queue *hrq; + struct lpfc_queue *drq; + + if (hbqno != LPFC_ELS_HBQ) + return 1; + hrq = phba->sli4_hba.hdr_rq; + drq = phba->sli4_hba.dat_rq; lockdep_assert_held(&phba->hbalock); hrqe.address_lo = putPaddrLow(hbq_buf->hbuf.phys); hrqe.address_hi = putPaddrHigh(hbq_buf->hbuf.phys); drqe.address_lo = putPaddrLow(hbq_buf->dbuf.phys); drqe.address_hi = putPaddrHigh(hbq_buf->dbuf.phys); - rc = lpfc_sli4_rq_put(phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq, - &hrqe, &drqe); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); if (rc < 0) return rc; - hbq_buf->tag = rc; + hbq_buf->tag = (rc | (hbqno << 16)); list_add_tail(&hbq_buf->dbuf.list, &phba->hbqs[hbqno].hbq_buffer_list); return 0; } @@ -1861,22 +1861,9 @@ static struct lpfc_hbq_init lpfc_els_hbq = { .add_count = 40, }; -/* HBQ for the extra ring if needed */ -static struct lpfc_hbq_init lpfc_extra_hbq = { - .rn = 1, - .entry_count = 200, - .mask_count = 0, - .profile = 0, - .ring_mask = (1 << LPFC_EXTRA_RING), - .buffer_count = 0, - .init_count = 0, - .add_count = 5, -}; - /* Array of HBQs */ struct lpfc_hbq_init *lpfc_hbq_defs[] = { &lpfc_els_hbq, - &lpfc_extra_hbq, }; /** @@ -2713,7 +2700,7 @@ static struct lpfc_iocbq * lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, uint16_t iotag) { - struct lpfc_iocbq *cmd_iocb; + struct lpfc_iocbq *cmd_iocb = NULL; lockdep_assert_held(&phba->hbalock); if (iotag != 0 && iotag <= phba->sli.last_iotag) { @@ -2727,8 +2714,10 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba, } lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0372 iotag x%x is out of range: max iotag (x%x)\n", - iotag, phba->sli.last_iotag); + "0372 iotag x%x lookup error: max iotag (x%x) " + "iocb_flag x%x\n", + iotag, phba->sli.last_iotag, + cmd_iocb ? cmd_iocb->iocb_flag : 0xffff); return NULL; } @@ -3597,6 +3586,33 @@ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) IOERR_SLI_ABORTED); } +/** + * lpfc_sli_abort_wqe_ring - Abort all iocbs in the ring + * @phba: Pointer to HBA context object. + * @pring: Pointer to driver SLI ring object. + * + * This function aborts all iocbs in the given ring and frees all the iocb + * objects in txq. This function issues an abort iocb for all the iocb commands + * in txcmplq. The iocbs in the txcmplq is not guaranteed to complete before + * the return of this function. The caller is not required to hold any locks. + **/ +void +lpfc_sli_abort_wqe_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) +{ + LIST_HEAD(completions); + struct lpfc_iocbq *iocb, *next_iocb; + + if (pring->ringno == LPFC_ELS_RING) + lpfc_fabric_abort_hba(phba); + + spin_lock_irq(&phba->hbalock); + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) + lpfc_sli4_abort_nvme_io(phba, pring, iocb); + spin_unlock_irq(&phba->hbalock); +} + + /** * lpfc_sli_abort_fcp_rings - Abort all iocbs in all FCP rings * @phba: Pointer to HBA context object. 
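In lpfc_sli_hbq_to_firmware_s4() above, the buffer tag is now built as (rc | (hbqno << 16)), so the HBQ number travels in the upper 16 bits of the tag and can be recovered on completion with a shift. A tiny sketch of that encoding (assuming, as the driver does, that the buffer index fits in the low 16 bits):

#include <stdio.h>
#include <stdint.h>

/* Pack a 16-bit buffer index and a queue number into one 32-bit tag. */
static uint32_t make_tag(uint32_t index, uint32_t hbqno)
{
	return index | (hbqno << 16);
}

static uint32_t tag_to_hbqno(uint32_t tag)
{
	return tag >> 16;
}

static uint32_t tag_to_index(uint32_t tag)
{
	return tag & 0xffff;
}

int main(void)
{
	uint32_t tag = make_tag(37, 1);	/* buffer 37 posted to HBQ 1 */

	printf("hbqno=%u index=%u\n", tag_to_hbqno(tag), tag_to_index(tag));
	return 0;
}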
@@ -3617,15 +3633,40 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_fcp_io_channel; i++) { - pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + pring = phba->sli4_hba.fcp_wq[i]->pring; lpfc_sli_abort_iocb_ring(phba, pring); } } else { - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; lpfc_sli_abort_iocb_ring(phba, pring); } } +/** + * lpfc_sli_abort_nvme_rings - Abort all wqes in all NVME rings + * @phba: Pointer to HBA context object. + * + * This function aborts all wqes in NVME rings. This function issues an + * abort wqe for all the outstanding IO commands in txcmplq. The iocbs in + * the txcmplq is not guaranteed to complete before the return of this + * function. The caller is not required to hold any locks. + **/ +void +lpfc_sli_abort_nvme_rings(struct lpfc_hba *phba) +{ + struct lpfc_sli_ring *pring; + uint32_t i; + + if (phba->sli_rev < LPFC_SLI_REV4) + return; + + /* Abort all IO on each NVME ring. */ + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + lpfc_sli_abort_wqe_ring(phba, pring); + } +} + /** * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring @@ -3654,7 +3695,7 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_fcp_io_channel; i++) { - pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + pring = phba->sli4_hba.fcp_wq[i]->pring; spin_lock_irq(&pring->ring_lock); /* Retrieve everything on txq */ @@ -3675,7 +3716,7 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) IOERR_SLI_DOWN); } } else { - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; spin_lock_irq(&phba->hbalock); /* Retrieve everything on txq */ @@ -3695,6 +3736,51 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) } } +/** + * lpfc_sli_flush_nvme_rings - flush all wqes in the nvme rings + * @phba: Pointer to HBA context object. + * + * This function flushes all wqes in the nvme rings and frees all resources + * in the txcmplq. This function does not issue abort wqes for the IO + * commands in txcmplq, they will just be returned with + * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI + * slot has been permanently disabled. + **/ +void +lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba) +{ + LIST_HEAD(txcmplq); + struct lpfc_sli_ring *pring; + uint32_t i; + + if (phba->sli_rev < LPFC_SLI_REV4) + return; + + /* Hint to other driver operations that a flush is in progress. */ + spin_lock_irq(&phba->hbalock); + phba->hba_flag |= HBA_NVME_IOQ_FLUSH; + spin_unlock_irq(&phba->hbalock); + + /* Cycle through all NVME rings and complete each IO with + * a local driver reason code. This is a flush so no + * abort exchange to FW. + */ + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + + /* Retrieve everything on the txcmplq */ + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txcmplq, &txcmplq); + pring->txcmplq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); + + /* Flush the txcmpq &&&PAE */ + lpfc_sli_cancel_iocbs(phba, &txcmplq, + IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + } +} + /** * lpfc_sli_brdready_s3 - Check for sli3 host ready status * @phba: Pointer to HBA context object. 
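The flush routines above take ring_lock only long enough to list_splice_init() the txcmplq onto a local list, then complete the entries with lpfc_sli_cancel_iocbs() after dropping the lock. A userspace sketch of that splice-under-lock, complete-off-lock shape (a pthread mutex and a hand-rolled list stand in for the kernel primitives):

#include <stdio.h>
#include <pthread.h>

/* Hand-rolled singly linked list standing in for the kernel's list_head. */
struct node {
	struct node *next;
	int tag;
};

static struct node *txcmplq;	/* protected by ring_lock */
static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;

/* Steal the whole pending list while holding the lock, then complete the
 * entries after dropping it, so completion work never runs under the lock. */
static void flush_ring(void)
{
	struct node *local;

	pthread_mutex_lock(&ring_lock);
	local = txcmplq;	/* "splice" onto a local list */
	txcmplq = NULL;		/* the ring now sees an empty queue */
	pthread_mutex_unlock(&ring_lock);

	for (; local; local = local->next)
		printf("cancel tag %d with a local reject status\n", local->tag);
}

int main(void)
{
	struct node a = { NULL, 1 };
	struct node b = { &a, 2 };

	pthread_mutex_lock(&ring_lock);
	txcmplq = &b;
	pthread_mutex_unlock(&ring_lock);
	flush_ring();
	return 0;
}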
@@ -4069,7 +4155,7 @@ lpfc_sli_brdreset(struct lpfc_hba *phba) /* Initialize relevant SLI info */ for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; pring->flag = 0; pring->sli.sli3.rspidx = 0; pring->sli.sli3.next_cmdidx = 0; @@ -4498,10 +4584,11 @@ static int lpfc_sli4_rb_setup(struct lpfc_hba *phba) { phba->hbq_in_use = 1; - phba->hbqs[0].entry_count = lpfc_hbq_defs[0]->entry_count; + phba->hbqs[LPFC_ELS_HBQ].entry_count = + lpfc_hbq_defs[LPFC_ELS_HBQ]->entry_count; phba->hbq_count = 1; + lpfc_sli_hbqbuf_init_hbqs(phba, LPFC_ELS_HBQ); /* Initially populate or replenish the HBQs */ - lpfc_sli_hbqbuf_init_hbqs(phba, 0); return 0; } @@ -5107,27 +5194,31 @@ lpfc_sli4_retrieve_pport_name(struct lpfc_hba *phba) static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - int fcp_eqidx; + int qidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); - fcp_eqidx = 0; - if (phba->sli4_hba.fcp_cq) { - do { - lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[fcp_eqidx], - LPFC_QUEUE_REARM); - } while (++fcp_eqidx < phba->cfg_fcp_io_channel); - } + if (phba->sli4_hba.nvmels_cq) + lpfc_sli4_cq_release(phba->sli4_hba.nvmels_cq, + LPFC_QUEUE_REARM); + + if (phba->sli4_hba.fcp_cq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[qidx], + LPFC_QUEUE_REARM); + + if (phba->sli4_hba.nvme_cq) + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_sli4_cq_release(phba->sli4_hba.nvme_cq[qidx], + LPFC_QUEUE_REARM); if (phba->cfg_fof) lpfc_sli4_cq_release(phba->sli4_hba.oas_cq, LPFC_QUEUE_REARM); - if (phba->sli4_hba.hba_eq) { - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx++) - lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[fcp_eqidx], - LPFC_QUEUE_REARM); - } + if (phba->sli4_hba.hba_eq) + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) + lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[qidx], + LPFC_QUEUE_REARM); if (phba->cfg_fof) lpfc_sli4_eq_release(phba->sli4_hba.fof_eq, LPFC_QUEUE_REARM); @@ -5560,9 +5651,13 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type) rsrc_blks->rsrc_size = rsrc_size; list_add_tail(&rsrc_blks->list, ext_blk_list); rsrc_start = rsrc_id; - if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) + if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) { phba->sli4_hba.scsi_xri_start = rsrc_start + - lpfc_sli4_get_els_iocb_cnt(phba); + lpfc_sli4_get_iocb_cnt(phba); + phba->sli4_hba.nvme_xri_start = + phba->sli4_hba.scsi_xri_start + + phba->sli4_hba.scsi_xri_max; + } while (rsrc_id < (rsrc_start + rsrc_size)) { ids[j] = rsrc_id; @@ -5578,6 +5673,8 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type) return rc; } + + /** * lpfc_sli4_dealloc_extent - Deallocate an SLI4 resource extent. * @phba: Pointer to HBA context object. @@ -6156,42 +6253,45 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type, } /** - * lpfc_sli4_repost_els_sgl_list - Repsot the els buffers sgl pages as block + * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block * @phba: pointer to lpfc hba data structure. + * @pring: Pointer to driver SLI ring object. + * @sgl_list: linked link of sgl buffers to post + * @cnt: number of linked list buffers * - * This routine walks the list of els buffers that have been allocated and + * This routine walks the list of buffers that have been allocated and * repost them to the port by using SGL block post. 
This is needed after a * pci_function_reset/warm_start or start. It attempts to construct blocks - * of els buffer sgls which contains contiguous xris and uses the non-embedded - * SGL block post mailbox commands to post them to the port. For single els + * of buffer sgls which contains contiguous xris and uses the non-embedded + * SGL block post mailbox commands to post them to the port. For single * buffer sgl with non-contiguous xri, if any, it shall use embedded SGL post * mailbox command for posting. * * Returns: 0 = success, non-zero failure. **/ static int -lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) +lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba, + struct list_head *sgl_list, int cnt) { struct lpfc_sglq *sglq_entry = NULL; struct lpfc_sglq *sglq_entry_next = NULL; struct lpfc_sglq *sglq_entry_first = NULL; - int status, total_cnt, post_cnt = 0, num_posted = 0, block_cnt = 0; + int status, total_cnt; + int post_cnt = 0, num_posted = 0, block_cnt = 0; int last_xritag = NO_XRI; - struct lpfc_sli_ring *pring; LIST_HEAD(prep_sgl_list); LIST_HEAD(blck_sgl_list); LIST_HEAD(allc_sgl_list); LIST_HEAD(post_sgl_list); LIST_HEAD(free_sgl_list); - pring = &phba->sli.ring[LPFC_ELS_RING]; spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &allc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(sgl_list, &allc_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); - total_cnt = phba->sli4_hba.els_xri_cnt; + total_cnt = cnt; list_for_each_entry_safe(sglq_entry, sglq_entry_next, &allc_sgl_list, list) { list_del_init(&sglq_entry->list); @@ -6220,8 +6320,8 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) /* keep track of last sgl's xritag */ last_xritag = sglq_entry->sli4_xritag; - /* end of repost sgl list condition for els buffers */ - if (num_posted == phba->sli4_hba.els_xri_cnt) { + /* end of repost sgl list condition for buffers */ + if (num_posted == total_cnt) { if (post_cnt == 0) { list_splice_init(&prep_sgl_list, &blck_sgl_list); @@ -6238,7 +6338,7 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) /* Failure, put sgl to free list */ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3159 Failed to post els " + "3159 Failed to post " "sgl, xritag:x%x\n", sglq_entry->sli4_xritag); list_add_tail(&sglq_entry->list, @@ -6252,9 +6352,9 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) if (post_cnt == 0) continue; - /* post the els buffer list sgls as a block */ - status = lpfc_sli4_post_els_sgl_list(phba, &blck_sgl_list, - post_cnt); + /* post the buffer list sgls as a block */ + status = lpfc_sli4_post_sgl_list(phba, &blck_sgl_list, + post_cnt); if (!status) { /* success, put sgl list to posted sgl list */ @@ -6265,7 +6365,7 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) struct lpfc_sglq, list); lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3160 Failed to post els sgl-list, " + "3160 Failed to post sgl-list, " "xritag:x%x-x%x\n", sglq_entry_first->sli4_xritag, (sglq_entry_first->sli4_xritag + @@ -6278,29 +6378,28 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) if (block_cnt == 0) last_xritag = NO_XRI; - /* reset els sgl post count for next round of posting */ + /* reset sgl post count for next round of posting */ post_cnt = 0; } - /* update the number of XRIs posted for ELS */ - phba->sli4_hba.els_xri_cnt = total_cnt; - /* free the els sgls failed to post */ + /* free the sgls failed to post */ 
lpfc_free_sgl_list(phba, &free_sgl_list); - /* push els sgls posted to the availble list */ + /* push sgls posted to the available list */ if (!list_empty(&post_sgl_list)) { spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&post_sgl_list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&post_sgl_list, sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "3161 Failure to post els sgl to port.\n"); + "3161 Failure to post sgl to port.\n"); return -EIO; } - return 0; + + /* return the number of XRIs actually posted */ + return total_cnt; } void @@ -6622,35 +6721,78 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn); fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); - /* update host els and scsi xri-sgl sizes and mappings */ - rc = lpfc_sli4_xri_sgl_update(phba); + /* Create all the SLI4 queues */ + rc = lpfc_sli4_queue_create(phba); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3089 Failed to allocate queues\n"); + rc = -ENODEV; + goto out_free_mbox; + } + /* Set up all the queues to the device */ + rc = lpfc_sli4_queue_setup(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "0381 Error %d during queue setup.\n ", rc); + goto out_stop_timers; + } + /* Initialize the driver internal SLI layer lists. */ + lpfc_sli4_setup(phba); + lpfc_sli4_queue_init(phba); + + /* update host els xri-sgl sizes and mappings */ + rc = lpfc_sli4_els_sgl_update(phba); if (unlikely(rc)) { lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "1400 Failed to update xri-sgl size and " "mapping: %d\n", rc); - goto out_free_mbox; + goto out_destroy_queue; } /* register the els sgl pool to the port */ - rc = lpfc_sli4_repost_els_sgl_list(phba); - if (unlikely(rc)) { + rc = lpfc_sli4_repost_sgl_list(phba, &phba->sli4_hba.lpfc_els_sgl_list, + phba->sli4_hba.els_xri_cnt); + if (unlikely(rc < 0)) { lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "0582 Error %d during els sgl post " "operation\n", rc); rc = -ENODEV; - goto out_free_mbox; + goto out_destroy_queue; } + phba->sli4_hba.els_xri_cnt = rc; - /* register the allocated scsi sgl pool to the port */ - rc = lpfc_sli4_repost_scsi_sgl_list(phba); - if (unlikely(rc)) { - lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, - "0383 Error %d during scsi sgl post " - "operation\n", rc); - /* Some Scsi buffers were moved to the abort scsi list */ - /* A pci function reset will repost them */ - rc = -ENODEV; - goto out_free_mbox; + if (phba->nvmet_support == 0) { + /* update host scsi xri-sgl sizes and mappings */ + rc = lpfc_sli4_scsi_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6309 Failed to update scsi-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + + /* update host nvme xri-sgl sizes and mappings */ + rc = lpfc_sli4_nvme_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6082 Failed to update nvme-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* register the allocated scsi sgl pool to the port */ + rc = lpfc_sli4_repost_scsi_sgl_list(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "0383 Error %d during scsi sgl 
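[Editor sketch] The generalized repost path above batches sgl entries into block posts whenever their XRI tags are contiguous, falling back to a single embedded post for isolated tags. A hypothetical, simplified C sketch of that grouping decision; the tag values are invented:

#include <stdio.h>

#define NO_XRI (-1)

static void post_block(const int *xris, int start, int count)
{
	if (count == 1)
		printf("embedded post: xri %d\n", xris[start]);
	else
		printf("block post: xri %d..%d (%d entries)\n",
		       xris[start], xris[start + count - 1], count);
}

int main(void)
{
	int xris[] = { 10, 11, 12, 20, 30, 31 };
	int n = sizeof(xris) / sizeof(xris[0]);
	int last = NO_XRI, start = 0, i;

	for (i = 0; i < n; i++) {
		/* a break in contiguity closes the current block */
		if (last != NO_XRI && xris[i] != last + 1) {
			post_block(xris, start, i - start);
			start = i;
		}
		last = xris[i];
	}
	post_block(xris, start, n - start);	/* final run */
	return 0;
}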
post " + "operation\n", rc); + /* Some Scsi buffers were moved to abort scsi list */ + /* A pci function reset will repost them */ + rc = -ENODEV; + goto out_destroy_queue; + } } /* Post the rpi header region to the device. */ @@ -6660,24 +6802,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) "0393 Error %d during rpi post operation\n", rc); rc = -ENODEV; - goto out_free_mbox; + goto out_destroy_queue; } lpfc_sli4_node_prep(phba); - /* Create all the SLI4 queues */ - rc = lpfc_sli4_queue_create(phba); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3089 Failed to allocate queues\n"); - rc = -ENODEV; - goto out_stop_timers; - } - /* Set up all the queues to the device */ - rc = lpfc_sli4_queue_setup(phba); - if (unlikely(rc)) { - lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, - "0381 Error %d during queue setup.\n ", rc); - goto out_destroy_queue; + if (!(phba->hba_flag & HBA_FCOE_MODE)) { + if (phba->nvmet_support == 0) { + /* + * The FC Port needs to register FCFI (index 0) + */ + lpfc_reg_fcfi(phba, mboxq); + mboxq->vport = phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; + phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, + &mboxq->u.mqe.un.reg_fcfi); + } + /* Check if the port is configured to be disabled */ + lpfc_sli_read_link_ste(phba); } /* Arm the CQs and then EQs on device */ @@ -6731,23 +6875,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = 0; } - if (!(phba->hba_flag & HBA_FCOE_MODE)) { - /* - * The FC Port needs to register FCFI (index 0) - */ - lpfc_reg_fcfi(phba, mboxq); - mboxq->vport = phba->pport; - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - if (rc != MBX_SUCCESS) - goto out_unset_queue; - rc = 0; - phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, - &mboxq->u.mqe.un.reg_fcfi); - - /* Check if the port is configured to be disabled */ - lpfc_sli_read_link_ste(phba); - } - /* * The port is ready, set the host's link state to LINK_DOWN * in preparation for link interrupts. 
@@ -6884,7 +7011,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) /* Find the eq associated with the mcq */ if (phba->sli4_hba.hba_eq) - for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) + for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++) if (phba->sli4_hba.hba_eq[eqidx]->queue_id == phba->sli4_hba.mbx_cq->assoc_qid) { fpeq = phba->sli4_hba.hba_eq[eqidx]; @@ -7243,16 +7370,15 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->context2) { + if (pmbox->in_ext_byte_len && pmbox->context2) lpfc_memcpy_to_slim(phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, pmbox->context2, pmbox->in_ext_byte_len); - } - if (mbx->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) /* copy command data into host mbox for cmpl */ - lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); - } + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, + MAILBOX_CMD_SIZE); /* First copy mbox command data to HBA SLIM, skip past first word */ @@ -7266,10 +7392,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, writel(ldata, to_slim); readl(to_slim); /* flush */ - if (mbx->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) /* switch over to host mailbox */ psli->sli_flag |= LPFC_SLI_ACTIVE; - } } wmb(); @@ -8060,7 +8185,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, { struct lpfc_iocbq *nextiocb; IOCB_t *iocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number]; + struct lpfc_sli_ring *pring = &phba->sli.sli3_ring[ring_number]; lockdep_assert_held(&phba->hbalock); @@ -8134,7 +8259,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, * For FCP commands, we must be in a state where we can process link * attention events. 
*/ - } else if (unlikely(pring->ringno == phba->sli.fcp_ring && + } else if (unlikely(pring->ringno == LPFC_FCP_RING && !(phba->sli.sli_flag & LPFC_PROCESS_LA))) { goto iocb_busy; } @@ -8871,9 +8996,21 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, union lpfc_wqe *wqe; union lpfc_wqe128 wqe128; struct lpfc_queue *wq; - struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number]; + struct lpfc_sli_ring *pring; - lockdep_assert_held(&phba->hbalock); + /* Get the WQ */ + if ((piocb->iocb_flag & LPFC_IO_FCP) || + (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { + if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) + wq = phba->sli4_hba.fcp_wq[piocb->hba_wqidx]; + else + wq = phba->sli4_hba.oas_wq; + } else { + wq = phba->sli4_hba.els_wq; + } + + /* Get corresponding ring */ + pring = wq->pring; /* * The WQE can be either 64 or 128 bytes, @@ -8881,6 +9018,8 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, */ wqe = (union lpfc_wqe *)&wqe128; + lockdep_assert_held(&phba->hbalock); + if (piocb->sli4_xritag == NO_XRI) { if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) @@ -8895,7 +9034,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, return IOCB_BUSY; } } else { - sglq = __lpfc_sli_get_sglq(phba, piocb); + sglq = __lpfc_sli_get_els_sglq(phba, piocb); if (!sglq) { if (!(flag & SLI_IOCB_RET_IOCB)) { __lpfc_sli_ringtx_put(phba, @@ -8930,21 +9069,8 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, if (lpfc_sli4_iocb2wqe(phba, piocb, wqe)) return IOCB_ERROR; - if ((piocb->iocb_flag & LPFC_IO_FCP) || - (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { - if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) { - wq = phba->sli4_hba.fcp_wq[piocb->fcp_wqidx]; - } else { - wq = phba->sli4_hba.oas_wq; - } - if (lpfc_sli4_wq_put(wq, wqe)) - return IOCB_ERROR; - } else { - if (unlikely(!phba->sli4_hba.els_wq)) - return IOCB_ERROR; - if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, wqe)) - return IOCB_ERROR; - } + if (lpfc_sli4_wq_put(wq, wqe)) + return IOCB_ERROR; lpfc_sli_ringtxcmpl_put(phba, pring, piocb); return 0; @@ -9002,46 +9128,44 @@ lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) } /** - * lpfc_sli_calc_ring - Calculates which ring to use + * lpfc_sli4_calc_ring - Calculates which ring to use * @phba: Pointer to HBA context object. - * @ring_number: Initial ring * @piocb: Pointer to command iocb. * - * For SLI4, FCP IO can deferred to one fo many WQs, based on - * fcp_wqidx, thus we need to calculate the corresponding ring. + * For SLI4 only, FCP IO can deferred to one fo many WQs, based on + * hba_wqidx, thus we need to calculate the corresponding ring. * Since ABORTS must go on the same WQ of the command they are - * aborting, we use command's fcp_wqidx. + * aborting, we use command's hba_wqidx. 
*/ -static int -lpfc_sli_calc_ring(struct lpfc_hba *phba, uint32_t ring_number, - struct lpfc_iocbq *piocb) +struct lpfc_sli_ring * +lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) { - if (phba->sli_rev < LPFC_SLI_REV4) - return ring_number; - - if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { + if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { if (!(phba->cfg_fof) || - (!(piocb->iocb_flag & LPFC_IO_FOF))) { + (!(piocb->iocb_flag & LPFC_IO_FOF))) { if (unlikely(!phba->sli4_hba.fcp_wq)) - return LPFC_HBA_ERROR; + return NULL; /* - * for abort iocb fcp_wqidx should already + * for abort iocb hba_wqidx should already * be setup based on what work queue we used. */ if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) - piocb->fcp_wqidx = + piocb->hba_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba, piocb->context1); - ring_number = MAX_SLI3_CONFIGURED_RINGS + - piocb->fcp_wqidx; + return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring; } else { if (unlikely(!phba->sli4_hba.oas_wq)) - return LPFC_HBA_ERROR; - piocb->fcp_wqidx = 0; - ring_number = LPFC_FCP_OAS_RING; + return NULL; + piocb->hba_wqidx = 0; + return phba->sli4_hba.oas_wq->pring; } + } else { + if (unlikely(!phba->sli4_hba.els_wq)) + return NULL; + piocb->hba_wqidx = 0; + return phba->sli4_hba.els_wq->pring; } - return ring_number; } /** @@ -9061,7 +9185,7 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *piocb, uint32_t flag) { - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_sli_ring *pring; struct lpfc_queue *fpeq; struct lpfc_eqe *eqe; @@ -9069,21 +9193,19 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, int rc, idx; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = lpfc_sli_calc_ring(phba, ring_number, piocb); - if (unlikely(ring_number == LPFC_HBA_ERROR)) + pring = lpfc_sli4_calc_ring(phba, piocb); + if (unlikely(pring == NULL)) return IOCB_ERROR; - idx = piocb->fcp_wqidx; - pring = &phba->sli.ring[ring_number]; spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); spin_unlock_irqrestore(&pring->ring_lock, iflags); if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) { - fcp_eq_hdl = &phba->sli4_hba.fcp_eq_hdl[idx]; + idx = piocb->hba_wqidx; + hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx]; - if (atomic_dec_and_test(&fcp_eq_hdl-> - fcp_eq_in_use)) { + if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) { /* Get associated EQ with this index */ fpeq = phba->sli4_hba.hba_eq[idx]; @@ -9104,7 +9226,7 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_REARM); } - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); } } else { /* For now, SLI2/3 will still use hbalock */ @@ -9124,7 +9246,7 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, * only when driver needs to support target mode functionality * or IP over FC functionalities. * - * This function is called with no lock held. + * This function is called with no lock held. SLI3 only. 
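[Editor sketch] lpfc_sli4_calc_ring() above derives the work queue, and hence the ring, purely from the iocb flags and the per-I/O hba_wqidx. A stand-alone sketch of the same selection logic; the flag names and queue labels are invented for the example:

#include <stdio.h>

#define IO_FCP   0x1
#define IO_OAS   0x2

struct fake_io {
	unsigned int flags;
	int wqidx;		/* models hba_wqidx */
};

static const char *pick_wq(const struct fake_io *io, int fof_enabled)
{
	static char name[32];

	if (io->flags & IO_FCP) {
		if (fof_enabled && (io->flags & IO_OAS))
			return "oas_wq";
		snprintf(name, sizeof(name), "fcp_wq[%d]", io->wqidx);
		return name;
	}
	return "els_wq";	/* everything else goes to the slow-path WQ */
}

int main(void)
{
	struct fake_io scsi = { .flags = IO_FCP, .wqidx = 3 };
	struct fake_io oas  = { .flags = IO_FCP | IO_OAS, .wqidx = 0 };
	struct fake_io els  = { .flags = 0, .wqidx = 0 };

	printf("%s\n", pick_wq(&scsi, 1));	/* fcp_wq[3] */
	printf("%s\n", pick_wq(&oas, 1));	/* oas_wq */
	printf("%s\n", pick_wq(&els, 1));	/* els_wq */
	return 0;
}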
**/ static int lpfc_extra_ring_setup( struct lpfc_hba *phba) @@ -9137,14 +9259,14 @@ lpfc_extra_ring_setup( struct lpfc_hba *phba) /* Adjust cmd/rsp ring iocb entries more evenly */ /* Take some away from the FCP ring */ - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; pring->sli.sli3.numCiocb -= SLI2_IOCB_CMD_R1XTRA_ENTRIES; pring->sli.sli3.numRiocb -= SLI2_IOCB_RSP_R1XTRA_ENTRIES; pring->sli.sli3.numCiocb -= SLI2_IOCB_CMD_R3XTRA_ENTRIES; pring->sli.sli3.numRiocb -= SLI2_IOCB_RSP_R3XTRA_ENTRIES; /* and give them to the extra ring */ - pring = &psli->ring[psli->extra_ring]; + pring = &psli->sli3_ring[LPFC_EXTRA_RING]; pring->sli.sli3.numCiocb += SLI2_IOCB_CMD_R1XTRA_ENTRIES; pring->sli.sli3.numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES; @@ -9329,7 +9451,7 @@ lpfc_sli_async_event_handler(struct lpfc_hba * phba, /** - * lpfc_sli_setup - SLI ring setup function + * lpfc_sli4_setup - SLI ring setup function * @phba: Pointer to HBA context object. * * lpfc_sli_setup sets up rings of the SLI interface with @@ -9340,6 +9462,51 @@ lpfc_sli_async_event_handler(struct lpfc_hba * phba, * This function always returns 0. **/ int +lpfc_sli4_setup(struct lpfc_hba *phba) +{ + struct lpfc_sli_ring *pring; + + pring = phba->sli4_hba.els_wq->pring; + pring->num_mask = LPFC_MAX_RING_MASK; + pring->prt[0].profile = 0; /* Mask 0 */ + pring->prt[0].rctl = FC_RCTL_ELS_REQ; + pring->prt[0].type = FC_TYPE_ELS; + pring->prt[0].lpfc_sli_rcv_unsol_event = + lpfc_els_unsol_event; + pring->prt[1].profile = 0; /* Mask 1 */ + pring->prt[1].rctl = FC_RCTL_ELS_REP; + pring->prt[1].type = FC_TYPE_ELS; + pring->prt[1].lpfc_sli_rcv_unsol_event = + lpfc_els_unsol_event; + pring->prt[2].profile = 0; /* Mask 2 */ + /* NameServer Inquiry */ + pring->prt[2].rctl = FC_RCTL_DD_UNSOL_CTL; + /* NameServer */ + pring->prt[2].type = FC_TYPE_CT; + pring->prt[2].lpfc_sli_rcv_unsol_event = + lpfc_ct_unsol_event; + pring->prt[3].profile = 0; /* Mask 3 */ + /* NameServer response */ + pring->prt[3].rctl = FC_RCTL_DD_SOL_CTL; + /* NameServer */ + pring->prt[3].type = FC_TYPE_CT; + pring->prt[3].lpfc_sli_rcv_unsol_event = + lpfc_ct_unsol_event; + return 0; +} + +/** + * lpfc_sli_setup - SLI ring setup function + * @phba: Pointer to HBA context object. + * + * lpfc_sli_setup sets up rings of the SLI interface with + * number of iocbs per ring and iotags. This function is + * called while driver attach to the HBA and before the + * interrupts are enabled. So there is no need for locking. + * + * This function always returns 0. SLI3 only. + **/ +int lpfc_sli_setup(struct lpfc_hba *phba) { int i, totiocbsize = 0; @@ -9347,19 +9514,14 @@ lpfc_sli_setup(struct lpfc_hba *phba) struct lpfc_sli_ring *pring; psli->num_rings = MAX_SLI3_CONFIGURED_RINGS; - if (phba->sli_rev == LPFC_SLI_REV4) - psli->num_rings += phba->cfg_fcp_io_channel; psli->sli_flag = 0; - psli->fcp_ring = LPFC_FCP_RING; - psli->next_ring = LPFC_FCP_NEXT_RING; - psli->extra_ring = LPFC_EXTRA_RING; psli->iocbq_lookup = NULL; psli->iocbq_lookup_len = 0; psli->last_iotag = 0; for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; switch (i) { case LPFC_FCP_RING: /* ring 0 - FCP */ /* numCiocb and numRiocb are used in config_port */ @@ -9458,18 +9620,18 @@ lpfc_sli_setup(struct lpfc_hba *phba) } /** - * lpfc_sli_queue_setup - Queue initialization function + * lpfc_sli4_queue_init - Queue initialization function * @phba: Pointer to HBA context object. 
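[Editor sketch] The prt[] table built in the new lpfc_sli4_setup() above matches a received frame's R_CTL/TYPE pair to an unsolicited-event handler. A hypothetical table-dispatch sketch; the numeric values and handler names are made up, not the FC-defined ones:

#include <stdio.h>

struct unsol_slot {
	unsigned char rctl;
	unsigned char type;
	const char *handler;
};

#define RCTL_ELS_REQ 0x22
#define RCTL_ELS_REP 0x23
#define TYPE_ELS     0x01
#define TYPE_CT      0x20

static const struct unsol_slot prt[] = {
	{ RCTL_ELS_REQ,          TYPE_ELS, "els_unsol_event" },
	{ RCTL_ELS_REP,          TYPE_ELS, "els_unsol_event" },
	{ 0x02 /* unsol ctl */,  TYPE_CT,  "ct_unsol_event" },
	{ 0x03 /* sol ctl */,    TYPE_CT,  "ct_unsol_event" },
};

static const char *dispatch(unsigned char rctl, unsigned char type)
{
	size_t i;

	for (i = 0; i < sizeof(prt) / sizeof(prt[0]); i++)
		if (prt[i].rctl == rctl && prt[i].type == type)
			return prt[i].handler;
	return "drop frame";
}

int main(void)
{
	printf("%s\n", dispatch(RCTL_ELS_REQ, TYPE_ELS));	/* els handler */
	printf("%s\n", dispatch(0x99, 0x99));			/* drop frame */
	return 0;
}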
* - * lpfc_sli_queue_setup sets up mailbox queues and iocb queues for each + * lpfc_sli4_queue_init sets up mailbox queues and iocb queues for each * ring. This function also initializes ring indices of each ring. * This function is called during the initialization of the SLI * interface of an HBA. * This function is called with no lock held and always returns * 1. **/ -int -lpfc_sli_queue_setup(struct lpfc_hba *phba) +void +lpfc_sli4_queue_init(struct lpfc_hba *phba) { struct lpfc_sli *psli; struct lpfc_sli_ring *pring; @@ -9480,31 +9642,102 @@ lpfc_sli_queue_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&psli->mboxq); INIT_LIST_HEAD(&psli->mboxq_cmpl); /* Initialize list headers for txq and txcmplq as double linked lists */ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - pring->ringno = i; - pring->sli.sli3.next_cmdidx = 0; - pring->sli.sli3.local_getidx = 0; - pring->sli.sli3.cmdidx = 0; + for (i = 0; i < phba->cfg_fcp_io_channel; i++) { + pring = phba->sli4_hba.fcp_wq[i]->pring; pring->flag = 0; + pring->ringno = LPFC_FCP_RING; INIT_LIST_HEAD(&pring->txq); INIT_LIST_HEAD(&pring->txcmplq); INIT_LIST_HEAD(&pring->iocb_continueq); - INIT_LIST_HEAD(&pring->iocb_continue_saveq); - INIT_LIST_HEAD(&pring->postbufq); spin_lock_init(&pring->ring_lock); } - spin_unlock_irq(&phba->hbalock); - return 1; -} + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + pring->flag = 0; + pring->ringno = LPFC_FCP_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + pring = phba->sli4_hba.els_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_ELS_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); -/** - * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system - * @phba: Pointer to HBA context object. - * - * This routine flushes the mailbox command subsystem. It will unconditionally - * flush all the mailbox commands in the three possible stages in the mailbox - * command sub-system: pending mailbox command queue; the outstanding mailbox + if (phba->cfg_nvme_io_channel) { + pring = phba->sli4_hba.nvmels_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_ELS_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + + if (phba->cfg_fof) { + pring = phba->sli4_hba.oas_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_FCP_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + + spin_unlock_irq(&phba->hbalock); +} + +/** + * lpfc_sli_queue_init - Queue initialization function + * @phba: Pointer to HBA context object. + * + * lpfc_sli_queue_init sets up mailbox queues and iocb queues for each + * ring. This function also initializes ring indices of each ring. + * This function is called during the initialization of the SLI + * interface of an HBA. + * This function is called with no lock held and always returns + * 1. 
+ **/ +void +lpfc_sli_queue_init(struct lpfc_hba *phba) +{ + struct lpfc_sli *psli; + struct lpfc_sli_ring *pring; + int i; + + psli = &phba->sli; + spin_lock_irq(&phba->hbalock); + INIT_LIST_HEAD(&psli->mboxq); + INIT_LIST_HEAD(&psli->mboxq_cmpl); + /* Initialize list headers for txq and txcmplq as double linked lists */ + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + pring->ringno = i; + pring->sli.sli3.next_cmdidx = 0; + pring->sli.sli3.local_getidx = 0; + pring->sli.sli3.cmdidx = 0; + INIT_LIST_HEAD(&pring->iocb_continueq); + INIT_LIST_HEAD(&pring->iocb_continue_saveq); + INIT_LIST_HEAD(&pring->postbufq); + pring->flag = 0; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + spin_lock_init(&pring->ring_lock); + } + spin_unlock_irq(&phba->hbalock); +} + +/** + * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system + * @phba: Pointer to HBA context object. + * + * This routine flushes the mailbox command subsystem. It will unconditionally + * flush all the mailbox commands in the three possible stages in the mailbox + * command sub-system: pending mailbox command queue; the outstanding mailbox * command; and completed mailbox command queue. It is caller's responsibility * to make sure that the driver is in the proper state to flush the mailbox * command sub-system. Namely, the posting of mailbox commands into the @@ -9567,6 +9800,7 @@ lpfc_sli_host_down(struct lpfc_vport *vport) LIST_HEAD(completions); struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_iocbq *iocb, *next_iocb; int i; @@ -9576,36 +9810,64 @@ lpfc_sli_host_down(struct lpfc_vport *vport) lpfc_cleanup_discovery_resources(vport); spin_lock_irqsave(&phba->hbalock, flags); - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - prev_pring_flag = pring->flag; - /* Only slow rings */ - if (pring->ringno == LPFC_ELS_RING) { - pring->flag |= LPFC_DEFERRED_RING_EVENT; - /* Set the lpfc data pending flag */ - set_bit(LPFC_DATA_READY, &phba->data_flags); - } - /* - * Error everything on the txq since these iocbs have not been - * given to the FW yet. - */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { - if (iocb->vport != vport) - continue; - list_move_tail(&iocb->list, &completions); - } - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, - list) { - if (iocb->vport != vport) + /* + * Error everything on the txq since these iocbs + * have not been given to the FW yet. 
+ * Also issue ABTS for everything on the txcmplq + */ + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + prev_pring_flag = pring->flag; + /* Only slow rings */ + if (pring->ringno == LPFC_ELS_RING) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + list_for_each_entry_safe(iocb, next_iocb, + &pring->txq, list) { + if (iocb->vport != vport) + continue; + list_move_tail(&iocb->list, &completions); + } + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) { + if (iocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + pring->flag = prev_pring_flag; + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) continue; - lpfc_sli_issue_abort_iotag(phba, pring, iocb); + if (pring == phba->sli4_hba.els_wq->pring) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + prev_pring_flag = pring->flag; + spin_lock_irq(&pring->ring_lock); + list_for_each_entry_safe(iocb, next_iocb, + &pring->txq, list) { + if (iocb->vport != vport) + continue; + list_move_tail(&iocb->list, &completions); + } + spin_unlock_irq(&pring->ring_lock); + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) { + if (iocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + pring->flag = prev_pring_flag; } - - pring->flag = prev_pring_flag; } - spin_unlock_irqrestore(&phba->hbalock, flags); /* Cancel all the IOCBs from the completions list */ @@ -9634,6 +9896,7 @@ lpfc_sli_hba_down(struct lpfc_hba *phba) { LIST_HEAD(completions); struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_dmabuf *buf_ptr; unsigned long flags = 0; @@ -9647,20 +9910,36 @@ lpfc_sli_hba_down(struct lpfc_hba *phba) lpfc_fabric_abort_hba(phba); spin_lock_irqsave(&phba->hbalock, flags); - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - /* Only slow rings */ - if (pring->ringno == LPFC_ELS_RING) { - pring->flag |= LPFC_DEFERRED_RING_EVENT; - /* Set the lpfc data pending flag */ - set_bit(LPFC_DATA_READY, &phba->data_flags); - } - /* - * Error everything on the txq since these iocbs have not been - * given to the FW yet. - */ - list_splice_init(&pring->txq, &completions); + /* + * Error everything on the txq since these iocbs + * have not been given to the FW yet. 
+ */ + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + /* Only slow rings */ + if (pring->ringno == LPFC_ELS_RING) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + list_splice_init(&pring->txq, &completions); + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txq, &completions); + spin_unlock_irq(&pring->ring_lock); + if (pring == phba->sli4_hba.els_wq->pring) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + } } spin_unlock_irqrestore(&phba->hbalock, flags); @@ -9987,7 +10266,6 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *abtsiocbp; IOCB_t *icmd = NULL; IOCB_t *iabt = NULL; - int ring_number; int retval; unsigned long iflags; @@ -10027,7 +10305,7 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, iabt->ulpClass = icmd->ulpClass; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocbp->fcp_wqidx = cmdiocb->fcp_wqidx; + abtsiocbp->hba_wqidx = cmdiocb->hba_wqidx; if (cmdiocb->iocb_flag & LPFC_IO_FCP) abtsiocbp->iocb_flag |= LPFC_USE_FCPWQIDX; if (cmdiocb->iocb_flag & LPFC_IO_FOF) @@ -10049,11 +10327,9 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, abtsiocbp->iotag); if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = - lpfc_sli_calc_ring(phba, pring->ringno, abtsiocbp); - if (unlikely(ring_number == LPFC_HBA_ERROR)) + pring = lpfc_sli4_calc_ring(phba, abtsiocbp); + if (unlikely(pring == NULL)) return 0; - pring = &phba->sli.ring[ring_number]; /* Note: both hbalock and ring_lock need to be set here */ spin_lock_irqsave(&pring->ring_lock, iflags); retval = __lpfc_sli_issue_iocb(phba, pring->ringno, @@ -10134,6 +10410,111 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, return retval; } +/** + * lpfc_sli4_abort_nvme_io - Issue abort for a command iocb + * @phba: Pointer to HBA context object. + * @pring: Pointer to driver SLI ring object. + * @cmdiocb: Pointer to driver command iocb object. + * + * This function issues an abort iocb for the provided command iocb down to + * the port. Other than the case the outstanding command iocb is an abort + * request, this function issues abort out unconditionally. This function is + * called with hbalock held. The function returns 0 when it fails due to + * memory allocation failure or when the command iocb is an abort request. + **/ +static int +lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *cmdiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct lpfc_iocbq *abtsiocbp; + union lpfc_wqe *abts_wqe; + int retval; + + /* + * There are certain command types we don't want to abort. And we + * don't want to abort commands that are already in the process of + * being aborted. 
+ */ + if (cmdiocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || + cmdiocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN || + (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0) + return 0; + + /* issue ABTS for this io based on iotag */ + abtsiocbp = __lpfc_sli_get_iocbq(phba); + if (abtsiocbp == NULL) + return 0; + + /* This signals the response to set the correct status + * before calling the completion handler + */ + cmdiocb->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* Complete prepping the abort wqe and issue to the FW. */ + abts_wqe = &abtsiocbp->wqe; + bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0); + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* Explicitly set reserved fields to zero.*/ + abts_wqe->abort_cmd.rsrvd4 = 0; + abts_wqe->abort_cmd.rsrvd5 = 0; + + /* WQE Common - word 6. Context is XRI tag. Set 0. */ + bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, + cmdiocb->iocb.ulpClass); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = cmdiocb->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abtsiocbp->iotag); + + /* word 10 */ + bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx); + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abtsiocbp->iocb_flag |= LPFC_IO_NVME; + abtsiocbp->vport = vport; + /* todo: assign wqe_cmpl to lpfc_nvme_abort_fcreq_cmpl + * subsequent patch will add routine. For now, just skip assignment + * as won't ever be called. + */ + retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp); + if (retval == IOCB_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6147 Failed abts issue_wqe with status x%x " + "for oxid x%x\n", + retval, cmdiocb->sli4_xritag); + lpfc_sli_release_iocbq(phba, abtsiocbp); + return retval; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6148 Drv Abort NVME Request Issued for " + "ox_id x%x on reqtag x%x\n", + cmdiocb->sli4_xritag, + abtsiocbp->iotag); + + return retval; +} + /** * lpfc_sli_hba_iocb_abort - Abort all iocbs to an hba. * @phba: pointer to lpfc HBA data structure. 
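[Editor sketch] The abort WQE above is assembled field by field with bf_set() on fixed bit positions inside 32-bit words. A minimal sketch of that style of shift-and-mask accessor; the field layout below is invented and is not the real WQE format:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_SET(word, shift, mask, val) \
	((word) = ((word) & ~((mask) << (shift))) | (((val) & (mask)) << (shift)))
#define FIELD_GET(word, shift, mask) \
	(((word) >> (shift)) & (mask))

/* made-up fields of a fake "abort command" word */
#define ABT_IA_SHIFT		0
#define ABT_IA_MASK		0x1
#define ABT_CRITERIA_SHIFT	1
#define ABT_CRITERIA_MASK	0x7

int main(void)
{
	uint32_t word = 0;

	FIELD_SET(word, ABT_IA_SHIFT, ABT_IA_MASK, 0);		    /* real ABTS */
	FIELD_SET(word, ABT_CRITERIA_SHIFT, ABT_CRITERIA_MASK, 3); /* tag type */

	assert(FIELD_GET(word, ABT_IA_SHIFT, ABT_IA_MASK) == 0);
	assert(FIELD_GET(word, ABT_CRITERIA_SHIFT, ABT_CRITERIA_MASK) == 3);
	printf("abort word = 0x%08x\n", (unsigned)word);
	return 0;
}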
@@ -10145,10 +10526,20 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; + struct lpfc_queue *qp = NULL; int i; - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + lpfc_sli_abort_iocb_ring(phba, pring); + } + return; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; lpfc_sli_abort_iocb_ring(phba, pring); } } @@ -10352,7 +10743,7 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abtsiocb->vport = vport; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocb->fcp_wqidx = iocbq->fcp_wqidx; + abtsiocb->hba_wqidx = iocbq->hba_wqidx; if (iocbq->iocb_flag & LPFC_IO_FCP) abtsiocb->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocbq->iocb_flag & LPFC_IO_FOF) @@ -10412,7 +10803,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, int sum, i, ret_val; unsigned long iflags; struct lpfc_sli_ring *pring_s4; - uint32_t ring_number; spin_lock_irq(&phba->hbalock); @@ -10455,7 +10845,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abtsiocbq->vport = vport; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocbq->fcp_wqidx = iocbq->fcp_wqidx; + abtsiocbq->hba_wqidx = iocbq->hba_wqidx; if (iocbq->iocb_flag & LPFC_IO_FCP) abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocbq->iocb_flag & LPFC_IO_FOF) @@ -10480,9 +10870,9 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, iocbq->iocb_flag |= LPFC_DRIVER_ABORTED; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = MAX_SLI3_CONFIGURED_RINGS + - iocbq->fcp_wqidx; - pring_s4 = &phba->sli.ring[ring_number]; + pring_s4 = lpfc_sli4_calc_ring(phba, iocbq); + if (pring_s4 == NULL) + continue; /* Note: both hbalock and ring_lock must be set here */ spin_lock_irqsave(&pring_s4->ring_lock, iflags); ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, @@ -10644,10 +11034,14 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, struct lpfc_iocbq *iocb; int txq_cnt = 0; int txcmplq_cnt = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; unsigned long iflags; bool iocb_completed = true; + if (phba->sli_rev >= LPFC_SLI_REV4) + pring = lpfc_sli4_calc_ring(phba, piocb); + else + pring = &phba->sli.sli3_ring[ring_number]; /* * If the caller has provided a response iocbq buffer, then context2 * is NULL or its an error. @@ -11442,6 +11836,7 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) uint32_t ha_copy; unsigned long status; unsigned long iflag; + struct lpfc_sli_ring *pring; /* Get the driver's phba structure from the dev_id and * assume the HBA is not interrupting. 
@@ -11486,10 +11881,9 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) status = (ha_copy & (HA_RXMASK << (4*LPFC_FCP_RING))); status >>= (4*LPFC_FCP_RING); + pring = &phba->sli.sli3_ring[LPFC_FCP_RING]; if (status & HA_RXMASK) - lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], - status); + lpfc_sli_handle_fast_ring_event(phba, pring, status); if (phba->cfg_multi_ring_support == 2) { /* @@ -11500,7 +11894,7 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) status >>= (4*LPFC_EXTRA_RING); if (status & HA_RXMASK) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_EXTRA_RING], + &phba->sli.sli3_ring[LPFC_EXTRA_RING], status); } } @@ -11813,11 +12207,13 @@ static struct lpfc_iocbq * lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba, struct lpfc_iocbq *irspiocbq) { - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *cmdiocbq; struct lpfc_wcqe_complete *wcqe; unsigned long iflags; + pring = lpfc_phba_elsring(phba); + wcqe = &irspiocbq->cq_event.cqe.wcqe_cmpl; spin_lock_irqsave(&pring->ring_lock, iflags); pring->stats.iocb_event++; @@ -12053,8 +12449,6 @@ lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, txq_cnt++; if (!list_empty(&pring->txcmplq)) txcmplq_cnt++; - if (!list_empty(&phba->sli.ring[LPFC_FCP_RING].txcmplq)) - fcp_txcmplq_cnt++; lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0387 NO IOCBQ data: txq_cnt=%d iocb_cnt=%d " "fcp_txcmplq_cnt=%d, els_txcmplq_cnt=%d\n", @@ -12173,6 +12567,7 @@ static bool lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) { bool workposted = false; + struct fc_frame_header *fc_hdr; struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq; struct lpfc_queue *drq = phba->sli4_hba.dat_rq; struct hbq_dmabuf *dma_buf; @@ -12207,6 +12602,10 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) } hrq->RQ_rcv_buf++; memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe)); + + /* If a NVME LS event (type 0x28), treat it as Fast path */ + fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; + /* save off the frame for the word thread to process */ list_add_tail(&dma_buf->cq_event.list, &phba->sli4_hba.sp_queue_event); @@ -12325,6 +12724,9 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, return; } + /* Save EQ associated with this CQ */ + cq->assoc_qp = speq; + /* Process all the entries to the CQ */ switch (cq->type) { case LPFC_MCQ: @@ -12337,8 +12739,9 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, break; case LPFC_WCQ: while ((cqe = lpfc_sli4_cq_get(cq))) { - if (cq->subtype == LPFC_FCP) - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, + if ((cq->subtype == LPFC_FCP) || + (cq->subtype == LPFC_NVME)) + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); else workposted |= lpfc_sli4_sp_handle_cqe(phba, cq, @@ -12425,7 +12828,23 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, bf_get(lpfc_wcqe_c_request_tag, wcqe)); return; } - if (unlikely(!cmdiocbq->iocb_cmpl)) { + + if (cq->assoc_qp) + cmdiocbq->isr_timestamp = + cq->assoc_qp->isr_timestamp; + + if (cmdiocbq->iocb_cmpl == NULL) { + if (cmdiocbq->wqe_cmpl) { + if (cmdiocbq->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_lock_irqsave(&phba->hbalock, iflags); + cmdiocbq->iocb_flag &= ~LPFC_DRIVER_ABORTED; + spin_unlock_irqrestore(&phba->hbalock, iflags); + } + + /* Pass the cmd_iocb and the wcqe to the upper layer */ + (cmdiocbq->wqe_cmpl)(phba, cmdiocbq, wcqe); + return; + } 
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "0375 FCP cmdiocb not callback function " "iotag: (%d)\n", @@ -12461,12 +12880,12 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, { struct lpfc_queue *childwq; bool wqid_matched = false; - uint16_t fcp_wqid; + uint16_t hba_wqid; /* Check for fast-path FCP work queue release */ - fcp_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe); + hba_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe); list_for_each_entry(childwq, &cq->child_list, list) { - if (childwq->queue_id == fcp_wqid) { + if (childwq->queue_id == hba_wqid) { lpfc_sli4_wq_release(childwq, bf_get(lpfc_wcqe_r_wqe_index, wcqe)); wqid_matched = true; @@ -12477,11 +12896,11 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, if (wqid_matched != true) lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "2580 Fast-path wqe consume event carries " - "miss-matched qid: wcqe-qid=x%x\n", fcp_wqid); + "miss-matched qid: wcqe-qid=x%x\n", hba_wqid); } /** - * lpfc_sli4_fp_handle_wcqe - Process fast-path work queue completion entry + * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry * @cq: Pointer to the completion queue. * @eqe: Pointer to fast-path completion queue entry. * @@ -12489,7 +12908,7 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, * event queue for FCP command response completion. **/ static int -lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, +lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_cqe *cqe) { struct lpfc_wcqe_release wcqe; @@ -12501,10 +12920,15 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, /* Check and process for different type of WCQE and dispatch */ switch (bf_get(lpfc_wcqe_c_code, &wcqe)) { case CQE_CODE_COMPL_WQE: + case CQE_CODE_NVME_ERSP: cq->CQ_wq++; /* Process the WQ complete event */ phba->last_completion_time = jiffies; - lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, + if ((cq->subtype == LPFC_FCP) || (cq->subtype == LPFC_NVME)) + lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, + (struct lpfc_wcqe_complete *)&wcqe); + if (cq->subtype == LPFC_NVME_LS) + lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, (struct lpfc_wcqe_complete *)&wcqe); break; case CQE_CODE_RELEASE_WQE: @@ -12520,9 +12944,13 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq, (struct sli4_wcqe_xri_aborted *)&wcqe); break; + case CQE_CODE_RECEIVE_V1: + case CQE_CODE_RECEIVE: + phba->last_completion_time = jiffies; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0144 Not a valid WCQE code: x%x\n", + "0144 Not a valid CQE code: x%x\n", bf_get(lpfc_wcqe_c_code, &wcqe)); break; } @@ -12545,7 +12973,7 @@ static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, uint32_t qidx) { - struct lpfc_queue *cq; + struct lpfc_queue *cq = NULL; struct lpfc_cqe *cqe; bool workposted = false; uint16_t cqid; @@ -12563,28 +12991,33 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Get the reference to the corresponding CQ */ cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); - /* Check if this is a Slow path event */ - if (unlikely(cqid != phba->sli4_hba.fcp_cq_map[qidx])) { - lpfc_sli4_sp_handle_eqe(phba, eqe, - phba->sli4_hba.hba_eq[qidx]); - return; + if (phba->sli4_hba.nvme_cq_map && + (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { + /* Process NVME command completion */ + cq = phba->sli4_hba.nvme_cq[qidx]; + goto process_cq; } - if 
(unlikely(!phba->sli4_hba.fcp_cq)) { - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3146 Fast-path completion queues " - "does not exist\n"); - return; + if (phba->sli4_hba.fcp_cq_map && + (cqid == phba->sli4_hba.fcp_cq_map[qidx])) { + /* Process FCP command completion */ + cq = phba->sli4_hba.fcp_cq[qidx]; + goto process_cq; } - cq = phba->sli4_hba.fcp_cq[qidx]; - if (unlikely(!cq)) { - if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) - lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0367 Fast-path completion queue " - "(%d) does not exist\n", qidx); + + if (phba->sli4_hba.nvmels_cq && + (cqid == phba->sli4_hba.nvmels_cq->queue_id)) { + /* Process NVME unsol rcv */ + cq = phba->sli4_hba.nvmels_cq; + } + + /* Otherwise this is a Slow path event */ + if (cq == NULL) { + lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]); return; } +process_cq: if (unlikely(cqid != cq->queue_id)) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0368 Miss-matched fast-path completion " @@ -12593,9 +13026,12 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, return; } + /* Save EQ associated with this CQ */ + cq->assoc_qp = phba->sli4_hba.hba_eq[qidx]; + /* Process all the entries to the CQ */ while ((cqe = lpfc_sli4_cq_get(cq))) { - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe); + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); } @@ -12686,7 +13122,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe) /* Process all the entries to the OAS CQ */ while ((cqe = lpfc_sli4_cq_get(cq))) { - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe); + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); } @@ -12734,15 +13170,15 @@ irqreturn_t lpfc_sli4_fof_intr_handler(int irq, void *dev_id) { struct lpfc_hba *phba; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_queue *eq; struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; /* Get the driver's phba structure from the dev_id */ - fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; - phba = fcp_eq_hdl->phba; + hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id; + phba = hba_eq_hdl->phba; if (unlikely(!phba)) return IRQ_NONE; @@ -12828,17 +13264,17 @@ irqreturn_t lpfc_sli4_hba_intr_handler(int irq, void *dev_id) { struct lpfc_hba *phba; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_queue *fpeq; struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - int fcp_eqidx; + int hba_eqidx; /* Get the driver's phba structure from the dev_id */ - fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; - phba = fcp_eq_hdl->phba; - fcp_eqidx = fcp_eq_hdl->idx; + hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id; + phba = hba_eq_hdl->phba; + hba_eqidx = hba_eq_hdl->idx; if (unlikely(!phba)) return IRQ_NONE; @@ -12846,15 +13282,15 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) return IRQ_NONE; /* Get to the EQ struct associated with this vector */ - fpeq = phba->sli4_hba.hba_eq[fcp_eqidx]; + fpeq = phba->sli4_hba.hba_eq[hba_eqidx]; if (unlikely(!fpeq)) return IRQ_NONE; if (lpfc_fcp_look_ahead) { - if (atomic_dec_and_test(&fcp_eq_hdl->fcp_eq_in_use)) + if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) lpfc_sli4_eq_clr_intr(fpeq); else { - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } } @@ -12869,7 +13305,7 @@ lpfc_sli4_hba_intr_handler(int 
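[Editor sketch] The reworked fast-path EQE handler above resolves the CQ id from the event entry against the NVME map, then the FCP map, then the NVME-LS queue, and treats anything else as a slow-path event. A simplified routing sketch with made-up queue ids:

#include <stdio.h>

#define NR_CHANNELS 2

static const int nvme_cq_map[NR_CHANNELS] = { 100, 101 };
static const int fcp_cq_map[NR_CHANNELS]  = { 200, 201 };
static const int nvmels_cq_id = 300;

static const char *route_cq(int cqid, int qidx)
{
	if (cqid == nvme_cq_map[qidx])
		return "nvme fast path";
	if (cqid == fcp_cq_map[qidx])
		return "fcp fast path";
	if (cqid == nvmels_cq_id)
		return "nvme ls";
	return "slow path";
}

int main(void)
{
	printf("%s\n", route_cq(101, 1));	/* nvme fast path */
	printf("%s\n", route_cq(200, 0));	/* fcp fast path */
	printf("%s\n", route_cq(300, 0));	/* nvme ls */
	printf("%s\n", route_cq(42, 0));	/* slow path */
	return 0;
}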
irq, void *dev_id) lpfc_sli4_eq_flush(phba, fpeq); spin_unlock_irqrestore(&phba->hbalock, iflag); if (lpfc_fcp_look_ahead) - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } @@ -12880,7 +13316,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) if (eqe == NULL) break; - lpfc_sli4_hba_handle_eqe(phba, eqe, fcp_eqidx); + lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx); if (!(++ecount % fpeq->entry_repost)) lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM); fpeq->EQ_processed++; @@ -12897,7 +13333,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) fpeq->EQ_no_entry++; if (lpfc_fcp_look_ahead) { - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } @@ -12911,7 +13347,8 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) } if (lpfc_fcp_look_ahead) - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); + return IRQ_HANDLED; } /* lpfc_sli4_fp_intr_handler */ @@ -12938,7 +13375,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - int fcp_eqidx; + int qidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12949,16 +13386,16 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) /* * Invoke fast-path host attention interrupt handling as appropriate. */ - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; fcp_eqidx++) { + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) { hba_irq_rc = lpfc_sli4_hba_intr_handler(irq, - &phba->sli4_hba.fcp_eq_hdl[fcp_eqidx]); + &phba->sli4_hba.hba_eq_hdl[qidx]); if (hba_irq_rc == IRQ_HANDLED) hba_handled |= true; } if (phba->cfg_fof) { hba_irq_rc = lpfc_sli4_fof_intr_handler(irq, - &phba->sli4_hba.fcp_eq_hdl[0]); + &phba->sli4_hba.hba_eq_hdl[qidx]); if (hba_irq_rc == IRQ_HANDLED) hba_handled |= true; } @@ -12989,6 +13426,11 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) dmabuf->virt, dmabuf->phys); kfree(dmabuf); } + if (queue->rqbp) { + lpfc_free_rq_buffer(queue->phba, queue); + kfree(queue->rqbp); + } + kfree(queue->pring); kfree(queue); return; } @@ -13022,7 +13464,13 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size, return NULL; queue->page_count = (ALIGN(entry_size * entry_count, hw_page_size))/hw_page_size; + + /* If needed, Adjust page count to match the max the adapter supports */ + if (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt) + queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt; + INIT_LIST_HEAD(&queue->list); + INIT_LIST_HEAD(&queue->wq_list); INIT_LIST_HEAD(&queue->page_list); INIT_LIST_HEAD(&queue->child_list); for (x = 0, total_qe_count = 0; x < queue->page_count; x++) { @@ -13094,7 +13542,7 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) } /** - * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs + * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify * @@ -13110,7 +13558,7 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * fails this function will return -ENXIO. 
**/ int -lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) +lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq) { struct lpfc_mbx_modify_eq_delay *eq_delay; LPFC_MBOXQ_t *mbox; @@ -13118,11 +13566,11 @@ lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) int cnt, rc, length, status = 0; uint32_t shdr_status, shdr_add_status; uint32_t result; - int fcp_eqidx; + int qidx; union lpfc_sli4_cfg_shdr *shdr; uint16_t dmult; - if (startq >= phba->cfg_fcp_io_channel) + if (startq >= phba->io_channel_irqs) return 0; mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -13136,16 +13584,15 @@ lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) eq_delay = &mbox->u.mqe.un.eq_delay; /* Calculate delay multiper from maximum interrupt per second */ - result = phba->cfg_fcp_imax / phba->cfg_fcp_io_channel; - if (result > LPFC_DMULT_CONST) + result = phba->cfg_fcp_imax / phba->io_channel_irqs; + if (result > LPFC_DMULT_CONST || result == 0) dmult = 0; else dmult = LPFC_DMULT_CONST/result - 1; cnt = 0; - for (fcp_eqidx = startq; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx++) { - eq = phba->sli4_hba.hba_eq[fcp_eqidx]; + for (qidx = startq; qidx < phba->io_channel_irqs; qidx++) { + eq = phba->sli4_hba.hba_eq[qidx]; if (!eq) continue; eq_delay->u.request.eq[cnt].eq_id = eq->queue_id; @@ -13829,6 +14276,11 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, wq->db_format = LPFC_DB_LIST_FORMAT; wq->db_regaddr = phba->sli4_hba.WQDBregaddr; } + wq->pring = kzalloc(sizeof(struct lpfc_sli_ring), GFP_KERNEL); + if (wq->pring == NULL) { + status = -ENOMEM; + goto out; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -14613,7 +15065,7 @@ lpfc_sli4_next_xritag(struct lpfc_hba *phba) } /** - * lpfc_sli4_post_els_sgl_list - post a block of ELS sgls to the port. + * lpfc_sli4_post_sgl_list - post a block of ELS sgls to the port. * @phba: pointer to lpfc hba data structure. * @post_sgl_list: pointer to els sgl entry list. * @count: number of els sgl entries on the list. @@ -14624,7 +15076,7 @@ lpfc_sli4_next_xritag(struct lpfc_hba *phba) * stopped. **/ static int -lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, +lpfc_sli4_post_sgl_list(struct lpfc_hba *phba, struct list_head *post_sgl_list, int post_cnt) { @@ -14640,14 +15092,15 @@ lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; - reqlen = phba->sli4_hba.els_xri_cnt * sizeof(struct sgl_page_pairs) + + reqlen = post_cnt * sizeof(struct sgl_page_pairs) + sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t); if (reqlen > SLI4_PAGE_SIZE) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2559 Block sgl registration required DMA " "size (%d) great than a page\n", reqlen); return -ENOMEM; } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) return -ENOMEM; @@ -14691,8 +15144,9 @@ lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, /* Complete initialization and perform endian conversion. 
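[Editor sketch] lpfc_modify_hba_eq_delay() above divides the configured interrupt ceiling across all IRQ channels and converts the result to a delay multiplier, clamping out-of-range results (including a zero quotient) to 0. A small arithmetic sketch, assuming a stand-in value for LPFC_DMULT_CONST:

#include <stdio.h>

#define DMULT_CONST 0xffff	/* assumed stand-in, not the driver's constant */

static unsigned int eq_delay_mult(unsigned int imax, unsigned int nr_irqs)
{
	unsigned int per_channel;

	if (nr_irqs == 0)
		return 0;
	per_channel = imax / nr_irqs;
	if (per_channel > DMULT_CONST || per_channel == 0)
		return 0;
	return DMULT_CONST / per_channel - 1;
}

int main(void)
{
	printf("dmult(40000, 4) = %u\n", eq_delay_mult(40000, 4));
	printf("dmult(0, 4)     = %u\n", eq_delay_mult(0, 4));
	return 0;
}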
*/ bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start); - bf_set(lpfc_post_sgl_pages_xricnt, sgl, phba->sli4_hba.els_xri_cnt); + bf_set(lpfc_post_sgl_pages_xricnt, sgl, post_cnt); sgl->word0 = cpu_to_le32(sgl->word0); + if (!phba->sli4_hba.intr_enable) rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); else { @@ -14888,6 +15342,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_TYPE_ELS: case FC_TYPE_FCP: case FC_TYPE_CT: + case FC_TYPE_NVME: break; case FC_TYPE_IP: case FC_TYPE_ILS: @@ -14945,14 +15400,11 @@ lpfc_fc_hdr_get_vfi(struct fc_frame_header *fc_hdr) **/ static struct lpfc_vport * lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, - uint16_t fcfi) + uint16_t fcfi, uint32_t did) { struct lpfc_vport **vports; struct lpfc_vport *vport = NULL; int i; - uint32_t did = (fc_hdr->fh_d_id[0] << 16 | - fc_hdr->fh_d_id[1] << 8 | - fc_hdr->fh_d_id[2]); if (did == Fabric_DID) return phba->pport; @@ -14961,7 +15413,7 @@ lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, return phba->pport; vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) + if (vports != NULL) { for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) { if (phba->fcf.fcfi == fcfi && vports[i]->vfi == lpfc_fc_hdr_get_vfi(fc_hdr) && @@ -14970,6 +15422,7 @@ lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, break; } } + } lpfc_destroy_vport_work_array(phba, vports); return vport; } @@ -15399,7 +15852,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, * a BA_RJT. */ if ((fctl & FC_FC_EX_CTX) && - (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + (lxri > lpfc_sli4_get_iocb_cnt(phba))) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -15576,6 +16029,7 @@ lpfc_prep_seq(struct lpfc_vport *vport, struct hbq_dmabuf *seq_dmabuf) /* Initialize the first IOCB. */ first_iocbq->iocb.unsli3.rcvsli3.acc_len = 0; first_iocbq->iocb.ulpStatus = IOSTAT_SUCCESS; + first_iocbq->vport = vport; /* Check FC Header to see what TYPE of frame we are rcv'ing */ if (sli4_type_from_fc_hdr(fc_hdr) == FC_TYPE_ELS) { @@ -15688,7 +16142,7 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, return; } if (!lpfc_complete_unsol_iocb(phba, - &phba->sli.ring[LPFC_ELS_RING], + phba->sli4_hba.els_wq->pring, iocbq, fc_hdr->fh_r_ctl, fc_hdr->fh_type)) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -15713,8 +16167,7 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, * This function is called with no lock held. This function processes all * the received buffers and gives it to upper layers when a received buffer * indicates that it is the final frame in the sequence. The interrupt - * service routine processes received buffers at interrupt contexts and adds - * received dma buffers to the rb_pend_list queue and signals the worker thread. + * service routine processes received buffers at interrupt contexts. * Worker thread calls lpfc_sli4_handle_received_buffer, which will call the * appropriate receive function when the final frame in a sequence is received. 
**/ @@ -15745,16 +16198,16 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->cq_event.cqe.rcqe_cmpl); - vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi); + /* d_id this frame is directed to */ + did = sli4_did_from_fc_hdr(fc_hdr); + + vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi, did); if (!vport) { /* throw out the frame */ lpfc_in_buf_free(phba, &dmabuf->dbuf); return; } - /* d_id this frame is directed to */ - did = sli4_did_from_fc_hdr(fc_hdr); - /* vport is registered unless we rcv a FLOGI directed to Fabric_DID */ if (!(vport->vpi_state & LPFC_VPI_REGISTERED) && (did != Fabric_DID)) { @@ -17232,7 +17685,7 @@ uint32_t lpfc_drain_txq(struct lpfc_hba *phba) { LIST_HEAD(completions); - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *piocbq = NULL; unsigned long iflags = 0; char *fail_msg = NULL; @@ -17241,6 +17694,8 @@ lpfc_drain_txq(struct lpfc_hba *phba) union lpfc_wqe *wqe = (union lpfc_wqe *) &wqe128; uint32_t txq_cnt = 0; + pring = lpfc_phba_elsring(phba); + spin_lock_irqsave(&pring->ring_lock, iflags); list_for_each_entry(piocbq, &pring->txq, list) { txq_cnt++; @@ -17262,7 +17717,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) txq_cnt); break; } - sglq = __lpfc_sli_get_sglq(phba, piocbq); + sglq = __lpfc_sli_get_els_sglq(phba, piocbq); if (!sglq) { __lpfc_sli_ringtx_put(phba, pring, piocbq); spin_unlock_irqrestore(&pring->ring_lock, iflags); @@ -17302,3 +17757,191 @@ lpfc_drain_txq(struct lpfc_hba *phba) return txq_cnt; } + +/** + * lpfc_wqe_bpl2sgl - Convert the bpl/bde to a sgl. + * @phba: Pointer to HBA context object. + * @pwqe: Pointer to command WQE. + * @sglq: Pointer to the scatter gather queue object. + * + * This routine converts the bpl or bde that is in the WQE + * to a sgl list for the sli4 hardware. The physical address + * of the bpl/bde is converted back to a virtual address. + * If the WQE contains a BPL then the list of BDE's is + * converted to sli4_sge's. If the WQE contains a single + * BDE then it is converted to a single sli_sge. + * The WQE is still in cpu endianness so the contents of + * the bpl can be used without byte swapping. + * + * Returns valid XRI = Success, NO_XRI = Failure. + */ +static uint16_t +lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq, + struct lpfc_sglq *sglq) +{ + uint16_t xritag = NO_XRI; + struct ulp_bde64 *bpl = NULL; + struct ulp_bde64 bde; + struct sli4_sge *sgl = NULL; + struct lpfc_dmabuf *dmabuf; + union lpfc_wqe *wqe; + int numBdes = 0; + int i = 0; + uint32_t offset = 0; /* accumulated offset in the sg request list */ + int inbound = 0; /* number of sg reply entries inbound from firmware */ + uint32_t cmd; + + if (!pwqeq || !sglq) + return xritag; + + sgl = (struct sli4_sge *)sglq->sgl; + wqe = &pwqeq->wqe; + pwqeq->iocb.ulpIoTag = pwqeq->iotag; + + cmd = bf_get(wqe_cmnd, &wqe->generic.wqe_com); + if (cmd == CMD_XMIT_BLS_RSP64_WQE) + return sglq->sli4_xritag; + numBdes = pwqeq->rsvd2; + if (numBdes) { + /* The addrHigh and addrLow fields within the WQE + * have not been byteswapped yet so there is no + * need to swap them back. + */ + if (pwqeq->context3) + dmabuf = (struct lpfc_dmabuf *)pwqeq->context3; + else + return xritag; + + bpl = (struct ulp_bde64 *)dmabuf->virt; + if (!bpl) + return xritag; + + for (i = 0; i < numBdes; i++) { + /* Should already be byte swapped. 
*/ + sgl->addr_hi = bpl->addrHigh; + sgl->addr_lo = bpl->addrLow; + + sgl->word2 = le32_to_cpu(sgl->word2); + if ((i+1) == numBdes) + bf_set(lpfc_sli4_sge_last, sgl, 1); + else + bf_set(lpfc_sli4_sge_last, sgl, 0); + /* swap the size field back to the cpu so we + * can assign it to the sgl. + */ + bde.tus.w = le32_to_cpu(bpl->tus.w); + sgl->sge_len = cpu_to_le32(bde.tus.f.bdeSize); + /* The offsets in the sgl need to be accumulated + * separately for the request and reply lists. + * The request is always first, the reply follows. + */ + switch (cmd) { + case CMD_GEN_REQUEST64_WQE: + /* add up the reply sg entries */ + if (bpl->tus.f.bdeFlags == BUFF_TYPE_BDE_64I) + inbound++; + /* first inbound? reset the offset */ + if (inbound == 1) + offset = 0; + bf_set(lpfc_sli4_sge_offset, sgl, offset); + bf_set(lpfc_sli4_sge_type, sgl, + LPFC_SGE_TYPE_DATA); + offset += bde.tus.f.bdeSize; + break; + case CMD_FCP_TRSP64_WQE: + bf_set(lpfc_sli4_sge_offset, sgl, 0); + bf_set(lpfc_sli4_sge_type, sgl, + LPFC_SGE_TYPE_DATA); + break; + case CMD_FCP_TSEND64_WQE: + case CMD_FCP_TRECEIVE64_WQE: + bf_set(lpfc_sli4_sge_type, sgl, + bpl->tus.f.bdeFlags); + if (i < 3) + offset = 0; + else + offset += bde.tus.f.bdeSize; + bf_set(lpfc_sli4_sge_offset, sgl, offset); + break; + } + sgl->word2 = cpu_to_le32(sgl->word2); + bpl++; + sgl++; + } + } else if (wqe->gen_req.bde.tus.f.bdeFlags == BUFF_TYPE_BDE_64) { + /* The addrHigh and addrLow fields of the BDE have not + * been byteswapped yet so they need to be swapped + * before putting them in the sgl. + */ + sgl->addr_hi = cpu_to_le32(wqe->gen_req.bde.addrHigh); + sgl->addr_lo = cpu_to_le32(wqe->gen_req.bde.addrLow); + sgl->word2 = le32_to_cpu(sgl->word2); + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(wqe->gen_req.bde.tus.f.bdeSize); + } + return sglq->sli4_xritag; +} + +/** + * lpfc_sli4_issue_wqe - Issue an SLI4 Work Queue Entry (WQE) + * @phba: Pointer to HBA context object. + * @ring_number: Base sli ring number + * @pwqe: Pointer to command WQE. + **/ +int +lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, + struct lpfc_iocbq *pwqe) +{ + union lpfc_wqe *wqe = &pwqe->wqe; + struct lpfc_queue *wq; + struct lpfc_sglq *sglq; + struct lpfc_sli_ring *pring; + unsigned long iflags; + + /* NVME_LS and NVME_LS ABTS requests. */ + if (pwqe->iocb_flag & LPFC_IO_NVME_LS) { + pring = phba->sli4_hba.nvmels_wq->pring; + spin_lock_irqsave(&pring->ring_lock, iflags); + sglq = __lpfc_sli_get_els_sglq(phba, pwqe); + if (!sglq) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_BUSY; + } + pwqe->sli4_lxritag = sglq->sli4_lxritag; + pwqe->sli4_xritag = sglq->sli4_xritag; + if (lpfc_wqe_bpl2sgl(phba, pwqe, sglq) == NO_XRI) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com, + pwqe->sli4_xritag); + if (lpfc_sli4_wq_put(phba->sli4_hba.nvmels_wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } + + /* NVME_FCREQ and NVME_ABTS requests */ + if (pwqe->iocb_flag & LPFC_IO_NVME) { + /* Get the IO distribution (hba_wqidx) for WQ assignment. 
*/ + pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring; + + spin_lock_irqsave(&pring->ring_lock, iflags); + wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]; + bf_set(wqe_cqid, &wqe->generic.wqe_com, + phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id); + if (lpfc_sli4_wq_put(wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } + + return WQE_ERROR; +} diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 3fad5657514b43..72520125251b5f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -54,9 +54,16 @@ struct lpfc_iocbq { uint16_t iotag; /* pre-assigned IO tag */ uint16_t sli4_lxritag; /* logical pre-assigned XRI. */ uint16_t sli4_xritag; /* pre-assigned XRI, (OXID) tag. */ + uint16_t hba_wqidx; /* index to HBA work queue */ struct lpfc_cq_event cq_event; + struct lpfc_wcqe_complete wcqe_cmpl; /* WQE cmpl */ + uint64_t isr_timestamp; - IOCB_t iocb; /* IOCB cmd */ + /* Be careful here */ + union lpfc_wqe wqe; /* WQE cmd */ + IOCB_t iocb; /* For IOCB cmd or if we want 128 byte WQE */ + + uint8_t rsvd2; uint8_t priority; /* OAS priority */ uint8_t retry; /* retry counter for IOCB cmd - if needed */ uint32_t iocb_flag; @@ -82,9 +89,12 @@ struct lpfc_iocbq { #define LPFC_IO_OAS 0x10000 /* OAS FCP IO */ #define LPFC_IO_FOF 0x20000 /* FOF FCP IO */ #define LPFC_IO_LOOPBACK 0x40000 /* Loopback IO */ +#define LPFC_PRLI_NVME_REQ 0x80000 /* This is an NVME PRLI. */ +#define LPFC_PRLI_FCP_REQ 0x100000 /* This is an NVME PRLI. */ +#define LPFC_IO_NVME 0x200000 /* NVME FCP command */ +#define LPFC_IO_NVME_LS 0x400000 /* NVME LS command */ uint32_t drvrTimeout; /* driver timeout in seconds */ - uint32_t fcp_wqidx; /* index to FCP work queue */ struct lpfc_vport *vport;/* virtual port pointer */ void *context1; /* caller context information */ void *context2; /* caller context information */ @@ -103,6 +113,8 @@ struct lpfc_iocbq { struct lpfc_iocbq *); void (*iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); + void (*wqe_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_wcqe_complete *); }; #define SLI_IOCB_RET_IOCB 1 /* Return IOCB if cmd ring full */ @@ -112,6 +124,14 @@ struct lpfc_iocbq { #define IOCB_ERROR 2 #define IOCB_TIMEDOUT 3 +#define SLI_WQE_RET_WQE 1 /* Return WQE if cmd ring full */ + +#define WQE_SUCCESS 0 +#define WQE_BUSY 1 +#define WQE_ERROR 2 +#define WQE_TIMEDOUT 3 +#define WQE_ABORTED 4 + #define LPFC_MBX_WAKE 1 #define LPFC_MBX_IMED_UNREG 2 @@ -298,11 +318,7 @@ struct lpfc_sli { #define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */ #define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */ - struct lpfc_sli_ring *ring; - int fcp_ring; /* ring used for FCP initiator commands */ - int next_ring; - - int extra_ring; /* extra ring used for other protocols */ + struct lpfc_sli_ring *sli3_ring; struct lpfc_sli_stat slistat; /* SLI statistical info */ struct list_head mboxq; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index dfbb25e5c5b68b..b5b4f6b843c0b3 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -35,9 +35,10 @@ #define LPFC_NEMBED_MBOX_SGL_CNT 254 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */ -#define LPFC_FCP_IO_CHAN_DEF 4 -#define LPFC_FCP_IO_CHAN_MIN 1 -#define LPFC_FCP_IO_CHAN_MAX 16 +#define LPFC_HBA_IO_CHAN_MIN 0 +#define LPFC_HBA_IO_CHAN_MAX 32 +#define 
LPFC_FCP_IO_CHAN_DEF 4 +#define LPFC_NVME_IO_CHAN_DEF 0 /* Number of channels used for Flash Optimized Fabric (FOF) operations */ @@ -107,6 +108,8 @@ enum lpfc_sli4_queue_subtype { LPFC_MBOX, LPFC_FCP, LPFC_ELS, + LPFC_NVME, + LPFC_NVME_LS, LPFC_USOL }; @@ -125,25 +128,41 @@ union sli4_qe { struct lpfc_rqe *rqe; }; +/* RQ buffer list */ +struct lpfc_rqb { + uint16_t entry_count; /* Current number of RQ slots */ + uint16_t buffer_count; /* Current number of buffers posted */ + struct list_head rqb_buffer_list; /* buffers assigned to this HBQ */ + /* Callback for HBQ buffer allocation */ + struct rqb_dmabuf *(*rqb_alloc_buffer)(struct lpfc_hba *); + /* Callback for HBQ buffer free */ + void (*rqb_free_buffer)(struct lpfc_hba *, + struct rqb_dmabuf *); +}; + struct lpfc_queue { struct list_head list; + struct list_head wq_list; enum lpfc_sli4_queue_type type; enum lpfc_sli4_queue_subtype subtype; struct lpfc_hba *phba; struct list_head child_list; + struct list_head page_list; + struct list_head sgl_list; uint32_t entry_count; /* Number of entries to support on the queue */ uint32_t entry_size; /* Size of each queue entry. */ uint32_t entry_repost; /* Count of entries before doorbell is rung */ #define LPFC_QUEUE_MIN_REPOST 8 uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ - struct list_head page_list; uint32_t page_count; /* Number of pages allocated for this queue */ uint32_t host_index; /* The host's index for putting or getting */ uint32_t hba_index; /* The last known hba index for get or put */ struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + struct lpfc_rqb *rqbp; /* ptr to RQ buffers */ + uint16_t sgl_list_cnt; uint16_t db_format; #define LPFC_DB_RING_FORMAT 0x01 #define LPFC_DB_LIST_FORMAT 0x02 @@ -176,6 +195,8 @@ struct lpfc_queue { #define RQ_buf_trunc q_cnt_3 #define RQ_rcv_buf q_cnt_4 + uint64_t isr_timestamp; + struct lpfc_queue *assoc_qp; union sli4_qe qe[1]; /* array to index entries (must be last) */ }; @@ -338,6 +359,7 @@ struct lpfc_bmbx { #define LPFC_CQE_DEF_COUNT 1024 #define LPFC_WQE_DEF_COUNT 256 #define LPFC_WQE128_DEF_COUNT 128 +#define LPFC_WQE128_MAX_COUNT 256 #define LPFC_MQE_DEF_COUNT 16 #define LPFC_RQE_DEF_COUNT 512 @@ -379,10 +401,14 @@ struct lpfc_max_cfg_param { struct lpfc_hba; /* SLI4 HBA multi-fcp queue handler struct */ -struct lpfc_fcp_eq_hdl { +struct lpfc_hba_eq_hdl { uint32_t idx; struct lpfc_hba *phba; - atomic_t fcp_eq_in_use; + atomic_t hba_eq_in_use; + struct cpumask *cpumask; + /* CPU affinitsed to or 0xffffffff if multiple */ + uint32_t cpu; +#define LPFC_MULTI_CPU_AFFINITY 0xffffffff }; /* Port Capabilities for SLI4 Parameters */ @@ -427,6 +453,7 @@ struct lpfc_pc_sli4_params { uint8_t wqsize; #define LPFC_WQ_SZ64_SUPPORT 1 #define LPFC_WQ_SZ128_SUPPORT 2 + uint8_t wqpcnt; }; struct lpfc_iov { @@ -445,7 +472,7 @@ struct lpfc_sli4_lnk_info { uint8_t optic_state; }; -#define LPFC_SLI4_HANDLER_CNT (LPFC_FCP_IO_CHAN_MAX+ \ +#define LPFC_SLI4_HANDLER_CNT (LPFC_HBA_IO_CHAN_MAX+ \ LPFC_FOF_IO_CHAN_NUM) #define LPFC_SLI4_HANDLER_NAME_SZ 16 @@ -516,21 +543,30 @@ struct lpfc_sli4_hba { struct lpfc_register sli_intf; struct lpfc_pc_sli4_params pc_sli4_params; uint8_t handler_name[LPFC_SLI4_HANDLER_CNT][LPFC_SLI4_HANDLER_NAME_SZ]; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */ + struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */ /* Pointers to the constructed SLI4 queues */ - struct lpfc_queue **hba_eq;/* Event queues for HBA */ - struct 
lpfc_queue **fcp_cq;/* Fast-path FCP compl queue */ - struct lpfc_queue **fcp_wq;/* Fast-path FCP work queue */ + struct lpfc_queue **hba_eq; /* Event queues for HBA */ + struct lpfc_queue **fcp_cq; /* Fast-path FCP compl queue */ + struct lpfc_queue **nvme_cq; /* Fast-path NVME compl queue */ + struct lpfc_queue **fcp_wq; /* Fast-path FCP work queue */ + struct lpfc_queue **nvme_wq; /* Fast-path NVME work queue */ uint16_t *fcp_cq_map; + uint16_t *nvme_cq_map; + struct list_head lpfc_wq_list; struct lpfc_queue *mbx_cq; /* Slow-path mailbox complete queue */ struct lpfc_queue *els_cq; /* Slow-path ELS response complete queue */ + struct lpfc_queue *nvmels_cq; /* NVME LS complete queue */ struct lpfc_queue *mbx_wq; /* Slow-path MBOX work queue */ struct lpfc_queue *els_wq; /* Slow-path ELS work queue */ + struct lpfc_queue *nvmels_wq; /* NVME LS work queue */ struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + struct lpfc_name wwnn; + struct lpfc_name wwpn; + uint32_t fw_func_mode; /* FW function protocol mode */ uint32_t ulp0_mode; /* ULP0 protocol mode */ uint32_t ulp1_mode; /* ULP1 protocol mode */ @@ -567,14 +603,17 @@ struct lpfc_sli4_hba { uint16_t rpi_hdrs_in_use; /* must post rpi hdrs if set. */ uint16_t next_xri; /* last_xri - max_cfg_param.xri_base = used */ uint16_t next_rpi; + uint16_t nvme_xri_max; + uint16_t nvme_xri_cnt; + uint16_t nvme_xri_start; uint16_t scsi_xri_max; uint16_t scsi_xri_cnt; - uint16_t els_xri_cnt; uint16_t scsi_xri_start; - struct list_head lpfc_free_sgl_list; - struct list_head lpfc_sgl_list; + uint16_t els_xri_cnt; + struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_abts_scsi_buf_list; + struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; struct list_head lpfc_rpi_hdr_list; unsigned long *rpi_bmask; @@ -601,8 +640,9 @@ struct lpfc_sli4_hba { #define LPFC_SLI4_PPNAME_NON 0 #define LPFC_SLI4_PPNAME_GET 1 struct lpfc_iov iov; + spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ - spinlock_t abts_sgl_list_lock; /* list of aborted els IOs */ + spinlock_t sgl_list_lock; /* list of aborted els IOs */ uint32_t physical_port; /* CPU to vector mapping information */ @@ -614,7 +654,7 @@ struct lpfc_sli4_hba { enum lpfc_sge_type { GEN_BUFF_TYPE, - SCSI_BUFF_TYPE + SCSI_BUFF_TYPE, }; enum lpfc_sgl_state { @@ -693,7 +733,7 @@ struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t, uint32_t); void lpfc_sli4_queue_free(struct lpfc_queue *); int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); -int lpfc_modify_fcp_eq_delay(struct lpfc_hba *, uint32_t); +int lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq); int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t, uint32_t); int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *, @@ -745,6 +785,7 @@ int lpfc_sli4_brdreset(struct lpfc_hba *); int lpfc_sli4_add_fcf_record(struct lpfc_hba *, struct fcf_record *); void lpfc_sli_remove_dflt_fcf(struct lpfc_hba *); int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *); +int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba); int lpfc_sli4_init_vpi(struct lpfc_vport *); uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool); uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool); diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 
5bbe6af148dd46..07c7c5f9fdde4f 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -403,6 +403,8 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable) vport->fdmi_port_mask = phba->pport->fdmi_port_mask; } + /* todo: init: register port with nvme */ + /* * In SLI4, the vpi must be activated before it can be used * by the port. From a0f2d3ef374fd8d2f51b8cc1ea723014b1aa2c9b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:31 -0800 Subject: [PATCH 0152/1911] scsi: lpfc: NVME Initiator: Merge into FC discovery NVME Initiator: Merge into FC discovery Adds NVME PRLI support and Nameserver registrations and Queries for NVME Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 6 +- drivers/scsi/lpfc/lpfc_ct.c | 357 +++++++++++++++++++---------- drivers/scsi/lpfc/lpfc_disc.h | 19 +- drivers/scsi/lpfc/lpfc_els.c | 280 +++++++++++++++------- drivers/scsi/lpfc/lpfc_hbadisc.c | 169 +++++++++++--- drivers/scsi/lpfc/lpfc_hw.h | 69 ++++-- drivers/scsi/lpfc/lpfc_hw4.h | 43 ++++ drivers/scsi/lpfc/lpfc_init.c | 7 + drivers/scsi/lpfc/lpfc_nportdisc.c | 177 ++++++++++++-- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- 10 files changed, 851 insertions(+), 279 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 77ad757ca23186..81c47d1aebb2c1 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -123,6 +123,8 @@ struct perf_prof { uint16_t wqidx[40]; }; +#define LPFC_FC4_TYPE_BITMASK 0x00000100 + /* Provide DMA memory definitions the driver uses per port instance. */ struct lpfc_dmabuf { struct list_head list; @@ -390,7 +392,8 @@ struct lpfc_vport { int32_t stopped; /* HBA has not been restarted since last ERATT */ uint8_t fc_linkspeed; /* Link speed after last READ_LA */ - uint32_t num_disc_nodes; /*in addition to hba_state */ + uint32_t num_disc_nodes; /* in addition to hba_state */ + uint32_t gidft_inp; /* cnt of outstanding GID_FTs */ uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */ uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */ @@ -443,7 +446,6 @@ struct lpfc_vport { uint32_t cfg_max_scsicmpl_time; uint32_t cfg_tgt_queue_depth; uint32_t cfg_first_burst_size; - uint32_t dev_loss_tmo_changed; struct fc_vport *fc_vport; diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 4ac03b16d17f56..afb25369bc3bf1 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -40,8 +40,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_version.h" @@ -453,8 +454,73 @@ lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did) { return NULL; } +static void +lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) +{ + struct lpfc_nodelist *ndlp; + + if ((vport->port_type != LPFC_NPIV_PORT) || + !(vport->ct_flags & FC_CT_RFF_ID) || !vport->cfg_restrict_login) { + + ndlp = lpfc_setup_disc_node(vport, Did); + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + /* By default, the driver expects to support FCP FC4 */ + if (fc4_type == FC_TYPE_FCP) + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + + if (fc4_type == FC_TYPE_NVME) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0238 Process x%06x NameServer Rsp " + "Data: x%x x%x x%x x%x\n", Did, + 
ndlp->nlp_flag, ndlp->nlp_fc4_type, + vport->fc_flag, + vport->fc_rscn_id_cnt); + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0239 Skip x%06x NameServer Rsp " + "Data: x%x x%x\n", Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); + + } else { + if (!(vport->fc_flag & FC_RSCN_MODE) || + lpfc_rscn_payload_check(vport, Did)) { + /* + * This NPortID was previously a FCP target, + * Don't even bother to send GFF_ID. + */ + ndlp = lpfc_findnode_did(vport, Did); + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) + ndlp->nlp_fc4_type = fc4_type; + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + ndlp->nlp_fc4_type = fc4_type; + + if (ndlp->nlp_type & NLP_FCP_TARGET) + lpfc_setup_disc_node(vport, Did); + + else if (lpfc_ns_cmd(vport, SLI_CTNS_GFF_ID, + 0, Did) == 0) + vport->num_disc_nodes++; + + else + lpfc_setup_disc_node(vport, Did); + } + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0245 Skip x%06x NameServer Rsp " + "Data: x%x x%x\n", Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); + } +} + static int -lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) +lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, + uint32_t Size) { struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = @@ -499,97 +565,12 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) */ if ((Did != vport->fc_myDID) && ((lpfc_find_vport_by_did(phba, Did) == NULL) || - vport->cfg_peer_port_login)) { - if ((vport->port_type != LPFC_NPIV_PORT) || - (!(vport->ct_flags & FC_CT_RFF_ID)) || - (!vport->cfg_restrict_login)) { - ndlp = lpfc_setup_disc_node(vport, Did); - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Parse GID_FTrsp: " - "did:x%x flg:x%x x%x", - Did, ndlp->nlp_flag, - vport->fc_flag); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0238 Process " - "x%x NameServer Rsp" - "Data: x%x x%x x%x\n", - Did, ndlp->nlp_flag, - vport->fc_flag, - vport->fc_rscn_id_cnt); - } else { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Skip1 GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0239 Skip x%x " - "NameServer Rsp Data: " - "x%x x%x\n", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - } - - } else { - if (!(vport->fc_flag & FC_RSCN_MODE) || - (lpfc_rscn_payload_check(vport, Did))) { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Query GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - /* This NPortID was previously - * a FCP target, * Don't even - * bother to send GFF_ID. 
- */ - ndlp = lpfc_findnode_did(vport, - Did); - if (ndlp && - NLP_CHK_NODE_ACT(ndlp) - && (ndlp->nlp_type & - NLP_FCP_TARGET)) - lpfc_setup_disc_node - (vport, Did); - else if (lpfc_ns_cmd(vport, - SLI_CTNS_GFF_ID, - 0, Did) == 0) - vport->num_disc_nodes++; - else - lpfc_setup_disc_node - (vport, Did); - } - else { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Skip2 GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0245 Skip x%x " - "NameServer Rsp Data: " - "x%x x%x\n", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - } - } - } + vport->cfg_peer_port_login)) + lpfc_prep_node_fc4type(vport, Did, fc4_type); + if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; + Cnt -= sizeof(uint32_t); } ctptr = NULL; @@ -609,16 +590,18 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_dmabuf *outp; + struct lpfc_dmabuf *inp; struct lpfc_sli_ct_request *CTrsp; + struct lpfc_sli_ct_request *CTreq; struct lpfc_nodelist *ndlp; - int rc; + int rc, type; /* First save ndlp, before we overwrite it */ ndlp = cmdiocb->context_un.ndlp; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; - + inp = (struct lpfc_dmabuf *) cmdiocb->context1; outp = (struct lpfc_dmabuf *) cmdiocb->context2; irsp = &rspiocb->iocb; @@ -656,9 +639,14 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, IOERR_NO_RESOURCES) vport->fc_ns_retry++; + type = lpfc_get_gidft_type(vport, cmdiocb); + if (type == 0) + goto out; + /* CT command is being retried */ + vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, - vport->fc_ns_retry, 0); + vport->fc_ns_retry, type); if (rc == 0) goto out; } @@ -670,13 +658,18 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, vport->fc_ns_retry); } else { /* Good status, continue checking */ + CTreq = (struct lpfc_sli_ct_request *) inp->virt; CTrsp = (struct lpfc_sli_ct_request *) outp->virt; if (CTrsp->CommandResponse.bits.CmdRsp == cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0208 NameServer Rsp Data: x%x\n", - vport->fc_flag); - lpfc_ns_rsp(vport, outp, + "0208 NameServer Rsp Data: x%x x%x\n", + vport->fc_flag, + CTreq->un.gid.Fc4Type); + + lpfc_ns_rsp(vport, + outp, + CTreq->un.gid.Fc4Type, (uint32_t) (irsp->un.genreq64.bdl.bdeSize)); } else if (CTrsp->CommandResponse.bits.CmdRsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { @@ -731,9 +724,11 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation); } + vport->gidft_inp--; } /* Link up / RSCN discovery */ - if (vport->num_disc_nodes == 0) { + if ((vport->num_disc_nodes == 0) && + (vport->gidft_inp == 0)) { /* * The driver has cycled through all Nports in the RSCN payload. 
* Complete the handling by cleaning up and marking the @@ -881,6 +876,56 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, return; } +static void +lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *)cmdiocb->context1; + struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *)cmdiocb->context2; + struct lpfc_sli_ct_request *CTrsp; + int did; + struct lpfc_nodelist *ndlp; + uint32_t fc4_data_0, fc4_data_1; + + did = ((struct lpfc_sli_ct_request *)inp->virt)->un.gft.PortId; + did = be32_to_cpu(did); + + if (irsp->ulpStatus == IOSTAT_SUCCESS) { + /* Good status, continue checking */ + CTrsp = (struct lpfc_sli_ct_request *)outp->virt; + fc4_data_0 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[0]); + fc4_data_1 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[1]); + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3062 DID x%06x GFT Wd0 x%08x Wd1 x%08x\n", + did, fc4_data_0, fc4_data_1); + + ndlp = lpfc_findnode_did(vport, did); + if (ndlp) { + /* The bitmask value for FCP and NVME FCP types is + * the same because they are 32 bits distant from + * each other in word0 and word0. + */ + if (fc4_data_0 & LPFC_FC4_TYPE_BITMASK) + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + if (fc4_data_1 & LPFC_FC4_TYPE_BITMASK) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3064 Setting ndlp %p, DID x%06x with " + "FC4 x%08x, Data: x%08x x%08x\n", + ndlp, did, ndlp->nlp_fc4_type, + FC_TYPE_FCP, FC_TYPE_NVME); + } + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); + lpfc_issue_els_prli(vport, ndlp, 0); + } else + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3065 GFT_ID failed x%08x\n", irsp->ulpStatus); + + lpfc_ct_free_iocb(phba, cmdiocb); +} static void lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, @@ -1071,31 +1116,27 @@ lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, return; } +/* + * Although the symbolic port name is thought to be an integer + * as of January 18, 2016, leave it as a string until more of + * the record state becomes defined. + */ int lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol, size_t size) { int n; - uint8_t *wwn = vport->phba->wwpn; - - n = snprintf(symbol, size, - "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - wwn[0], wwn[1], wwn[2], wwn[3], - wwn[4], wwn[5], wwn[6], wwn[7]); - if (vport->port_type == LPFC_PHYSICAL_PORT) - return n; - - if (n < size) - n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi); - - if (n < size && - strlen(vport->fc_vport->symbolic_name)) - n += snprintf(symbol + n, size - n, " VName-%s", - vport->fc_vport->symbolic_name); + /* + * Use the lpfc board number as the Symbolic Port + * Name object. NPIV is not in play so this integer + * value is sufficient and unique per FC-ID. 
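
An aside to clarify the LPFC_FC4_TYPE_BITMASK check above (a standalone sketch, not driver code): the FC-4 TYPEs object returned by GFT_ID is a 256-bit map laid out as 32-bit words, so a type value n maps to bit n%32 of word n/32. FCP (0x08) therefore lands on bit 8 of word 0 and NVME (0x28) on bit 8 of word 1, which is why the same 0x00000100 mask is applied to both fc4_data_0 and fc4_data_1. The macro names below are local stand-ins that mirror the standard type codes.

#include <stdint.h>
#include <stdio.h>

#define FC_TYPE_FCP_VAL   0x08
#define FC_TYPE_NVME_VAL  0x28

/* Set / test a type in a GFT_ID-style FC-4 TYPEs bitmap (8 x 32-bit words,
 * already converted to host byte order). */
static void fc4_map_set(uint32_t *words, uint8_t type)
{
        words[type / 32] |= 1u << (type % 32);
}

static int fc4_map_test(const uint32_t *words, uint8_t type)
{
        return (words[type / 32] >> (type % 32)) & 1;
}

int main(void)
{
        uint32_t words[8] = { 0 };

        fc4_map_set(words, FC_TYPE_FCP_VAL);
        fc4_map_set(words, FC_TYPE_NVME_VAL);
        printf("word0=0x%08x word1=0x%08x FCP=%d NVME=%d\n",
               words[0], words[1],
               fc4_map_test(words, FC_TYPE_FCP_VAL),
               fc4_map_test(words, FC_TYPE_NVME_VAL));
        return 0;
}
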
+ */ + n = snprintf(symbol, size, "%d", vport->phba->brd_no); return n; } + int lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, size_t size) @@ -1106,24 +1147,26 @@ lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, lpfc_decode_firmware_rev(vport->phba, fwrev, 0); n = snprintf(symbol, size, "Emulex %s", vport->phba->ModelName); - if (size < n) return n; - n += snprintf(symbol + n, size - n, " FV%s", fwrev); + n += snprintf(symbol + n, size - n, " FV%s", fwrev); if (size < n) return n; - n += snprintf(symbol + n, size - n, " DV%s", lpfc_release_version); + n += snprintf(symbol + n, size - n, " DV%s.", + lpfc_release_version); if (size < n) return n; - n += snprintf(symbol + n, size - n, " HN:%s", init_utsname()->nodename); - /* Note :- OS name is "Linux" */ + n += snprintf(symbol + n, size - n, " HN:%s.", + init_utsname()->nodename); if (size < n) return n; - n += snprintf(symbol + n, size - n, " OS:%s", init_utsname()->sysname); + /* Note :- OS name is "Linux" */ + n += snprintf(symbol + n, size - n, " OS:%s\n", + init_utsname()->sysname); return n; } @@ -1147,6 +1190,27 @@ lpfc_find_map_node(struct lpfc_vport *vport) return cnt; } +/* + * This routine will return the FC4 Type associated with the CT + * GID_FT command. + */ +int +lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb) +{ + struct lpfc_sli_ct_request *CtReq; + struct lpfc_dmabuf *mp; + uint32_t type; + + mp = cmdiocb->context1; + if (mp == NULL) + return 0; + CtReq = (struct lpfc_sli_ct_request *)mp->virt; + type = (uint32_t)CtReq->un.gid.Fc4Type; + if ((type != SLI_CTPT_FCP) && (type != SLI_CTPT_NVME)) + return 0; + return type; +} + /* * lpfc_ns_cmd * Description: @@ -1207,8 +1271,9 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, /* NameServer Req */ lpfc_printf_vlog(vport, KERN_INFO ,LOG_DISCOVERY, - "0236 NameServer Req Data: x%x x%x x%x\n", - cmdcode, vport->fc_flag, vport->fc_rscn_id_cnt); + "0236 NameServer Req Data: x%x x%x x%x x%x\n", + cmdcode, vport->fc_flag, vport->fc_rscn_id_cnt, + context); bpl = (struct ulp_bde64 *) bmp->virt; memset(bpl, 0, sizeof(struct ulp_bde64)); @@ -1219,6 +1284,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, bpl->tus.f.bdeSize = GID_REQUEST_SZ; else if (cmdcode == SLI_CTNS_GFF_ID) bpl->tus.f.bdeSize = GFF_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_GFT_ID) + bpl->tus.f.bdeSize = GFT_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RFT_ID) bpl->tus.f.bdeSize = RFT_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RNN_ID) @@ -1246,7 +1313,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_GID_FT: CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(SLI_CTNS_GID_FT); - CtReq->un.gid.Fc4Type = SLI_CTPT_FCP; + CtReq->un.gid.Fc4Type = context; + if (vport->port_state < LPFC_NS_QRY) vport->port_state = LPFC_NS_QRY; lpfc_set_disctmo(vport); @@ -1261,12 +1329,32 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, cmpl = lpfc_cmpl_ct_cmd_gff_id; break; + case SLI_CTNS_GFT_ID: + CtReq->CommandResponse.bits.CmdRsp = + cpu_to_be16(SLI_CTNS_GFT_ID); + CtReq->un.gft.PortId = cpu_to_be32(context); + cmpl = lpfc_cmpl_ct_cmd_gft_id; + break; + case SLI_CTNS_RFT_ID: vport->ct_flags &= ~FC_CT_RFT_ID; CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(SLI_CTNS_RFT_ID); CtReq->un.rft.PortId = cpu_to_be32(vport->fc_myDID); - CtReq->un.rft.fcpReg = 1; + + /* Register FC4 FCP type if enabled. 
*/ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) + CtReq->un.rft.fcpReg = 1; + + /* Register NVME type if enabled. Defined LE and swapped. + * rsvd[0] is used as word1 because of the hard-coded + * word0 usage in the ct_request data structure. + */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + CtReq->un.rft.rsvd[0] = cpu_to_be32(0x00000100); + cmpl = lpfc_cmpl_ct_cmd_rft_id; break; @@ -1316,7 +1404,25 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, cpu_to_be16(SLI_CTNS_RFF_ID); CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID); CtReq->un.rff.fbits = FC4_FEATURE_INIT; - CtReq->un.rff.type_code = FC_TYPE_FCP; + + /* The driver always supports FC_TYPE_FCP. However, the + * caller can specify NVME (type x28) as well. But only + * these that FC4 type is supported. + */ + if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && + (context == FC_TYPE_NVME)) { + /* todo: init: revise localport nvme attributes */ + CtReq->un.rff.type_code = context; + + } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) && + (context == FC_TYPE_FCP)) + CtReq->un.rff.type_code = context; + + else + goto ns_cmd_free_bmpvirt; + cmpl = lpfc_cmpl_ct_cmd_rff_id; break; } @@ -1337,6 +1443,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, */ lpfc_nlp_put(ndlp); +ns_cmd_free_bmpvirt: lpfc_mbuf_free(phba, bmp->virt, bmp->phys); ns_cmd_free_bmp: kfree(bmp); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 361f5b3d9d936b..13cc1d19b33653 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -86,6 +86,17 @@ struct lpfc_nodelist { #define NLP_FABRIC 0x4 /* entry rep a Fabric entity */ #define NLP_FCP_TARGET 0x8 /* entry is an FCP target */ #define NLP_FCP_INITIATOR 0x10 /* entry is an FCP Initiator */ +#define NLP_NVME_TARGET 0x20 /* entry is a NVME Target */ +#define NLP_NVME_INITIATOR 0x40 /* entry is a NVME Initiator */ + + uint16_t nlp_fc4_type; /* FC types node supports. */ + /* Assigned from GID_FF, only + * FCP (0x8) and NVME (0x28) + * supported. + */ +#define NLP_FC4_NONE 0x0 +#define NLP_FC4_FCP 0x1 /* FC4 Type FCP (value x8)) */ +#define NLP_FC4_NVME 0x2 /* FC4 TYPE NVME (value x28) */ uint16_t nlp_rpi; uint16_t nlp_state; /* state transition indicator */ @@ -107,8 +118,8 @@ struct lpfc_nodelist { struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */ struct lpfc_hba *phba; - struct fc_rport *rport; /* Corresponding FC transport - port structure */ + struct fc_rport *rport; /* scsi_transport_fc port structure */ + struct lpfc_nvme_rport *nrport; /* nvme transport rport struct. */ struct lpfc_vport *vport; struct lpfc_work_evt els_retry_evt; struct lpfc_work_evt dev_loss_evt; @@ -118,6 +129,10 @@ struct lpfc_nodelist { unsigned long last_change_time; unsigned long *active_rrqs_xri_bitmap; struct lpfc_scsicmd_bkt *lat_data; /* Latency data */ + uint32_t fc4_prli_sent; + uint32_t upcall_flags; + uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ +#define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. 
*/ }; struct lpfc_node_rrq { struct list_head list; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 7be2354575405a..e164eed25e3d60 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -29,7 +29,6 @@ #include #include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -1513,7 +1512,7 @@ static struct lpfc_nodelist * lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, struct lpfc_nodelist *ndlp) { - struct lpfc_vport *vport = ndlp->vport; + struct lpfc_vport *vport = ndlp->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *new_ndlp; struct lpfc_rport_data *rdata; @@ -1868,10 +1867,12 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* PLOGI completes to NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0102 PLOGI completes to NPort x%x " + "0102 PLOGI completes to NPort x%06x " "Data: x%x x%x x%x x%x x%x\n", - ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4], - irsp->ulpTimeout, disc, vport->num_disc_nodes); + ndlp->nlp_DID, ndlp->nlp_fc4_type, + irsp->ulpStatus, irsp->un.ulpWord[4], + disc, vport->num_disc_nodes); + /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) { spin_lock_irq(shost->host_lock); @@ -2000,7 +2001,6 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) sp->cmn.fcphHigh = FC_PH3; sp->cmn.valid_vendor_ver_level = 0; - memset(sp->vendorVersion, 0, sizeof(sp->vendorVersion)); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PLOGI: did:x%x", @@ -2052,14 +2052,17 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "PRLI cmpl: status:x%x/x%x did:x%x", irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID); + + /* Ddriver supports multiple FC4 types. Counters matter. */ + vport->fc_prli_sent--; + /* PRLI completes to NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0103 PRLI completes to NPort x%x " + "0103 PRLI completes to NPort x%06x " "Data: x%x x%x x%x x%x\n", ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4], - irsp->ulpTimeout, vport->num_disc_nodes); + vport->num_disc_nodes, ndlp->fc4_prli_sent); - vport->fc_prli_sent--; /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) goto out; @@ -2068,6 +2071,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ + ndlp->fc4_prli_sent--; goto out; } /* PRLI failed */ @@ -2082,9 +2086,14 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); } else - /* Good status, call state machine */ + /* Good status, call state machine. However, if another + * PRLI is outstanding, don't call the state machine + * because final disposition to Mapped or Unmapped is + * completed there. 
+ */ lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); + out: lpfc_els_free_iocb(phba, cmdiocb); return; @@ -2118,42 +2127,94 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_hba *phba = vport->phba; PRLI *npr; + struct lpfc_nvme_prli *npr_nvme; struct lpfc_iocbq *elsiocb; uint8_t *pcmd; uint16_t cmdsize; - - cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); + u32 local_nlp_type, elscmd; + + local_nlp_type = ndlp->nlp_fc4_type; + + send_next_prli: + if (local_nlp_type & NLP_FC4_FCP) { + /* Payload is 4 + 16 = 20 x14 bytes. */ + cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); + elscmd = ELS_CMD_PRLI; + } else if (local_nlp_type & NLP_FC4_NVME) { + /* Payload is 4 + 20 = 24 x18 bytes. */ + cmdsize = (sizeof(uint32_t) + sizeof(struct lpfc_nvme_prli)); + elscmd = ELS_CMD_NVMEPRLI; + } else { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "3083 Unknown FC_TYPE x%x ndlp x%06x\n", + ndlp->nlp_fc4_type, ndlp->nlp_DID); + return 1; + } elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, - ndlp->nlp_DID, ELS_CMD_PRLI); + ndlp->nlp_DID, elscmd); if (!elsiocb) return 1; pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); /* For PRLI request, remainder of payload is service parameters */ - memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t))); - *((uint32_t *) (pcmd)) = ELS_CMD_PRLI; - pcmd += sizeof(uint32_t); + memset(pcmd, 0, cmdsize); - /* For PRLI, remainder of payload is PRLI parameter page */ - npr = (PRLI *) pcmd; - /* - * If our firmware version is 3.20 or later, - * set the following bits for FC-TAPE support. - */ - if (phba->vpd.rev.feaLevelHigh >= 0x02) { - npr->ConfmComplAllowed = 1; - npr->Retry = 1; - npr->TaskRetryIdReq = 1; - } - npr->estabImagePair = 1; - npr->readXferRdyDis = 1; - if (vport->cfg_first_burst_size) - npr->writeXferRdyDis = 1; + if (local_nlp_type & NLP_FC4_FCP) { + /* Remainder of payload is FCP PRLI parameter page. + * Note: this data structure is defined as + * BE/LE in the structure definition so no + * byte swap call is made. + */ + *((uint32_t *)(pcmd)) = ELS_CMD_PRLI; + pcmd += sizeof(uint32_t); + npr = (PRLI *)pcmd; - /* For FCP support */ - npr->prliType = PRLI_FCP_TYPE; - npr->initiatorFunc = 1; + /* + * If our firmware version is 3.20 or later, + * set the following bits for FC-TAPE support. + */ + if (phba->vpd.rev.feaLevelHigh >= 0x02) { + npr->ConfmComplAllowed = 1; + npr->Retry = 1; + npr->TaskRetryIdReq = 1; + } + npr->estabImagePair = 1; + npr->readXferRdyDis = 1; + if (vport->cfg_first_burst_size) + npr->writeXferRdyDis = 1; + + /* For FCP support */ + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + elsiocb->iocb_flag |= LPFC_PRLI_FCP_REQ; + + /* Remove FCP type - processed. */ + local_nlp_type &= ~NLP_FC4_FCP; + } else if (local_nlp_type & NLP_FC4_NVME) { + /* Remainder of payload is NVME PRLI parameter page. + * This data structure is the newer definition that + * uses bf macros so a byte swap is required. + */ + *((uint32_t *)(pcmd)) = ELS_CMD_NVMEPRLI; + pcmd += sizeof(uint32_t); + npr_nvme = (struct lpfc_nvme_prli *)pcmd; + bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); + bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ + + /* Only initiators request first burst. 
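
The send_next_prli flow introduced in this hunk boils down to the per-type loop sketched below (illustration only; the flag names and values are stand-ins, not the driver's): a local copy of the node's FC4 type mask is consumed one type per pass, so an FCP PRLI and an NVME PRLI can both be issued before the routine returns.

#include <stdio.h>

#define FC4_FCP_FLAG   0x1      /* stand-in for an FCP type bit  */
#define FC4_NVME_FLAG  0x2      /* stand-in for an NVME type bit */

static void issue_one_prli_per_type(unsigned int nlp_fc4_type)
{
        unsigned int local = nlp_fc4_type;

        while (local & (FC4_FCP_FLAG | FC4_NVME_FLAG)) {
                if (local & FC4_FCP_FLAG) {
                        printf("build and send FCP PRLI page\n");
                        local &= ~FC4_FCP_FLAG;         /* type processed */
                } else {
                        printf("build and send NVME PRLI page\n");
                        local &= ~FC4_NVME_FLAG;
                }
        }
}

int main(void)
{
        issue_one_prli_per_type(FC4_FCP_FLAG | FC4_NVME_FLAG);
        return 0;
}
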
*/ + if ((phba->cfg_nvme_enable_fb) && + !phba->nvmet_support) + bf_set(prli_fba, npr_nvme, 1); + + bf_set(prli_init, npr_nvme, 1); + npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); + npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); + elsiocb->iocb_flag |= LPFC_PRLI_NVME_REQ; + + /* Remove NVME type - processed. */ + local_nlp_type &= ~NLP_FC4_NVME; + } lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PRLI: did:x%x", @@ -2172,7 +2233,20 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_free_iocb(phba, elsiocb); return 1; } + + /* The vport counters are used for lpfc_scan_finished, but + * the ndlp is used to track outstanding PRLIs for different + * FC4 types. + */ vport->fc_prli_sent++; + ndlp->fc4_prli_sent++; + + /* The driver supports 2 FC4 types. Make sure + * a PRLI is issued for all types before exiting. + */ + if (local_nlp_type & (NLP_FC4_FCP | NLP_FC4_NVME)) + goto send_next_prli; + return 0; } @@ -2543,6 +2617,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_PT2PT_PLOGI)) { phba->pport->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { lpfc_config_link(phba, mbox); @@ -3055,6 +3132,7 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) } break; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: if (!lpfc_issue_els_prli(vport, ndlp, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); @@ -3245,7 +3323,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, break; } if ((cmd == ELS_CMD_PLOGI) || - (cmd == ELS_CMD_PRLI)) { + (cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) { delay = 1000; maxretry = lpfc_max_els_tries + 1; retry = 1; @@ -3265,7 +3344,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case LSRJT_LOGICAL_BSY: if ((cmd == ELS_CMD_PLOGI) || - (cmd == ELS_CMD_PRLI)) { + (cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) { delay = 1000; maxretry = 48; } else if (cmd == ELS_CMD_FDISC) { @@ -3399,7 +3479,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, spin_unlock_irq(shost->host_lock); ndlp->nlp_prev_state = ndlp->nlp_state; - if (cmd == ELS_CMD_PRLI) + if ((cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); else @@ -3430,6 +3511,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry); return 1; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry); @@ -3990,14 +4072,10 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, sizeof(struct lpfc_name)); memcpy(&sp->nodeName, &vport->fc_sparam.nodeName, sizeof(struct lpfc_name)); - } else { + } else memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); - sp->cmn.valid_vendor_ver_level = 0; - memset(sp->vendorVersion, 0, sizeof(sp->vendorVersion)); - } - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC FLOGI/PLOGI: did:x%x flg:x%x", ndlp->nlp_DID, ndlp->nlp_flag, 0); @@ -4231,17 +4309,43 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, { struct lpfc_hba *phba = vport->phba; PRLI *npr; + struct lpfc_nvme_prli *npr_nvme; lpfc_vpd_t *vpd; IOCB_t *icmd; IOCB_t *oldcmd; struct lpfc_iocbq *elsiocb; uint8_t 
*pcmd; uint16_t cmdsize; + uint32_t prli_fc4_req, *req_payload; + struct lpfc_dmabuf *req_buf; int rc; + u32 elsrspcmd; + + /* Need the incoming PRLI payload to determine if the ACC is for an + * FC4 or NVME PRLI type. The PRLI type is at word 1. + */ + req_buf = (struct lpfc_dmabuf *)oldiocb->context2; + req_payload = (((uint32_t *)req_buf->virt) + 1); + + /* PRLI type payload is at byte 3 for FCP or NVME. */ + prli_fc4_req = be32_to_cpu(*req_payload); + prli_fc4_req = (prli_fc4_req >> 24) & 0xff; + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "6127 PRLI_ACC: Req Type x%x, Word1 x%08x\n", + prli_fc4_req, *((uint32_t *)req_payload)); + + if (prli_fc4_req == PRLI_FCP_TYPE) { + cmdsize = sizeof(uint32_t) + sizeof(PRLI); + elsrspcmd = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); + } else if (prli_fc4_req & PRLI_NVME_TYPE) { + cmdsize = sizeof(uint32_t) + sizeof(struct lpfc_nvme_prli); + elsrspcmd = (ELS_CMD_ACC | (ELS_CMD_NVMEPRLI & ~ELS_RSP_MASK)); + } else { + return 1; + } - cmdsize = sizeof(uint32_t) + sizeof(PRLI); elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, - ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK))); + ndlp->nlp_DID, elsrspcmd); if (!elsiocb) return 1; @@ -4258,33 +4362,56 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + memset(pcmd, 0, cmdsize); *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ - memset(pcmd, 0, sizeof(PRLI)); - - npr = (PRLI *) pcmd; vpd = &phba->vpd; - /* - * If the remote port is a target and our firmware version is 3.20 or - * later, set the following bits for FC-TAPE support. - */ - if ((ndlp->nlp_type & NLP_FCP_TARGET) && - (vpd->rev.feaLevelHigh >= 0x02)) { - npr->ConfmComplAllowed = 1; - npr->Retry = 1; - npr->TaskRetryIdReq = 1; - } - - npr->acceptRspCode = PRLI_REQ_EXECUTED; - npr->estabImagePair = 1; - npr->readXferRdyDis = 1; - npr->ConfmComplAllowed = 1; - npr->prliType = PRLI_FCP_TYPE; - npr->initiatorFunc = 1; + if (prli_fc4_req == PRLI_FCP_TYPE) { + /* + * If the remote port is a target and our firmware version + * is 3.20 or later, set the following bits for FC-TAPE + * support. 
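
A small sketch (not driver code) of how the ACC path above decides between an FCP and an NVME PRLI page: the FC-4 TYPE code is the first byte of the PRLI service-parameter page, i.e. word 1 of the ELS payload, so after be32_to_cpu() it sits in the top byte of that word. The type-code macros below are local stand-ins for the standard values.

#include <stdint.h>
#include <stdio.h>

#define PRLI_FCP_TYPE_VAL   0x08        /* standard FCP type code  */
#define PRLI_NVME_TYPE_VAL  0x28        /* standard NVME type code */

/* word1 is the second 32-bit word of the received PRLI, already converted
 * from wire (big-endian) order. */
static uint8_t prli_fc4_type(uint32_t word1)
{
        return (word1 >> 24) & 0xff;
}

int main(void)
{
        uint32_t fcp_word1  = (uint32_t)PRLI_FCP_TYPE_VAL  << 24;
        uint32_t nvme_word1 = (uint32_t)PRLI_NVME_TYPE_VAL << 24;

        printf("type(fcp)=0x%02x type(nvme)=0x%02x\n",
               prli_fc4_type(fcp_word1), prli_fc4_type(nvme_word1));
        return 0;
}
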
+ */ + npr = (PRLI *) pcmd; + if ((ndlp->nlp_type & NLP_FCP_TARGET) && + (vpd->rev.feaLevelHigh >= 0x02)) { + npr->ConfmComplAllowed = 1; + npr->Retry = 1; + npr->TaskRetryIdReq = 1; + } + npr->acceptRspCode = PRLI_REQ_EXECUTED; + npr->estabImagePair = 1; + npr->readXferRdyDis = 1; + npr->ConfmComplAllowed = 1; + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + } else if (prli_fc4_req & PRLI_NVME_TYPE) { + /* Respond with an NVME PRLI Type */ + npr_nvme = (struct lpfc_nvme_prli *) pcmd; + bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); + bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ + bf_set(prli_acc_rsp_code, npr_nvme, PRLI_REQ_EXECUTED); + bf_set(prli_init, npr_nvme, 1); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6015 NVME issue PRLI ACC word1 x%08x " + "word4 x%08x word5 x%08x flag x%x, " + "fcp_info x%x nlp_type x%x\n", + npr_nvme->word1, npr_nvme->word4, + npr_nvme->word5, ndlp->nlp_flag, + ndlp->nlp_fcp_info, ndlp->nlp_type); + npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); + npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); + npr_nvme->word5 = cpu_to_be32(npr_nvme->word5); + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6128 Unknown FC_TYPE x%x x%x ndlp x%06x\n", + prli_fc4_req, ndlp->nlp_fc4_type, + ndlp->nlp_DID); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC PRLI: did:x%x flg:x%x", @@ -4411,7 +4538,7 @@ lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format, **/ static void lpfc_els_clear_rrq(struct lpfc_vport *vport, - struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) + struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) { struct lpfc_hba *phba = vport->phba; uint8_t *pcmd; @@ -4909,7 +5036,7 @@ lpfc_rdp_res_opd_desc(struct fc_rdp_opd_sfp_desc *desc, memcpy(desc->opd_info.vendor_name, &page_a0[SSF_VENDOR_NAME], 16); memcpy(desc->opd_info.model_number, &page_a0[SSF_VENDOR_PN], 16); memcpy(desc->opd_info.serial_number, &page_a0[SSF_VENDOR_SN], 16); - memcpy(desc->opd_info.revision, &page_a0[SSF_VENDOR_REV], 2); + memcpy(desc->opd_info.revision, &page_a0[SSF_VENDOR_REV], 4); memcpy(desc->opd_info.date, &page_a0[SSF_DATE_CODE], 8); desc->length = cpu_to_be32(sizeof(desc->opd_info)); return sizeof(struct fc_rdp_opd_sfp_desc); @@ -5004,7 +5131,7 @@ lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc, memcpy(desc->port_names.wwnn, phba->wwnn, sizeof(desc->port_names.wwnn)); - memcpy(desc->port_names.wwpn, &phba->wwpn, + memcpy(desc->port_names.wwpn, phba->wwpn, sizeof(desc->port_names.wwpn)); desc->length = cpu_to_be32(sizeof(desc->port_names)); @@ -5233,9 +5360,8 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct ls_rjt stat; if (phba->sli_rev < LPFC_SLI_REV4 || - (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) != - LPFC_SLI_INTF_IF_TYPE_2)) { + bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + LPFC_SLI_INTF_IF_TYPE_2) { rjt_err = LSRJT_UNABLE_TPC; rjt_expl = LSEXP_REQ_UNSUPPORTED; goto error; @@ -5976,9 +6102,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) if (ndlp && NLP_CHK_NODE_ACT(ndlp) && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { /* Good ndlp, issue CT Request to NameServer */ - if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0) + vport->gidft_inp = 0; + if (lpfc_issue_gidft(vport) == 0) /* Wait for NameServer query cmpl before we can - continue */ + * continue + */ return 1; } else { /* If login to NameServer does not exist, issue one */ @@ -6082,7 +6210,6 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq 
*cmdiocb, (void) lpfc_check_sparm(vport, ndlp, sp, CLASS3, 1); - /* * If our portname is greater than the remote portname, * then we initiate Nport login. @@ -7779,6 +7906,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_els_rcv_fan(vport, elsiocb, ndlp); break; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV PRLI: did:x%x/ste:x%x flg:x%x", did, vport->port_state, ndlp->nlp_flag); @@ -8883,8 +9011,7 @@ lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, break; } - if (atomic_read(&phba->fabric_iocb_count) == 0) - BUG(); + BUG_ON(atomic_read(&phba->fabric_iocb_count) == 0); cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl; cmdiocb->fabric_iocb_cmpl = NULL; @@ -8929,8 +9056,7 @@ lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) int ready; int ret; - if (atomic_read(&phba->fabric_iocb_count) > 1) - BUG(); + BUG_ON(atomic_read(&phba->fabric_iocb_count) > 1); spin_lock_irqsave(&phba->hbalock, iflags); ready = atomic_read(&phba->fabric_iocb_count) == 0 && diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ede831d1f46761..6e64c8e8e44f5d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -31,6 +31,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -38,8 +41,9 @@ #include "lpfc_disc.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -853,9 +857,12 @@ lpfc_port_link_failure(struct lpfc_vport *vport) void lpfc_linkdown_port(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_event(shost, fc_get_event_number(), + FCH_EVT_LINKDOWN, 0); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Link Down: state:x%x rtry:x%x flg:x%x", @@ -981,7 +988,9 @@ lpfc_linkup_port(struct lpfc_vport *vport) (vport != phba->pport)) return; - fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_event(shost, fc_get_event_number(), + FCH_EVT_LINKUP, 0); spin_lock_irq(shost->host_lock); vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | @@ -3570,6 +3579,8 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); spin_unlock_irq(shost->host_lock); vport->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ goto out; } @@ -3819,6 +3830,52 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) return; } + /* + * This routine will issue a GID_FT for each FC4 Type supported + * by the driver. ALL GID_FTs must complete before discovery is started. 
+ */ +int +lpfc_issue_gidft(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + + /* Good status, issue CT Request to NameServer */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_FCP)) { + /* Cannot issue NameServer FCP Query, so finish up + * discovery + */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI, + "0604 %s FC TYPE %x %s\n", + "Failed to issue GID_FT to ", + FC_TYPE_FCP, + "Finishing discovery."); + return 0; + } + vport->gidft_inp++; + } + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_NVME)) { + /* Cannot issue NameServer NVME Query, so finish up + * discovery + */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI, + "0605 %s FC_TYPE %x %s %d\n", + "Failed to issue GID_FT to ", + FC_TYPE_NVME, + "Finishing discovery: gidftinp ", + vport->gidft_inp); + if (vport->gidft_inp == 0) + return 0; + } else + vport->gidft_inp++; + } + return vport->gidft_inp; +} + /* * This routine handles processing a NameServer REG_LOGIN mailbox * command upon completion. It is setup in the LPFC_MBOXQ @@ -3835,12 +3892,14 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) pmb->context1 = NULL; pmb->context2 = NULL; + vport->gidft_inp = 0; if (mb->mbxStatus) { -out: lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, "0260 Register NameServer error: 0x%x\n", mb->mbxStatus); + +out: /* decrement the node reference count held for this * callback function. */ @@ -3884,20 +3943,28 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0); lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0); lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0); - lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_FCP); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_NVME); /* Issue SCR just before NameServer GID_FT Query */ lpfc_issue_els_scr(vport, SCR_DID, 0); } vport->fc_ns_retry = 0; - /* Good status, issue CT Request to NameServer */ - if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) { - /* Cannot issue NameServer Query, so finish up discovery */ + if (lpfc_issue_gidft(vport) == 0) goto out; - } - /* decrement the node reference count held for this + /* + * At this point in time we may need to wait for multiple + * SLI_CTNS_GID_FT CT commands to complete before we start discovery. + * + * decrement the node reference count held for this * callback function. 
*/ lpfc_nlp_put(ndlp); @@ -3990,6 +4057,10 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) { struct fc_rport *rport = ndlp->rport; struct lpfc_vport *vport = ndlp->vport; + struct lpfc_hba *phba = vport->phba; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + return; lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, "rport delete: did:x%x flg:x%x type x%x", @@ -4047,6 +4118,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -4057,23 +4129,51 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (new_state == NLP_STE_NPR_NODE) ndlp->nlp_flag &= ~NLP_RCV_PLOGI; - /* Transport interface */ - if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE || - old_state == NLP_STE_UNMAPPED_NODE)) { - vport->phba->nport_event_cnt++; - lpfc_unregister_remote_port(ndlp); + /* FCP and NVME Transport interface */ + if ((old_state == NLP_STE_MAPPED_NODE || + old_state == NLP_STE_UNMAPPED_NODE)) { + if (ndlp->rport) { + vport->phba->nport_event_cnt++; + lpfc_unregister_remote_port(ndlp); + } + + /* Notify the NVME transport of this rport's loss */ + if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && + (vport->phba->nvmet_support == 0) && + ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || + (ndlp->nlp_DID == Fabric_DID))) { + vport->phba->nport_event_cnt++; + /* todo: init: unregister rport from nvme */ + } } + /* FCP and NVME Transport interfaces */ + if (new_state == NLP_STE_MAPPED_NODE || new_state == NLP_STE_UNMAPPED_NODE) { - vport->phba->nport_event_cnt++; - /* - * Tell the fc transport about the port, if we haven't - * already. If we have, and it's a scsi entity, be - * sure to unblock any attached scsi devices - */ - lpfc_register_remote_port(vport, ndlp); + if ((ndlp->nlp_fc4_type & NLP_FC4_FCP) || + (ndlp->nlp_DID == Fabric_DID)) { + vport->phba->nport_event_cnt++; + /* + * Tell the fc transport about the port, if we haven't + * already. If we have, and it's a scsi entity, be + */ + lpfc_register_remote_port(vport, ndlp); + } + /* Notify the NVME transport of this new rport. */ + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { + if (vport->phba->nvmet_support == 0) { + /* Register this rport with the transport. + * Initiators take the NDLP ref count in + * the register. + */ + vport->phba->nport_event_cnt++; + /* todo: init: register rport with nvme */ + } + } } + if ((new_state == NLP_STE_MAPPED_NODE) && (vport->stat_data_enabled)) { /* @@ -4091,12 +4191,13 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0x%x\n", ndlp->nlp_DID); } /* - * if we added to Mapped list, but the remote port - * registration failed or assigned a target id outside - * our presentable range - move the node to the - * Unmapped List + * If the node just added to Mapped list was an FCP target, + * but the remote port registration failed or assigned a target + * id outside the presentable range - move the node to the + * Unmapped List. 
*/ - if (new_state == NLP_STE_MAPPED_NODE && + if ((new_state == NLP_STE_MAPPED_NODE) && + (ndlp->nlp_type & NLP_FCP_TARGET) && (!ndlp->rport || ndlp->rport->scsi_target_id == -1 || ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) { @@ -4230,6 +4331,7 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->vport = vport; ndlp->phba = vport->phba; ndlp->nlp_sid = NLP_NO_SID; + ndlp->nlp_fc4_type = NLP_FC4_NONE; kref_init(&ndlp->kref); NLP_INT_NODE_ACT(ndlp); atomic_set(&ndlp->cmd_pending, 0); @@ -5369,12 +5471,13 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) switch (vport->port_state) { case LPFC_LOCAL_CFG_LINK: - /* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for - * FAN - */ - /* FAN timeout */ + /* + * port_state is identically LPFC_LOCAL_CFG_LINK while + * waiting for FAN timeout + */ lpfc_printf_vlog(vport, KERN_WARNING, LOG_DISCOVERY, "0221 FAN timeout\n"); + /* Start discovery by sending FLOGI, clean up old rpis */ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { @@ -5445,8 +5548,8 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { /* Try it one more time */ vport->fc_ns_retry++; - rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, - vport->fc_ns_retry, 0); + vport->gidft_inp = 0; + rc = lpfc_issue_gidft(vport); if (rc == 0) break; } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 28247c99b4f220..883e6d2a7bc7bd 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -90,8 +90,10 @@ union CtCommandResponse { uint32_t word; }; -#define FC4_FEATURE_INIT 0x2 -#define FC4_FEATURE_TARGET 0x1 +/* FC4 Feature bits for RFF_ID */ +#define FC4_FEATURE_TARGET 0x1 +#define FC4_FEATURE_INIT 0x2 +#define FC4_FEATURE_NVME_DISC 0x4 struct lpfc_sli_ct_request { /* Structure is in Big Endian format */ @@ -115,6 +117,16 @@ struct lpfc_sli_ct_request { uint8_t AreaScope; uint8_t Fc4Type; /* for GID_FT requests */ } gid; + struct gid_ff { + uint8_t Flags; + uint8_t DomainScope; + uint8_t AreaScope; + uint8_t rsvd1; + uint8_t rsvd2; + uint8_t rsvd3; + uint8_t Fc4FBits; + uint8_t Fc4Type; + } gid_ff; struct rft { uint32_t PortId; /* For RFT_ID requests */ @@ -159,6 +171,12 @@ struct lpfc_sli_ct_request { struct gff_acc { uint8_t fbits[128]; } gff_acc; + struct gft { + uint32_t PortId; + } gft; + struct gft_acc { + uint32_t fc4_types[8]; + } gft_acc; #define FCP_TYPE_FEATURE_OFFSET 7 struct rff { uint32_t PortId; @@ -174,8 +192,12 @@ struct lpfc_sli_ct_request { #define SLI_CT_REVISION 1 #define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct gid)) +#define GIDFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gid_ff)) #define GFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct gff)) +#define GFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gft)) #define RFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct rft)) #define RFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ @@ -271,6 +293,7 @@ struct lpfc_sli_ct_request { #define SLI_CTNS_GNN_IP 0x0153 #define SLI_CTNS_GIPA_IP 0x0156 #define SLI_CTNS_GID_FT 0x0171 +#define SLI_CTNS_GID_FF 0x01F1 #define SLI_CTNS_GID_PT 0x01A1 #define SLI_CTNS_RPN_ID 0x0212 #define SLI_CTNS_RNN_ID 0x0213 @@ -288,15 +311,16 @@ struct lpfc_sli_ct_request { * Port Types */ -#define SLI_CTPT_N_PORT 0x01 -#define SLI_CTPT_NL_PORT 0x02 -#define SLI_CTPT_FNL_PORT 0x03 -#define SLI_CTPT_IP 
0x04 -#define SLI_CTPT_FCP 0x08 -#define SLI_CTPT_NX_PORT 0x7F -#define SLI_CTPT_F_PORT 0x81 -#define SLI_CTPT_FL_PORT 0x82 -#define SLI_CTPT_E_PORT 0x84 +#define SLI_CTPT_N_PORT 0x01 +#define SLI_CTPT_NL_PORT 0x02 +#define SLI_CTPT_FNL_PORT 0x03 +#define SLI_CTPT_IP 0x04 +#define SLI_CTPT_FCP 0x08 +#define SLI_CTPT_NVME 0x28 +#define SLI_CTPT_NX_PORT 0x7F +#define SLI_CTPT_F_PORT 0x81 +#define SLI_CTPT_FL_PORT 0x82 +#define SLI_CTPT_E_PORT 0x84 #define SLI_CT_LAST_ENTRY 0x80000000 @@ -337,6 +361,7 @@ struct lpfc_name { uint8_t IEEE[6]; /* FC IEEE address */ } s; uint8_t wwn[8]; + uint64_t name; } u; }; @@ -549,6 +574,7 @@ struct fc_vft_header { #define ELS_CMD_REC 0x13000000 #define ELS_CMD_RDP 0x18000000 #define ELS_CMD_PRLI 0x20100014 +#define ELS_CMD_NVMEPRLI 0x20140018 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 #define ELS_CMD_PDISC 0x50000000 @@ -588,6 +614,7 @@ struct fc_vft_header { #define ELS_CMD_REC 0x13 #define ELS_CMD_RDP 0x18 #define ELS_CMD_PRLI 0x14001020 +#define ELS_CMD_NVMEPRLI 0x18001420 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 #define ELS_CMD_PDISC 0x50 @@ -684,6 +711,7 @@ typedef struct _PRLI { /* Structure is in Big Endian format */ uint8_t prliType; /* FC Parm Word 0, bit 24:31 */ #define PRLI_FCP_TYPE 0x08 +#define PRLI_NVME_TYPE 0x28 uint8_t word0Reserved1; /* FC Parm Word 0, bit 16:23 */ #ifdef __BIG_ENDIAN_BITFIELD @@ -1243,8 +1271,7 @@ struct fc_rdp_opd_sfp_info { uint8_t vendor_name[16]; uint8_t model_number[16]; uint8_t serial_number[16]; - uint8_t revision[2]; - uint8_t reserved[2]; + uint8_t revision[4]; uint8_t date[8]; }; @@ -1263,14 +1290,14 @@ struct fc_rdp_req_frame { struct fc_rdp_res_frame { - uint32_t reply_sequence; /* FC word0 LS_ACC or LS_RJT */ - uint32_t length; /* FC Word 1 */ - struct fc_rdp_link_service_desc link_service_desc; /* Word 2 -4 */ - struct fc_rdp_sfp_desc sfp_desc; /* Word 5 -9 */ - struct fc_rdp_port_speed_desc portspeed_desc; /* Word 10-12 */ - struct fc_rdp_link_error_status_desc link_error_desc; /* Word 13-21 */ - struct fc_rdp_port_name_desc diag_port_names_desc; /* Word 22-27 */ - struct fc_rdp_port_name_desc attached_port_names_desc;/* Word 28-33 */ + uint32_t reply_sequence; /* FC word0 LS_ACC or LS_RJT */ + uint32_t length; /* FC Word 1 */ + struct fc_rdp_link_service_desc link_service_desc; /* Word 2 -4 */ + struct fc_rdp_sfp_desc sfp_desc; /* Word 5 -9 */ + struct fc_rdp_port_speed_desc portspeed_desc; /* Word 10 -12 */ + struct fc_rdp_link_error_status_desc link_error_desc; /* Word 13 -21 */ + struct fc_rdp_port_name_desc diag_port_names_desc; /* Word 22 -27 */ + struct fc_rdp_port_name_desc attached_port_names_desc;/* Word 28 -33 */ struct fc_fec_rdp_desc fec_desc; /* FC word 34-37*/ struct fc_rdp_bbc_desc bbc_desc; /* FC Word 38-42*/ struct fc_rdp_oed_sfp_desc oed_temp_desc; /* FC Word 43-47*/ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index c3277c5312c94c..fcc083cc00e0f5 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3922,6 +3922,49 @@ struct gen_req64_wqe { uint32_t max_response_payload_len; }; +/* Define NVME PRLI request to fabric. NVME is a + * fabric-only protocol. 
+ * Updated to red-lined v1.08 on Sept 16, 2016 + */ +struct lpfc_nvme_prli { + uint32_t word1; + /* The Response Code is defined in the FCP PRLI lpfc_hw.h */ +#define prli_acc_rsp_code_SHIFT 8 +#define prli_acc_rsp_code_MASK 0x0000000f +#define prli_acc_rsp_code_WORD word1 +#define prli_estabImagePair_SHIFT 13 +#define prli_estabImagePair_MASK 0x00000001 +#define prli_estabImagePair_WORD word1 +#define prli_type_code_ext_SHIFT 16 +#define prli_type_code_ext_MASK 0x000000ff +#define prli_type_code_ext_WORD word1 +#define prli_type_code_SHIFT 24 +#define prli_type_code_MASK 0x000000ff +#define prli_type_code_WORD word1 + uint32_t word_rsvd2; + uint32_t word_rsvd3; + uint32_t word4; +#define prli_fba_SHIFT 0 +#define prli_fba_MASK 0x00000001 +#define prli_fba_WORD word4 +#define prli_disc_SHIFT 3 +#define prli_disc_MASK 0x00000001 +#define prli_disc_WORD word4 +#define prli_tgt_SHIFT 4 +#define prli_tgt_MASK 0x00000001 +#define prli_tgt_WORD word4 +#define prli_init_SHIFT 5 +#define prli_init_MASK 0x00000001 +#define prli_init_WORD word4 +#define prli_recov_SHIFT 8 +#define prli_recov_MASK 0x00000001 +#define prli_recov_WORD word4 + uint32_t word5; +#define prli_fb_sz_SHIFT 0 +#define prli_fb_sz_MASK 0x0000ffff +#define prli_fb_sz_WORD word5 +}; + struct create_xri_wqe { uint32_t rsrvd[5]; /* words 0-4 */ struct wqe_did wqe_dest; /* word 5 */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 57087ba4834fa1..e609afaa472a0f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2667,6 +2667,13 @@ lpfc_cleanup(struct lpfc_vport *vport) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { + /* Remove the NVME transport reference now and + * continue to remove the node. 
+ */ + lpfc_nlp_put(ndlp); + } + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 835ea9f78219c3..65e7b2433ee794 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -28,6 +28,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -35,8 +38,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -708,6 +712,7 @@ static void lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) { + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; uint32_t *lp; PRLI *npr; @@ -721,11 +726,19 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; ndlp->nlp_flag &= ~NLP_FIRSTBURST; - if (npr->prliType == PRLI_FCP_TYPE) { - if (npr->initiatorFunc) - ndlp->nlp_type |= NLP_FCP_INITIATOR; + if ((npr->prliType == PRLI_FCP_TYPE) || + (npr->prliType == PRLI_NVME_TYPE)) { + if (npr->initiatorFunc) { + if (npr->prliType == PRLI_FCP_TYPE) + ndlp->nlp_type |= NLP_FCP_INITIATOR; + if (npr->prliType == PRLI_NVME_TYPE) + ndlp->nlp_type |= NLP_NVME_INITIATOR; + } if (npr->targetFunc) { - ndlp->nlp_type |= NLP_FCP_TARGET; + if (npr->prliType == PRLI_FCP_TYPE) + ndlp->nlp_type |= NLP_FCP_TARGET; + if (npr->prliType == PRLI_NVME_TYPE) + ndlp->nlp_type |= NLP_NVME_TARGET; if (npr->writeXferRdyDis) ndlp->nlp_flag |= NLP_FIRSTBURST; } @@ -744,7 +757,8 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "rport rolechg: role:x%x did:x%x flg:x%x", roles, ndlp->nlp_DID, ndlp->nlp_flag); - fc_remote_port_rolechg(rport, roles); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_remote_port_rolechg(rport, roles); } } @@ -1491,7 +1505,9 @@ lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, { struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + /* Initiator mode. */ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; } @@ -1574,9 +1590,11 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, uint32_t evt) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; MAILBOX_t *mb = &pmb->u.mb; uint32_t did = mb->un.varWords[1]; + int rc = 0; if (mb->mbxStatus) { /* RegLogin failed */ @@ -1611,19 +1629,52 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, } /* SLI4 ports have preallocated logical rpis. */ - if (vport->phba->sli_rev < LPFC_SLI_REV4) + if (phba->sli_rev < LPFC_SLI_REV4) ndlp->nlp_rpi = mb->un.varWords[0]; ndlp->nlp_flag |= NLP_RPI_REGISTERED; /* Only if we are not a fabric nport do we issue PRLI */ - if (!(ndlp->nlp_type & NLP_FABRIC)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "3066 RegLogin Complete on x%x x%x x%x\n", + did, ndlp->nlp_type, ndlp->nlp_fc4_type); + if (!(ndlp->nlp_type & NLP_FABRIC) && + (phba->nvmet_support == 0)) { + /* The driver supports FCP and NVME concurrently. If the + * ndlp's nlp_fc4_type is still zero, the driver doesn't + * know what PRLI to send yet. Figure that out now and + * call PRLI depending on the outcome. 
+ */ + if (vport->fc_flag & FC_PT2PT) { + /* If we are pt2pt, there is no Fabric to determine + * the FC4 type of the remote nport. So if NVME + * is configured try it. + */ + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + /* We need to update the localport also */ + /* todo: init: revise localport nvme + * attributes + */ + } + + } else if (ndlp->nlp_fc4_type == 0) { + rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID, + 0, ndlp->nlp_DID); + return ndlp->nlp_state; + } + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else { - ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + /* Only Fabric ports should transition */ + if (ndlp->nlp_type & NLP_FABRIC) { + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } } return ndlp->nlp_state; } @@ -1664,7 +1715,7 @@ lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); - ndlp->nlp_flag |= NLP_IGNR_REG_CMPL; + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); spin_unlock_irq(shost->host_lock); lpfc_disc_set_adisc(vport, ndlp); @@ -1740,10 +1791,23 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_hba *phba = vport->phba; IOCB_t *irsp; PRLI *npr; + struct lpfc_nvme_prli *nvpr; + void *temp_ptr; cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; - npr = (PRLI *)lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb); + + /* A solicited PRLI is either FCP or NVME. The PRLI cmd/rsp + * format is different so NULL the two PRLI types so that the + * driver correctly gets the correct context. + */ + npr = NULL; + nvpr = NULL; + temp_ptr = lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb); + if (cmdiocb->iocb_flag & LPFC_PRLI_FCP_REQ) + npr = (PRLI *) temp_ptr; + else if (cmdiocb->iocb_flag & LPFC_PRLI_NVME_REQ) + nvpr = (struct lpfc_nvme_prli *) temp_ptr; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { @@ -1751,7 +1815,21 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, vport->cfg_restrict_login) { goto out; } + + /* The LS Req had some error. Don't let this be a + * target. + */ + if ((ndlp->fc4_prli_sent == 1) && + (ndlp->nlp_state == NLP_STE_PRLI_ISSUE) && + (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_FCP_INITIATOR))) + /* The FCP PRLI completed successfully but + * the NVME PRLI failed. Since they are sent in + * succession, allow the FCP to complete. + */ + goto out_err; + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; + ndlp->nlp_type |= NLP_FCP_INITIATOR; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); return ndlp->nlp_state; } @@ -1759,9 +1837,16 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Check out PRLI rsp */ ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; + + /* NVME or FCP first burst must be negotiated for each PRLI. 
*/ ndlp->nlp_flag &= ~NLP_FIRSTBURST; - if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) && + ndlp->nvme_fb_size = 0; + if (npr && (npr->acceptRspCode == PRLI_REQ_EXECUTED) && (npr->prliType == PRLI_FCP_TYPE)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6028 FCP NPR PRLI Cmpl Init %d Target %d\n", + npr->initiatorFunc, + npr->targetFunc); if (npr->initiatorFunc) ndlp->nlp_type |= NLP_FCP_INITIATOR; if (npr->targetFunc) { @@ -1771,6 +1856,49 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, } if (npr->Retry) ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + /* PRLI completed. Decrement count. */ + ndlp->fc4_prli_sent--; + } else if (nvpr && + (bf_get_be32(prli_acc_rsp_code, nvpr) == + PRLI_REQ_EXECUTED) && + (bf_get_be32(prli_type_code, nvpr) == + PRLI_NVME_TYPE)) { + + /* Complete setting up the remote ndlp personality. */ + if (bf_get_be32(prli_init, nvpr)) + ndlp->nlp_type |= NLP_NVME_INITIATOR; + + /* Target driver cannot solicit NVME FB. */ + if (bf_get_be32(prli_tgt, nvpr)) { + ndlp->nlp_type |= NLP_NVME_TARGET; + if ((bf_get_be32(prli_fba, nvpr) == 1) && + (bf_get_be32(prli_fb_sz, nvpr) > 0) && + (phba->cfg_nvme_enable_fb) && + (!phba->nvmet_support)) { + /* Both sides support FB. The target's first + * burst size is a 512 byte encoded value. + */ + ndlp->nlp_flag |= NLP_FIRSTBURST; + ndlp->nvme_fb_size = bf_get_be32(prli_fb_sz, + nvpr); + } + } + + if (bf_get_be32(prli_recov, nvpr)) + ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6029 NVME PRLI Cmpl w1 x%08x " + "w4 x%08x w5 x%08x flag x%x, " + "fcp_info x%x nlp_type x%x\n", + be32_to_cpu(nvpr->word1), + be32_to_cpu(nvpr->word4), + be32_to_cpu(nvpr->word5), + ndlp->nlp_flag, ndlp->nlp_fcp_info, + ndlp->nlp_type); + /* PRLI completed. Decrement count. */ + ndlp->fc4_prli_sent--; } if (!(ndlp->nlp_type & NLP_FCP_TARGET) && (vport->port_type == LPFC_NPIV_PORT) && @@ -1786,11 +1914,24 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return ndlp->nlp_state; } - ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; - if (ndlp->nlp_type & NLP_FCP_TARGET) - lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); - else - lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); +out_err: + /* The ndlp state cannot move to MAPPED or UNMAPPED before all PRLIs + * are complete. 
+ */ + if (ndlp->fc4_prli_sent == 0) { + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; + if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET)) + lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); + else + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } else + lpfc_printf_vlog(vport, + KERN_INFO, LOG_ELS, + "3067 PRLI's still outstanding " + "on x%06x - count %d, Pend Node Mode " + "transition...\n", + ndlp->nlp_DID, ndlp->fc4_prli_sent); + return ndlp->nlp_state; } diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c327fc7b1a5455..6ba2b341233737 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5333,7 +5333,8 @@ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd) continue; if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && ndlp->nlp_sid == i && - ndlp->rport) { + ndlp->rport && + ndlp->nlp_type & NLP_FCP_TARGET) { match = 1; break; } From 01649561a8b4b77247bd234f240d737367bb8a52 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:32 -0800 Subject: [PATCH 0153/1911] scsi: lpfc: NVME Initiator: bind to nvme_fc api NVME Initiator: Tie in to NVME Fabrics nvme_fc LLDD initiator api Adds the routines to: - register and deregister the FC port as a nvme-fc initiator localport - register and deregister remote FC ports as a nvme-fc remoteport - binding of nvme queues to adapter WQs - send/perform NVME LS's - send/perform NVME FCP initiator io operations Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 7 +- drivers/scsi/lpfc/lpfc.h | 5 + drivers/scsi/lpfc/lpfc_crtn.h | 10 + drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 4 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 15 +- drivers/scsi/lpfc/lpfc_init.c | 39 +- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 +- drivers/scsi/lpfc/lpfc_nvme.c | 2319 ++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvme.h | 6 + drivers/scsi/lpfc/lpfc_sli.c | 21 +- drivers/scsi/lpfc/lpfc_sli4.h | 1 + drivers/scsi/lpfc/lpfc_vport.c | 16 +- 13 files changed, 2411 insertions(+), 38 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvme.c diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index e2516ba8ebfa91..cd7e1fcf52c6f0 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -28,6 +28,7 @@ endif obj-$(CONFIG_SCSI_LPFC) := lpfc.o -lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o \ - lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \ - lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o +lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o \ + lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o \ + lpfc_scsi.o lpfc_attr.o lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o \ + lpfc_nvme.o diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 81c47d1aebb2c1..9ad63a47425be5 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -123,6 +123,11 @@ struct perf_prof { uint16_t wqidx[40]; }; +/* + * Provide for FC4 TYPE x28 - NVME. The + * bit mask for FCP and NVME is 0x8 identically + * because they are 32 bit positions distance. + */ #define LPFC_FC4_TYPE_BITMASK 0x00000100 /* Provide DMA memory definitions the driver uses per port instance. 
*/ diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 2e6345ebd6c548..a9c8a0a41b1644 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -518,4 +518,14 @@ int lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox); void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb); /* NVME interfaces. */ +void lpfc_nvme_unregister_port(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); +int lpfc_nvme_register_port(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); +int lpfc_nvme_create_localport(struct lpfc_vport *vport); +void lpfc_nvme_destroy_localport(struct lpfc_vport *vport); +void lpfc_nvme_update_localport(struct lpfc_vport *vport); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); +void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdiocb, + struct lpfc_wcqe_complete *abts_cmpl); diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index afb25369bc3bf1..8cfeffe312e027 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1412,7 +1412,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && (context == FC_TYPE_NVME)) { - /* todo: init: revise localport nvme attributes */ + lpfc_nvme_update_localport(vport); CtReq->un.rff.type_code = context; } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index e164eed25e3d60..23546b3c950c4c 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2618,7 +2618,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, !(vport->fc_flag & FC_PT2PT_PLOGI)) { phba->pport->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(phba->pport); mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6e64c8e8e44f5d..8936f5d91c873e 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -909,7 +909,9 @@ lpfc_linkdown(struct lpfc_hba *phba) vports[i]->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(vports[i]); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3580,7 +3582,9 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_unlock_irq(shost->host_lock); vport->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(vport); goto out; } @@ -3950,7 +3954,8 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_NVME); + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, + FC_TYPE_NVME); /* Issue SCR just before NameServer GID_FT Query */ lpfc_issue_els_scr(vport, SCR_DID, 0); @@ -4144,7 +4149,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || 
(ndlp->nlp_DID == Fabric_DID))) { vport->phba->nport_event_cnt++; - /* todo: init: unregister rport from nvme */ + lpfc_nvme_unregister_port(vport, ndlp); } } @@ -4169,7 +4174,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * the register. */ vport->phba->nport_event_cnt++; - /* todo: init: register rport with nvme */ + lpfc_nvme_register_port(vport, ndlp); } } } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e609afaa472a0f..474bafc63055a1 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3128,7 +3128,6 @@ static void lpfc_scsi_free(struct lpfc_hba *phba) { struct lpfc_scsi_buf *sb, *sb_next; - struct lpfc_iocbq *io, *io_next; if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) return; @@ -3158,14 +3157,6 @@ lpfc_scsi_free(struct lpfc_hba *phba) phba->total_scsi_bufs--; } spin_unlock(&phba->scsi_buf_list_get_lock); - - /* Release all the lpfc_iocbq entries maintained by this host. */ - list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { - list_del(&io->list); - kfree(io); - phba->total_iocbq_bufs--; - } - spin_unlock_irq(&phba->hbalock); } /** @@ -3180,7 +3171,6 @@ static void lpfc_nvme_free(struct lpfc_hba *phba) { struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; - struct lpfc_iocbq *io, *io_next; if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return; @@ -3209,14 +3199,6 @@ lpfc_nvme_free(struct lpfc_hba *phba) phba->total_nvme_bufs--; } spin_unlock(&phba->nvme_buf_list_get_lock); - - /* Release all the lpfc_iocbq entries maintained by this host. */ - list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { - list_del(&io->list); - kfree(io); - phba->total_iocbq_bufs--; - } - spin_unlock_irq(&phba->hbalock); } /** @@ -10685,7 +10667,23 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Perform post initialization setup */ lpfc_post_init_setup(phba); - /* todo: init: register port with nvme */ + /* NVME support in FW earlier in the driver load corrects the + * FC4 type making a check for nvme_support unnecessary. + */ + if ((phba->nvmet_support == 0) && + (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + /* Create NVME binding with nvme_fc_transport. This + * ensures the vport is initialized. + */ + error = lpfc_nvme_create_localport(vport); + if (error) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6004 NVME registration failed, " + "error x%x\n", + error); + goto out_disable_intr; + } + } /* check for firmware upgrade or downgrade */ if (phba->cfg_request_firmware_upgrade) @@ -10761,8 +10759,8 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Perform ndlp cleanup on the physical port. The nvme localport * is destroyed after to ensure all rports are io-disabled. */ + lpfc_nvme_destroy_localport(vport); lpfc_cleanup(vport); - /* todo: init: unregister port with nvme */ /* * Bring down the SLI Layer. 
This step disables all interrupts, @@ -10781,6 +10779,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) */ lpfc_scsi_free(phba); lpfc_nvme_free(phba); + lpfc_free_iocb_list(phba); lpfc_sli4_driver_resource_unset(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 65e7b2433ee794..470f9586192f7a 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1655,9 +1655,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ - /* todo: init: revise localport nvme - * attributes - */ + lpfc_nvme_update_localport(vport); } } else if (ndlp->nlp_fc4_type == 0) { diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c new file mode 100644 index 00000000000000..729803dacf1561 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -0,0 +1,2319 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + ********************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "lpfc_version.h" +#include "lpfc_hw4.h" +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_sli4.h" +#include "lpfc_nl.h" +#include "lpfc_disc.h" +#include "lpfc.h" +#include "lpfc_nvme.h" +#include "lpfc_scsi.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_vport.h" + +/* NVME initiator-based functions */ + +static struct lpfc_nvme_buf * +lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp); + +static void +lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_nvme_buf *); + + +/** + * lpfc_nvme_create_queue - + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @qidx: An cpu index used to affinitize IO queues and MSIX vectors. + * @handle: An opaque driver handle used in follow-up calls. + * + * Driver registers this routine to preallocate and initialize any + * internal data structures to bind the @qidx to its internal IO queues. + * A hardware queue maps (qidx) to a specific driver MSI-X vector/EQ/CQ/WQ. + * + * Return value : + * 0 - Success + * -EINVAL - Unsupported input value. 
+ * -ENOMEM - Could not alloc necessary memory + **/ +static int +lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, + unsigned int qidx, u16 qsize, + void **handle) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_nvme_qhandle *qhandle; + char *str; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL); + if (qhandle == NULL) + return -ENOMEM; + + qhandle->cpu_id = smp_processor_id(); + qhandle->qidx = qidx; + /* + * NVME qidx == 0 is the admin queue, so both admin queue + * and first IO queue will use MSI-X vector and associated + * EQ/CQ/WQ at index 0. After that they are sequentially assigned. + */ + if (qidx) { + str = "IO "; /* IO queue */ + qhandle->index = ((qidx - 1) % + vport->phba->cfg_nvme_io_channel); + } else { + str = "ADM"; /* Admin queue */ + qhandle->index = qidx; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6073 Binding %s HdwQueue %d (cpu %d) to " + "io_channel %d qhandle %p\n", str, + qidx, qhandle->cpu_id, qhandle->index, qhandle); + *handle = (void *)qhandle; + return 0; +} + +/** + * lpfc_nvme_delete_queue - + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @qidx: An cpu index used to affinitize IO queues and MSIX vectors. + * @handle: An opaque driver handle from lpfc_nvme_create_queue + * + * Driver registers this routine to free + * any internal data structures to bind the @qidx to its internal + * IO queues. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport, + unsigned int qidx, + void *handle) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6001 ENTER. lpfc_pnvme %p, qidx x%xi qhandle %p\n", + lport, qidx, handle); + kfree(handle); +} + +static void +lpfc_nvme_localport_delete(struct nvme_fc_local_port *localport) +{ + struct lpfc_nvme_lport *lport = localport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&lport->lport_unreg_done); +} + +/* lpfc_nvme_remoteport_delete + * + * @remoteport: Pointer to an nvme transport remoteport instance. + * + * This is a template downcall. NVME transport calls this function + * when it has completed the unregistration of a previously + * registered remoteport. + * + * Return value : + * None + */ +void +lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) +{ + struct lpfc_nvme_rport *rport = remoteport->private; + struct lpfc_vport *vport; + struct lpfc_nodelist *ndlp; + + ndlp = rport->ndlp; + if (!ndlp) + goto rport_err; + + vport = ndlp->vport; + if (!vport) + goto rport_err; + + /* Remove this rport from the lport's list - memory is owned by the + * transport. Remove the ndlp reference for the NVME transport before + * calling state machine to remove the node, this is devloss = 0 + * semantics. + */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6146 remoteport delete complete %p\n", + remoteport); + list_del(&rport->list); + lpfc_nlp_put(ndlp); + + rport_err: + /* This call has to execute as long as the rport is valid. + * Release any threads waiting for the unreg to complete. 
+ */ + complete(&rport->rport_unreg_done); +} + +static void +lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_vport *vport = cmdwqe->vport; + uint32_t status; + struct nvmefc_ls_req *pnvme_lsreq; + struct lpfc_dmabuf *buf_ptr; + struct lpfc_nodelist *ndlp; + + vport->phba->fc4NvmeLsCmpls++; + + pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + ndlp = (struct lpfc_nodelist *)cmdwqe->context1; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6047 nvme cmpl Enter " + "Data %p DID %x Xri: %x status %x cmd:%p lsreg:%p " + "bmp:%p ndlp:%p\n", + pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, + cmdwqe->sli4_xritag, status, + cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); + + if (cmdwqe->context3) { + buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); + kfree(buf_ptr); + cmdwqe->context3 = NULL; + } + if (pnvme_lsreq->done) + pnvme_lsreq->done(pnvme_lsreq, status); + else + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6046 nvme cmpl without done call back? " + "Data %p DID %x Xri: %x status %x\n", + pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, + cmdwqe->sli4_xritag, status); + if (ndlp) { + lpfc_nlp_put(ndlp); + cmdwqe->context1 = NULL; + } + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +static int +lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, + struct lpfc_dmabuf *inp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_wcqe_complete *), + struct lpfc_nodelist *ndlp, uint32_t num_entry, + uint32_t tmo, uint8_t retry) +{ + struct lpfc_hba *phba = vport->phba; + union lpfc_wqe *wqe; + struct lpfc_iocbq *genwqe; + struct ulp_bde64 *bpl; + struct ulp_bde64 bde; + int i, rc, xmit_len, first_len; + + /* Allocate buffer for command WQE */ + genwqe = lpfc_sli_get_iocbq(phba); + if (genwqe == NULL) + return 1; + + wqe = &genwqe->wqe; + memset(wqe, 0, sizeof(union lpfc_wqe)); + + genwqe->context3 = (uint8_t *)bmp; + genwqe->iocb_flag |= LPFC_IO_NVME_LS; + + /* Save for completion so we can release these resources */ + genwqe->context1 = lpfc_nlp_get(ndlp); + genwqe->context2 = (uint8_t *)pnvme_lsreq; + /* Fill in payload, bp points to frame payload */ + + if (!tmo) + /* FC spec states we need 3 * ratov for CT requests */ + tmo = (3 * phba->fc_ratov); + + /* For this command calculate the xmit length of the request bde. 
*/ + xmit_len = 0; + first_len = 0; + bpl = (struct ulp_bde64 *)bmp->virt; + for (i = 0; i < num_entry; i++) { + bde.tus.w = bpl[i].tus.w; + if (bde.tus.f.bdeFlags != BUFF_TYPE_BDE_64) + break; + xmit_len += bde.tus.f.bdeSize; + if (i == 0) + first_len = xmit_len; + } + + genwqe->rsvd2 = num_entry; + genwqe->hba_wqidx = 0; + + /* Words 0 - 2 */ + wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->generic.bde.tus.f.bdeSize = first_len; + wqe->generic.bde.addrLow = bpl[0].addrLow; + wqe->generic.bde.addrHigh = bpl[0].addrHigh; + + /* Word 3 */ + wqe->gen_req.request_payload_len = first_len; + + /* Word 4 */ + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0); + bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1); + bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1); + bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL); + bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->gen_req.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->gen_req.wqe_com, genwqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_tmo, &wqe->gen_req.wqe_com, (vport->phba->fc_ratov-1)); + bf_set(wqe_class, &wqe->gen_req.wqe_com, CLASS3); + bf_set(wqe_cmnd, &wqe->gen_req.wqe_com, CMD_GEN_REQUEST64_WQE); + bf_set(wqe_ct, &wqe->gen_req.wqe_com, SLI4_CT_RPI); + + /* Word 8 */ + wqe->gen_req.wqe_com.abort_tag = genwqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->gen_req.wqe_com, genwqe->iotag); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe->gen_req.wqe_com, 1); + bf_set(wqe_iod, &wqe->gen_req.wqe_com, LPFC_WQE_IOD_READ); + bf_set(wqe_qosd, &wqe->gen_req.wqe_com, 1); + bf_set(wqe_lenloc, &wqe->gen_req.wqe_com, LPFC_WQE_LENLOC_NONE); + bf_set(wqe_ebde_cnt, &wqe->gen_req.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->gen_req.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->gen_req.wqe_com, OTHER_COMMAND); + + + /* Issue GEN REQ WQE for NPORT */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "6050 Issue GEN REQ WQE to NPORT x%x " + "Data: x%x x%x wq:%p lsreq:%p bmp:%p xmit:%d 1st:%d\n", + ndlp->nlp_DID, genwqe->iotag, + vport->port_state, + genwqe, pnvme_lsreq, bmp, xmit_len, first_len); + genwqe->wqe_cmpl = cmpl; + genwqe->iocb_cmpl = NULL; + genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; + genwqe->vport = vport; + genwqe->retry = retry; + + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, genwqe); + if (rc == WQE_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "6045 Issue GEN REQ WQE to NPORT x%x " + "Data: x%x x%x\n", + ndlp->nlp_DID, genwqe->iotag, + vport->port_state); + lpfc_sli_release_iocbq(phba, genwqe); + return 1; + } + return 0; +} + +/** + * lpfc_nvme_ls_req - Issue an Link Service request + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. 
+ **/ +static int +lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + int ret = 0; + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_nodelist *ndlp; + struct ulp_bde64 *bpl; + struct lpfc_dmabuf *bmp; + + /* there are two dma buf in the request, actually there is one and + * the second one is just the start address + cmd size. + * Before calling lpfc_nvme_gen_req these buffers need to be wrapped + * in a lpfc_dmabuf struct. When freeing we just free the wrapper + * because the nvem layer owns the data bufs. + * We do not have to break these packets open, we don't care what is in + * them. And we do not have to look at the resonse data, we only care + * that we got a response. All of the caring is going to happen in the + * nvme-fc layer. + */ + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6043 Could not find node for DID %x\n", + pnvme_rport->port_id); + return 1; + } + bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (!bmp) { + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6044 Could not find node for DID %x\n", + pnvme_rport->port_id); + return 2; + } + INIT_LIST_HEAD(&bmp->list); + bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys)); + if (!bmp->virt) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6042 Could not find node for DID %x\n", + pnvme_rport->port_id); + kfree(bmp); + return 3; + } + bpl = (struct ulp_bde64 *)bmp->virt; + bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma)); + bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma)); + bpl->tus.f.bdeFlags = 0; + bpl->tus.f.bdeSize = pnvme_lsreq->rqstlen; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + bpl++; + + bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rspdma)); + bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rspdma)); + bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64I; + bpl->tus.f.bdeSize = pnvme_lsreq->rsplen; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + + /* Expand print to include key fields. */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " + "rsplen:%d %llux %llux\n", + pnvme_lport, pnvme_rport, + pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, + pnvme_lsreq->rspdma); + + vport->phba->fc4NvmeLsRequests++; + + /* Hardcode the wait to 30 seconds. Connections are failing otherwise. + * This code allows it all to work. + */ + ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr, + pnvme_lsreq, lpfc_nvme_cmpl_gen_req, + ndlp, 2, 30, 0); + if (ret != WQE_SUCCESS) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6052 EXIT. issue ls wqe failed lport %p, " + "rport %p lsreq%p Status %x DID %x\n", + pnvme_lport, pnvme_rport, pnvme_lsreq, + ret, ndlp->nlp_DID); + lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys); + kfree(bmp); + return ret; + } + + /* Stub in routine and return 0 for now. */ + return ret; +} + +/** + * lpfc_nvme_ls_abort - Issue an Link Service request + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. 
+ * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + LIST_HEAD(abort_list); + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *wqe, *next_wqe; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + phba = vport->phba; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6049 Could not find node for DID %x\n", + pnvme_rport->port_id); + return; + } + + /* Expand print to include key fields. */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, + "6040 ENTER. lport %p, rport %p lsreq %p rqstlen:%d " + "rsplen:%d %llux %llux\n", + pnvme_lport, pnvme_rport, + pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, + pnvme_lsreq->rspdma); + + /* + * Lock the ELS ring txcmplq and build a local list of all ELS IOs + * that need an ABTS. The IOs need to stay on the txcmplq so that + * the abort operation completes them successfully. + */ + pring = phba->sli4_hba.nvmels_wq->pring; + spin_lock_irq(&phba->hbalock); + spin_lock(&pring->ring_lock); + list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { + /* Add to abort_list on on NDLP match. */ + if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) { + wqe->iocb_flag |= LPFC_DRIVER_ABORTED; + list_add_tail(&wqe->dlist, &abort_list); + } + } + spin_unlock(&pring->ring_lock); + spin_unlock_irq(&phba->hbalock); + + /* Abort the targeted IOs and remove them from the abort list. */ + list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) { + spin_lock_irq(&phba->hbalock); + list_del_init(&wqe->dlist); + lpfc_sli_issue_abort_iotag(phba, pring, wqe); + spin_unlock_irq(&phba->hbalock); + } +} + +/* Fix up the existing sgls for NVME IO. */ +static void +lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd, + struct nvmefc_fcp_req *nCmd) +{ + struct sli4_sge *sgl; + union lpfc_wqe128 *wqe; + uint32_t *wptr, *dptr; + + /* + * Adjust the FCP_CMD and FCP_RSP DMA data and sge_len to + * match NVME. NVME sends 96 bytes. Also, use the + * nvme commands command and response dma addresses + * rather than the virtual memory to ease the restore + * operation. + */ + sgl = lpfc_ncmd->nvme_sgl; + sgl->sge_len = cpu_to_le32(nCmd->cmdlen); + + sgl++; + + /* Setup the physical region for the FCP RSP */ + sgl->addr_hi = cpu_to_le32(putPaddrHigh(nCmd->rspdma)); + sgl->addr_lo = cpu_to_le32(putPaddrLow(nCmd->rspdma)); + sgl->word2 = le32_to_cpu(sgl->word2); + if (nCmd->sg_cnt) + bf_set(lpfc_sli4_sge_last, sgl, 0); + else + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(nCmd->rsplen); + + /* + * Get a local pointer to the built-in wqe and correct + * the cmd size to match NVME's 96 bytes and fix + * the dma address. 
+ */ + + /* 128 byte wqe support here */ + wqe = (union lpfc_wqe128 *)&lpfc_ncmd->cur_iocbq.wqe; + + /* Word 0-2 - NVME CMND IU (embedded payload) */ + wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_IMMED; + wqe->generic.bde.tus.f.bdeSize = 60; + wqe->generic.bde.addrHigh = 0; + wqe->generic.bde.addrLow = 64; /* Word 16 */ + + /* Word 3 */ + bf_set(payload_offset_len, &wqe->fcp_icmd, + (nCmd->rsplen + nCmd->cmdlen)); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_icmd.wqe_com, 1); + bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 1); + + /* + * Embed the payload in the last half of the WQE + * WQE words 16-30 get the NVME CMD IU payload + * + * WQE Word 16 is already setup with flags + * WQE words 17-19 get payload Words 2-4 + * WQE words 20-21 get payload Words 6-7 + * WQE words 22-29 get payload Words 16-23 + */ + wptr = &wqe->words[17]; /* WQE ptr */ + dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */ + dptr += 2; /* Skip Words 0-1 in payload */ + + *wptr++ = *dptr++; /* Word 2 */ + *wptr++ = *dptr++; /* Word 3 */ + *wptr++ = *dptr++; /* Word 4 */ + dptr++; /* Skip Word 5 in payload */ + *wptr++ = *dptr++; /* Word 6 */ + *wptr++ = *dptr++; /* Word 7 */ + dptr += 8; /* Skip Words 8-15 in payload */ + *wptr++ = *dptr++; /* Word 16 */ + *wptr++ = *dptr++; /* Word 17 */ + *wptr++ = *dptr++; /* Word 18 */ + *wptr++ = *dptr++; /* Word 19 */ + *wptr++ = *dptr++; /* Word 20 */ + *wptr++ = *dptr++; /* Word 21 */ + *wptr++ = *dptr++; /* Word 22 */ + *wptr = *dptr; /* Word 23 */ +} + +/** + * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvme_buf *lpfc_ncmd = + (struct lpfc_nvme_buf *)pwqeIn->context1; + struct lpfc_vport *vport = pwqeIn->vport; + struct nvmefc_fcp_req *nCmd; + struct nvme_fc_ersp_iu *ep; + struct nvme_fc_cmd_iu *cp; + struct lpfc_nvme_rport *rport; + struct lpfc_nodelist *ndlp; + unsigned long flags; + uint32_t code; + uint16_t cid, sqhd, data; + uint32_t *ptr; + + /* Sanity check on return of outstanding command */ + if (!lpfc_ncmd || !lpfc_ncmd->nvmeCmd || !lpfc_ncmd->nrport) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6071 Completion pointers bad on wqe %p.\n", + wcqe); + return; + } + phba->fc4NvmeIoCmpls++; + + nCmd = lpfc_ncmd->nvmeCmd; + rport = lpfc_ncmd->nrport; + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6061 rport %p, ndlp %p, DID x%06x ndlp " + "not ready.\n", + rport, ndlp, rport->remoteport->port_id); + + ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6062 Ignoring NVME cmpl. 
No ndlp\n"); + goto out_err; + } + } + + code = bf_get(lpfc_wcqe_c_code, wcqe); + if (code == CQE_CODE_NVME_ERSP) { + /* For this type of CQE, we need to rebuild the rsp */ + ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr; + + /* + * Get Command Id from cmd to plug into response. This + * code is not needed in the next NVME Transport drop. + */ + cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr; + cid = cp->sqe.common.command_id; + + /* + * RSN is in CQE word 2 + * SQHD is in CQE Word 3 bits 15:0 + * Cmd Specific info is in CQE Word 1 + * and in CQE Word 0 bits 15:0 + */ + sqhd = bf_get(lpfc_wcqe_c_sqhead, wcqe); + + /* Now lets build the NVME ERSP IU */ + ep->iu_len = cpu_to_be16(8); + ep->rsn = wcqe->parameter; + ep->xfrd_len = cpu_to_be32(nCmd->payload_length); + ep->rsvd12 = 0; + ptr = (uint32_t *)&ep->cqe.result.u64; + *ptr++ = wcqe->total_data_placed; + data = bf_get(lpfc_wcqe_c_ersp0, wcqe); + *ptr = (uint32_t)data; + ep->cqe.sq_head = sqhd; + ep->cqe.sq_id = nCmd->sqid; + ep->cqe.command_id = cid; + ep->cqe.status = 0; + + lpfc_ncmd->status = IOSTAT_SUCCESS; + lpfc_ncmd->result = 0; + nCmd->rcv_rsplen = LPFC_NVME_ERSP_LEN; + nCmd->transferred_length = nCmd->payload_length; + } else { + lpfc_ncmd->status = (bf_get(lpfc_wcqe_c_status, wcqe) & + LPFC_IOCB_STATUS_MASK); + lpfc_ncmd->result = wcqe->parameter; + + /* For NVME, the only failure path that results in an + * IO error is when the adapter rejects it. All other + * conditions are a success case and resolved by the + * transport. + * IOSTAT_FCP_RSP_ERROR means: + * 1. Length of data received doesn't match total + * transfer length in WQE + * 2. If the RSP payload does NOT match these cases: + * a. RSP length 12/24 bytes and all zeros + * b. NVME ERSP + */ + switch (lpfc_ncmd->status) { + case IOSTAT_SUCCESS: + nCmd->transferred_length = wcqe->total_data_placed; + nCmd->rcv_rsplen = 0; + nCmd->status = 0; + break; + case IOSTAT_FCP_RSP_ERROR: + nCmd->transferred_length = wcqe->total_data_placed; + nCmd->rcv_rsplen = wcqe->parameter; + nCmd->status = 0; + /* Sanity check */ + if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) + break; + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6081 NVME Completion Protocol Error: " + "status x%x result x%x placed x%x\n", + lpfc_ncmd->status, lpfc_ncmd->result, + wcqe->total_data_placed); + break; + default: +out_err: + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6072 NVME Completion Error: " + "status x%x result x%x placed x%x\n", + lpfc_ncmd->status, lpfc_ncmd->result, + wcqe->total_data_placed); + nCmd->transferred_length = 0; + nCmd->rcv_rsplen = 0; + nCmd->status = NVME_SC_FC_TRANSPORT_ERROR; + } + } + + /* pick up SLI4 exhange busy condition */ + if (bf_get(lpfc_wcqe_c_xb, wcqe)) + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) + atomic_dec(&ndlp->cmd_pending); + + /* Update stats and complete the IO. There is + * no need for dma unprep because the nvme_transport + * owns the dma address. + */ + nCmd->done(nCmd); + + spin_lock_irqsave(&phba->hbalock, flags); + lpfc_ncmd->nrport = NULL; + spin_unlock_irqrestore(&phba->hbalock, flags); + + lpfc_release_nvme_buf(phba, lpfc_ncmd); +} + + +/** + * lpfc_nvme_prep_io_cmd - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. 
+ * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd, + struct lpfc_nodelist *pnode) +{ + struct lpfc_hba *phba = vport->phba; + struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; + struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq); + union lpfc_wqe128 *wqe = (union lpfc_wqe128 *)&pwqeq->wqe; + uint32_t req_len; + + if (!pnode || !NLP_CHK_NODE_ACT(pnode)) + return -EINVAL; + + /* + * There are three possibilities here - use scatter-gather segment, use + * the single mapping, or neither. + */ + wqe->fcp_iwrite.initial_xfer_len = 0; + if (nCmd->sg_cnt) { + if (nCmd->io_dir == NVMEFC_FCP_WRITE) { + /* Word 5 */ + if ((phba->cfg_nvme_enable_fb) && + (pnode->nlp_flag & NLP_FIRSTBURST)) { + req_len = lpfc_ncmd->nvmeCmd->payload_length; + if (req_len < pnode->nvme_fb_size) + wqe->fcp_iwrite.initial_xfer_len = + req_len; + else + wqe->fcp_iwrite.initial_xfer_len = + pnode->nvme_fb_size; + } + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, + CMD_FCP_IWRITE64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, + PARM_READ_CHECK); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_iwrite.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com, + LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com, + LPFC_WQE_LENLOC_WORD4); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_iwrite.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, + NVME_WRITE_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_WRITE; + + phba->fc4NvmeOutputRequests++; + } else { + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, + CMD_FCP_IREAD64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, + PARM_READ_CHECK); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_iread.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_iread.wqe_com, + LPFC_WQE_IOD_READ); + bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com, + LPFC_WQE_LENLOC_WORD4); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_iread.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, + NVME_READ_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_READ; + + phba->fc4NvmeInputRequests++; + } + } else { + /* Word 4 */ + wqe->fcp_icmd.rsrvd4 = 0; + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, CMD_FCP_ICMND64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, 0); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1); + bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com, + LPFC_WQE_LENLOC_NONE); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_icmd.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_CMD; + + phba->fc4NvmeControlRequests++; + } + /* + * Finish initializing those WQE fields that are independent + * of the nvme_cmnd request_buffer + */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com, + phba->sli4_hba.rpi_ids[pnode->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->generic.wqe_com, pwqeq->sli4_xritag); + + /* Word 7 */ + /* Preserve Class data in the ndlp. 
*/ + bf_set(wqe_class, &wqe->generic.wqe_com, + (pnode->nlp_fcp_info & 0x0f)); + + /* Word 8 */ + wqe->generic.wqe_com.abort_tag = pwqeq->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->generic.wqe_com, pwqeq->iotag); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->generic.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + pwqeq->vport = vport; + return 0; +} + + +/** + * lpfc_nvme_prep_io_dma - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. + * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd) +{ + struct lpfc_hba *phba = vport->phba; + struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; + union lpfc_wqe128 *wqe = (union lpfc_wqe128 *)&lpfc_ncmd->cur_iocbq.wqe; + struct sli4_sge *sgl = lpfc_ncmd->nvme_sgl; + struct scatterlist *data_sg; + struct sli4_sge *first_data_sgl; + dma_addr_t physaddr; + uint32_t num_bde = 0; + uint32_t dma_len; + uint32_t dma_offset = 0; + int nseg, i; + + /* Fix up the command and response DMA stuff. */ + lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd); + + /* + * There are three possibilities here - use scatter-gather segment, use + * the single mapping, or neither. + */ + if (nCmd->sg_cnt) { + /* + * Jump over the cmd and rsp SGEs. The fix routine + * has already adjusted for this. + */ + sgl += 2; + + first_data_sgl = sgl; + lpfc_ncmd->seg_cnt = nCmd->sg_cnt; + if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6058 Too many sg segments from " + "NVME Transport. Max %d, " + "nvmeIO sg_cnt %d\n", + phba->cfg_sg_seg_cnt, + lpfc_ncmd->seg_cnt); + lpfc_ncmd->seg_cnt = 0; + return 1; + } + + /* + * The driver established a maximum scatter-gather segment count + * during probe that limits the number of sg elements in any + * single nvme command. Just run through the seg_cnt and format + * the sge's. + */ + nseg = nCmd->sg_cnt; + data_sg = nCmd->first_sgl; + for (i = 0; i < nseg; i++) { + if (data_sg == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6059 dptr err %d, nseg %d\n", + i, nseg); + lpfc_ncmd->seg_cnt = 0; + return 1; + } + physaddr = data_sg->dma_address; + dma_len = data_sg->length; + sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); + sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr)); + sgl->word2 = le32_to_cpu(sgl->word2); + if ((num_bde + 1) == nseg) + bf_set(lpfc_sli4_sge_last, sgl, 1); + else + bf_set(lpfc_sli4_sge_last, sgl, 0); + bf_set(lpfc_sli4_sge_offset, sgl, dma_offset); + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(dma_len); + + dma_offset += dma_len; + data_sg = sg_next(data_sg); + sgl++; + } + } else { + /* For this clause to be valid, the payload_length + * and sg_cnt must zero. 
+ */ + if (nCmd->payload_length != 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6063 NVME DMA Prep Err: sg_cnt %d " + "payload_length x%x\n", + nCmd->sg_cnt, nCmd->payload_length); + return 1; + } + } + + /* + * Due to difference in data length between DIF/non-DIF paths, + * we need to set word 4 of WQE here + */ + wqe->fcp_iread.total_xfer_len = nCmd->payload_length; + return 0; +} + +/** + * lpfc_nvme_fcp_io_submit - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. + * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport + indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + void *hw_queue_handle, + struct nvmefc_fcp_req *pnvme_fcreq) +{ + int ret = 0; + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_nvme_rport *rport; + struct lpfc_nvme_qhandle *lpfc_queue_info; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + phba = vport->phba; + + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + lpfc_queue_info = (struct lpfc_nvme_qhandle *)hw_queue_handle; + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6053 rport %p, ndlp %p, DID x%06x " + "ndlp not ready.\n", + rport, ndlp, pnvme_rport->port_id); + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6066 Missing node for DID %x\n", + pnvme_rport->port_id); + ret = -ENODEV; + goto out_fail; + } + } + + /* The remote node has to be a mapped target or it's an error. */ + if ((ndlp->nlp_type & NLP_NVME_TARGET) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6036 rport %p, DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + rport, pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + ret = -ENODEV; + goto out_fail; + + } + + /* The node is shared with FCP IO, make sure the IO pending count does + * not exceed the programmed depth. + */ + if (atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth) { + ret = -EAGAIN; + goto out_fail; + } + + lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp); + if (lpfc_ncmd == NULL) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, + "6065 driver's buffer pool is empty, " + "IO failed\n"); + ret = -ENOMEM; + goto out_fail; + } + + /* + * Store the data needed by the driver to issue, abort, and complete + * an IO. + * Do not let the IO hang out forever. There is no midlayer issuing + * an abort so inform the FW of the maximum IO pending time. 
+ */
+ pnvme_fcreq->private = (void *)lpfc_ncmd;
+ lpfc_ncmd->nvmeCmd = pnvme_fcreq;
+ lpfc_ncmd->nrport = rport;
+ lpfc_ncmd->start_time = jiffies;
+
+ lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
+ ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_free_nvme_buf;
+ }
+
+ atomic_inc(&ndlp->cmd_pending);
+
+ /*
+ * Issue the IO on the WQ indicated by index in the hw_queue_handle.
+ * This identifier was created in our hardware queue create callback
+ * routine. The driver now is dependent on the IO queue steering from
+ * the transport. We are trusting the upper NVME layers know which
+ * index to use and that they have affinitized a CPU to this hardware
+ * queue. A hardware queue maps to a driver MSI-X vector/EQ/CQ/WQ.
+ */
+ lpfc_ncmd->cur_iocbq.hba_wqidx = lpfc_queue_info->index;
+
+ ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq);
+ if (ret) {
+ atomic_dec(&ndlp->cmd_pending);
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
+ "6113 FCP could not issue WQE err %x "
+ "sid: x%x did: x%x oxid: x%x\n",
+ ret, vport->fc_myDID, ndlp->nlp_DID,
+ lpfc_ncmd->cur_iocbq.sli4_xritag);
+ ret = -EINVAL;
+ goto out_free_nvme_buf;
+ }
+
+ return 0;
+
+ out_free_nvme_buf:
+ lpfc_release_nvme_buf(phba, lpfc_ncmd);
+ out_fail:
+ return ret;
+}
+
+/**
+ * lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request.
+ * @phba: Pointer to HBA context object
+ * @cmdiocb: Pointer to command iocb object.
+ * @rspiocb: Pointer to response iocb object.
+ *
+ * This is the callback function for any NVME FCP IO that was aborted.
+ *
+ * Return value:
+ * None
+ **/
+void
+lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_wcqe_complete *abts_cmpl)
+{
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+ "6145 ABORT_XRI_CN completing on rpi x%x "
+ "original iotag x%x, abort cmd iotag x%x "
+ "req_tag x%x, status x%x, hwstatus x%x\n",
+ cmdiocb->iocb.un.acxri.abortContextTag,
+ cmdiocb->iocb.un.acxri.abortIoTag,
+ cmdiocb->iotag,
+ bf_get(lpfc_wcqe_c_request_tag, abts_cmpl),
+ bf_get(lpfc_wcqe_c_status, abts_cmpl),
+ bf_get(lpfc_wcqe_c_hw_status, abts_cmpl));
+ lpfc_sli_release_iocbq(phba, cmdiocb);
+}
+
+/**
+ * lpfc_nvme_fcp_abort - Issue an NVME-over-FCP ABTS
+ * @lpfc_pnvme: Pointer to the driver's nvme instance data
+ * @lpfc_nvme_lport: Pointer to the driver's local port data
+ * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
+ * @lpfc_nvme_fcreq: IO request from nvme fc to driver.
+ * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue
+ *
+ * Driver registers this routine as its nvme request io abort handler. This
+ * routine issues an fcp Abort WQE with data from the @lpfc_nvme_fcpreq
+ * data structure to the rport indicated in @lpfc_nvme_rport. This routine
+ * is executed asynchronously - once the target is validated as "MAPPED" and
+ * ready for IO, the driver issues the abort request and returns.
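The hw_queue_handle steering described in the comment inside lpfc_nvme_fcp_io_submit() above can be sketched in isolation. The struct fields correspond to the lpfc_nvme_qhandle this series adds to lpfc_nvme.h, but example_create_queue() and its round-robin policy are illustrative assumptions only, not the driver's actual lpfc_nvme_create_queue() behavior.

#include <stdio.h>

/* Fields correspond to the lpfc_nvme_qhandle added by this series:
 * index  - WQ index the driver will post WQEs to (copied into hba_wqidx)
 * qidx   - queue index passed in by the nvme transport at create time
 * cpu_id - cpu the handle was created on
 */
struct example_qhandle {
    unsigned int index;
    unsigned int qidx;
    unsigned int cpu_id;
};

/* Assumed policy for illustration only: keep the admin queue (qidx 0) on
 * WQ 0 and spread the transport's IO queues round-robin across the
 * driver's IO WQs. */
static void example_create_queue(struct example_qhandle *qh,
                                 unsigned int qidx,
                                 unsigned int nr_io_wqs,
                                 unsigned int cpu)
{
    qh->qidx = qidx;
    qh->cpu_id = cpu;
    qh->index = qidx ? ((qidx - 1) % nr_io_wqs) : 0;
}

int main(void)
{
    struct example_qhandle qh;
    unsigned int qidx;

    for (qidx = 0; qidx < 6; qidx++) {
        example_create_queue(&qh, qidx, 4, qidx);
        printf("transport qidx %u -> driver WQ index %u\n", qidx, qh.index);
    }
    return 0;
}

The point is only that the transport's qidx is translated once at queue-create time, so the hot submit path can copy a precomputed WQ index straight into hba_wqidx.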
+ * + * Return value: + * None + **/ +static void +lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + void *hw_queue_handle, + struct nvmefc_fcp_req *pnvme_fcreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + struct lpfc_nvme_rport *rport; + struct lpfc_nvme_buf *lpfc_nbuf; + struct lpfc_iocbq *abts_buf; + struct lpfc_iocbq *nvmereq_wqe; + union lpfc_wqe *abts_wqe; + unsigned long flags; + int ret_val; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + vport = lport->vport; + phba = vport->phba; + + /* Announce entry to new IO submit field. */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6002 Abort Request to rport DID x%06x " + "for nvme_fc_req %p\n", + pnvme_rport->port_id, + pnvme_fcreq); + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + "6054 rport %p, ndlp %p, DID x%06x ndlp " + " not ready.\n", + rport, ndlp, pnvme_rport->port_id); + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6055 Could not find node for " + "DID %x\n", + pnvme_rport->port_id); + return; + } + } + + /* The remote node has to be ready to send an abort. */ + if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) && + !(ndlp->nlp_type & NLP_NVME_TARGET)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + "6048 rport %p, DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + rport, pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + return; + } + + /* If the hba is getting reset, this flag is set. It is + * cleared when the reset is complete and rings reestablished. + */ + spin_lock_irqsave(&phba->hbalock, flags); + /* driver queued commands are in process of being flushed */ + if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6139 Driver in reset cleanup - flushing " + "NVME Req now. hba_flag x%x\n", + phba->hba_flag); + return; + } + + lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; + if (!lpfc_nbuf) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6140 NVME IO req has no matching lpfc nvme " + "io buffer. Skipping abort req.\n"); + return; + } else if (!lpfc_nbuf->nvmeCmd) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6141 lpfc NVME IO req has no nvme_fcreq " + "io buffer. Skipping abort req.\n"); + return; + } + + /* + * The lpfc_nbuf and the mapped nvme_fcreq in the driver's + * state must match the nvme_fcreq passed by the nvme + * transport. If they don't match, it is likely the driver + * has already completed the NVME IO and the nvme transport + * has not seen it yet. + */ + if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6143 NVME req mismatch: " + "lpfc_nbuf %p nvmeCmd %p, " + "pnvme_fcreq %p. Skipping Abort\n", + lpfc_nbuf, lpfc_nbuf->nvmeCmd, + pnvme_fcreq); + return; + } + + /* Don't abort IOs no longer on the pending queue. 
*/ + nvmereq_wqe = &lpfc_nbuf->cur_iocbq; + if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6142 NVME IO req %p not queued - skipping " + "abort req\n", + pnvme_fcreq); + return; + } + + /* Outstanding abort is in progress */ + if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6144 Outstanding NVME I/O Abort Request " + "still pending on nvme_fcreq %p, " + "lpfc_ncmd %p\n", + pnvme_fcreq, lpfc_nbuf); + return; + } + + abts_buf = __lpfc_sli_get_iocbq(phba); + if (!abts_buf) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6136 No available abort wqes. Skipping " + "Abts req for nvme_fcreq %p.\n", + pnvme_fcreq); + return; + } + + /* Ready - mark outstanding as aborted by driver. */ + nvmereq_wqe->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* Complete prepping the abort wqe and issue to the FW. */ + abts_wqe = &abts_buf->wqe; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, + nvmereq_wqe->iocb.ulpClass); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = nvmereq_wqe->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abts_buf->iotag); + + /* word 10 */ + bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx); + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abts_buf->iocb_flag |= LPFC_IO_NVME; + abts_buf->hba_wqidx = nvmereq_wqe->hba_wqidx; + abts_buf->vport = vport; + abts_buf->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl; + ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (ret_val == IOCB_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6137 Failed abts issue_wqe with status x%x " + "for nvme_fcreq %p.\n", + ret_val, pnvme_fcreq); + lpfc_sli_release_iocbq(phba, abts_buf); + return; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6138 Transport Abort NVME Request Issued for\n" + "ox_id x%x on reqtag x%x\n", + nvmereq_wqe->sli4_xritag, + abts_buf->iotag); +} + +/* Declare and initialization an instance of the FC NVME template. 
*/ +static struct nvme_fc_port_template lpfc_nvme_template = { + /* initiator-based functions */ + .localport_delete = lpfc_nvme_localport_delete, + .remoteport_delete = lpfc_nvme_remoteport_delete, + .create_queue = lpfc_nvme_create_queue, + .delete_queue = lpfc_nvme_delete_queue, + .ls_req = lpfc_nvme_ls_req, + .fcp_io = lpfc_nvme_fcp_io_submit, + .ls_abort = lpfc_nvme_ls_abort, + .fcp_abort = lpfc_nvme_fcp_abort, + + .max_hw_queues = 1, + .max_sgl_segments = LPFC_NVME_DEFAULT_SEGS, + .max_dif_sgl_segments = LPFC_NVME_DEFAULT_SEGS, + .dma_boundary = 0xFFFFFFFF, + + /* Sizes of additional private data for data structures. + * No use for the last two sizes at this time. + */ + .local_priv_sz = sizeof(struct lpfc_nvme_lport), + .remote_priv_sz = sizeof(struct lpfc_nvme_rport), + .lsrqst_priv_sz = 0, + .fcprqst_priv_sz = 0, +}; + +/** + * lpfc_sli4_post_nvme_sgl_block - post a block of nvme sgl list to firmware + * @phba: pointer to lpfc hba data structure. + * @nblist: pointer to nvme buffer list. + * @count: number of scsi buffers on the list. + * + * This routine is invoked to post a block of @count scsi sgl pages from a + * SCSI buffer list @nblist to the HBA using non-embedded mailbox command. + * No Lock is held. + * + **/ +static int +lpfc_sli4_post_nvme_sgl_block(struct lpfc_hba *phba, + struct list_head *nblist, + int count) +{ + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_mbx_post_uembed_sgl_page1 *sgl; + struct sgl_page_pairs *sgl_pg_pairs; + void *viraddr; + LPFC_MBOXQ_t *mbox; + uint32_t reqlen, alloclen, pg_pairs; + uint32_t mbox_tmo; + uint16_t xritag_start = 0; + int rc = 0; + uint32_t shdr_status, shdr_add_status; + dma_addr_t pdma_phys_bpl1; + union lpfc_sli4_cfg_shdr *shdr; + + /* Calculate the requested length of the dma memory */ + reqlen = count * sizeof(struct sgl_page_pairs) + + sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t); + if (reqlen > SLI4_PAGE_SIZE) { + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "6118 Block sgl registration required DMA " + "size (%d) great than a page\n", reqlen); + return -ENOMEM; + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6119 Failed to allocate mbox cmd memory\n"); + return -ENOMEM; + } + + /* Allocate DMA memory and set up the non-embedded mailbox command */ + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen, + LPFC_SLI4_MBX_NEMBED); + + if (alloclen < reqlen) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6120 Allocated DMA memory size (%d) is " + "less than the requested DMA memory " + "size (%d)\n", alloclen, reqlen); + lpfc_sli4_mbox_cmd_free(phba, mbox); + return -ENOMEM; + } + + /* Get the first SGE entry from the non-embedded DMA memory */ + viraddr = mbox->sge_array->addr[0]; + + /* Set up the SGL pages in the non-embedded DMA pages */ + sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr; + sgl_pg_pairs = &sgl->sgl_pg_pairs; + + pg_pairs = 0; + list_for_each_entry(lpfc_ncmd, nblist, list) { + /* Set up the sge entry */ + sgl_pg_pairs->sgl_pg0_addr_lo = + cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl)); + sgl_pg_pairs->sgl_pg0_addr_hi = + cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl)); + if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE) + pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl + + SGL_PAGE_SIZE; + else + pdma_phys_bpl1 = 0; + sgl_pg_pairs->sgl_pg1_addr_lo = + cpu_to_le32(putPaddrLow(pdma_phys_bpl1)); + sgl_pg_pairs->sgl_pg1_addr_hi = + 
cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
+ /* Keep the first xritag on the list */
+ if (pg_pairs == 0)
+ xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
+ sgl_pg_pairs++;
+ pg_pairs++;
+ }
+ bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+ bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+ /* Perform endian conversion if necessary */
+ sgl->word0 = cpu_to_le32(sgl->word0);
+
+ if (!phba->sli4_hba.intr_enable)
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+ else {
+ mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
+ rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+ }
+ shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
+ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+ shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+ if (rc != MBX_TIMEOUT)
+ lpfc_sli4_mbox_cmd_free(phba, mbox);
+ if (shdr_status || shdr_add_status || rc) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6125 POST_SGL_BLOCK mailbox command failed "
+ "status x%x add_status x%x mbx status x%x\n",
+ shdr_status, shdr_add_status, rc);
+ rc = -ENXIO;
+ }
+ return rc;
+}
+
+/**
+ * lpfc_post_nvme_sgl_list - Post blocks of nvme buffer sgls from a list
+ * @phba: pointer to lpfc hba data structure.
+ * @post_nblist: pointer to the nvme buffer list.
+ *
+ * This routine walks a list of nvme buffers that was passed in. It attempts
+ * to construct blocks of nvme buffer sgls which contain contiguous xris and
+ * uses the non-embedded SGL block post mailbox commands to post to the port.
+ * For single NVME buffer sgl with non-contiguous xri, if any, it shall use
+ * embedded SGL post mailbox command for posting. The @post_nblist passed in
+ * must be a local list, thus no lock is needed when manipulating the list.
+ *
+ * Returns: 0 = failure, non-zero number of successfully posted buffers.
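A minimal sketch of the block-building rule described in the kernel-doc above: accumulate XRIs into a posting block until a hole or the non-embedded mailbox limit is hit. MAX_BLOCK is a stand-in for LPFC_NEMBED_MBOX_SGL_CNT and post_block() is a stub in place of the mailbox posting routine; the real code additionally falls back to an embedded single-SGL post for a trailing non-contiguous entry.

#include <stdio.h>

#define MAX_BLOCK 4   /* stand-in for LPFC_NEMBED_MBOX_SGL_CNT */
#define NO_XRI   -1

/* Stub standing in for the non-embedded SGL block post mailbox call. */
static void post_block(const int *xris, int count)
{
    printf("post %d sgl(s) starting at xri %d\n", count, xris[0]);
}

int main(void)
{
    /* The hole between 12 and 20 forces an early block post. */
    int xris[] = { 10, 11, 12, 20, 21, 22, 23, 24 };
    int n = (int)(sizeof(xris) / sizeof(xris[0]));
    int start = 0, last = NO_XRI, i;

    for (i = 0; i < n; i++) {
        /* Flush the current block on an XRI hole or when it is full. */
        if ((last != NO_XRI && xris[i] != last + 1) ||
            (i - start) == MAX_BLOCK) {
            post_block(&xris[start], i - start);
            start = i;
        }
        last = xris[i];
    }
    post_block(&xris[start], n - start);   /* final partial block */
    return 0;
}

On the sample XRIs this prints one post for 10-12 (hole), one for 20-23 (block limit reached), and one for the leftover 24.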
+ **/ +static int +lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, + struct list_head *post_nblist, int sb_count) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + int status, sgl_size; + int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0; + dma_addr_t pdma_phys_sgl1; + int last_xritag = NO_XRI; + int cur_xritag; + LIST_HEAD(prep_nblist); + LIST_HEAD(blck_nblist); + LIST_HEAD(nvme_nblist); + + /* sanity check */ + if (sb_count <= 0) + return -EINVAL; + + sgl_size = phba->cfg_sg_dma_buf_size; + + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) { + list_del_init(&lpfc_ncmd->list); + block_cnt++; + if ((last_xritag != NO_XRI) && + (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) { + /* a hole in xri block, form a sgl posting block */ + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt - 1; + /* prepare list for next posting block */ + list_add_tail(&lpfc_ncmd->list, &prep_nblist); + block_cnt = 1; + } else { + /* prepare list for next posting block */ + list_add_tail(&lpfc_ncmd->list, &prep_nblist); + /* enough sgls for non-embed sgl mbox command */ + if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) { + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt; + block_cnt = 0; + } + } + num_posting++; + last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag; + + /* end of repost sgl list condition for NVME buffers */ + if (num_posting == sb_count) { + if (post_cnt == 0) { + /* last sgl posting block */ + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt; + } else if (block_cnt == 1) { + /* last single sgl with non-contiguous xri */ + if (sgl_size > SGL_PAGE_SIZE) + pdma_phys_sgl1 = + lpfc_ncmd->dma_phys_sgl + + SGL_PAGE_SIZE; + else + pdma_phys_sgl1 = 0; + cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag; + status = lpfc_sli4_post_sgl(phba, + lpfc_ncmd->dma_phys_sgl, + pdma_phys_sgl1, cur_xritag); + if (status) { + /* failure, put on abort nvme list */ + lpfc_ncmd->exch_busy = 1; + } else { + /* success, put on NVME buffer list */ + lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->status = IOSTAT_SUCCESS; + num_posted++; + } + /* success, put on NVME buffer sgl list */ + list_add_tail(&lpfc_ncmd->list, &nvme_nblist); + } + } + + /* continue until a nembed page worth of sgls */ + if (post_cnt == 0) + continue; + + /* post block of NVME buffer list sgls */ + status = lpfc_sli4_post_nvme_sgl_block(phba, &blck_nblist, + post_cnt); + + /* don't reset xirtag due to hole in xri block */ + if (block_cnt == 0) + last_xritag = NO_XRI; + + /* reset NVME buffer post count for next round of posting */ + post_cnt = 0; + + /* put posted NVME buffer-sgl posted on NVME buffer sgl list */ + while (!list_empty(&blck_nblist)) { + list_remove_head(&blck_nblist, lpfc_ncmd, + struct lpfc_nvme_buf, list); + if (status) { + /* failure, put on abort nvme list */ + lpfc_ncmd->exch_busy = 1; + } else { + /* success, put on NVME buffer list */ + lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->status = IOSTAT_SUCCESS; + num_posted++; + } + list_add_tail(&lpfc_ncmd->list, &nvme_nblist); + } + } + /* Push NVME buffers with sgl posted to the available list */ + while (!list_empty(&nvme_nblist)) { + list_remove_head(&nvme_nblist, lpfc_ncmd, + struct lpfc_nvme_buf, list); + lpfc_release_nvme_buf(phba, lpfc_ncmd); + } + return num_posted; +} + +/** + * lpfc_repost_nvme_sgl_list - Repost all the allocated nvme buffer sgls + * @phba: pointer to lpfc hba data structure. 
+ * + * This routine walks the list of nvme buffers that have been allocated and + * repost them to the port by using SGL block post. This is needed after a + * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine + * is responsible for moving all nvme buffers on the lpfc_abts_nvme_sgl_list + * to the lpfc_nvme_buf_list. If the repost fails, reject all nvme buffers. + * + * Returns: 0 = success, non-zero failure. + **/ +int +lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba) +{ + LIST_HEAD(post_nblist); + int num_posted, rc = 0; + + /* get all NVME buffers need to repost to a local list */ + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&phba->lpfc_nvme_buf_list_get, &post_nblist); + list_splice(&phba->lpfc_nvme_buf_list_put, &post_nblist); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + + /* post the list of nvme buffer sgls to port if available */ + if (!list_empty(&post_nblist)) { + num_posted = lpfc_post_nvme_sgl_list(phba, &post_nblist, + phba->sli4_hba.nvme_xri_cnt); + /* failed to post any nvme buffer, return error */ + if (num_posted == 0) + rc = -EIO; + } + return rc; +} + +/** + * lpfc_new_nvme_buf - Scsi buffer allocator for HBA with SLI4 IF spec + * @vport: The virtual port for which this call being executed. + * @num_to_allocate: The requested number of buffers to allocate. + * + * This routine allocates nvme buffers for device with SLI-4 interface spec, + * the nvme buffer contains all the necessary information needed to initiate + * a NVME I/O. After allocating up to @num_to_allocate NVME buffers and put + * them on a list, it post them to the port by using SGL block post. + * + * Return codes: + * int - number of nvme buffers that were allocated and posted. + * 0 = failure, less than num_to_alloc is a partial failure. + **/ +static int +lpfc_new_nvme_buf(struct lpfc_vport *vport, int num_to_alloc) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_iocbq *pwqeq; + union lpfc_wqe128 *wqe; + struct sli4_sge *sgl; + dma_addr_t pdma_phys_sgl; + uint16_t iotag, lxri = 0; + int bcnt, num_posted, sgl_size; + LIST_HEAD(prep_nblist); + LIST_HEAD(post_nblist); + LIST_HEAD(nvme_nblist); + + sgl_size = phba->cfg_sg_dma_buf_size; + + for (bcnt = 0; bcnt < num_to_alloc; bcnt++) { + lpfc_ncmd = kzalloc(sizeof(struct lpfc_nvme_buf), GFP_KERNEL); + if (!lpfc_ncmd) + break; + /* + * Get memory from the pci pool to map the virt space to + * pci bus space for an I/O. The DMA buffer includes the + * number of SGE's necessary to support the sg_tablesize. + */ + lpfc_ncmd->data = pci_pool_alloc(phba->lpfc_sg_dma_buf_pool, + GFP_KERNEL, + &lpfc_ncmd->dma_handle); + if (!lpfc_ncmd->data) { + kfree(lpfc_ncmd); + break; + } + memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size); + + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + break; + } + pwqeq = &(lpfc_ncmd->cur_iocbq); + wqe = (union lpfc_wqe128 *)&pwqeq->wqe; + + /* Allocate iotag for lpfc_ncmd->cur_iocbq. 
*/ + iotag = lpfc_sli_next_iotag(phba, pwqeq); + if (iotag == 0) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6121 Failed to allocated IOTAG for" + " XRI:0x%x\n", lxri); + lpfc_sli4_free_xri(phba, lxri); + break; + } + pwqeq->sli4_lxritag = lxri; + pwqeq->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + pwqeq->iocb_flag |= LPFC_IO_NVME; + pwqeq->context1 = lpfc_ncmd; + pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl; + + /* Initialize local short-hand pointers. */ + lpfc_ncmd->nvme_sgl = lpfc_ncmd->data; + sgl = lpfc_ncmd->nvme_sgl; + pdma_phys_sgl = lpfc_ncmd->dma_handle; + lpfc_ncmd->dma_phys_sgl = pdma_phys_sgl; + + /* Rsp SGE will be filled in when we rcv an IO + * from the NVME Layer to be sent. + * The cmd is going to be embedded so we need a SKIP SGE. + */ + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + bf_set(lpfc_sli4_sge_last, sgl, 0); + sgl->word2 = cpu_to_le32(sgl->word2); + /* Fill in word 3 / sgl_len during cmd submission */ + + lpfc_ncmd->cur_iocbq.context1 = lpfc_ncmd; + + /* Word 7 */ + bf_set(wqe_erp, &wqe->generic.wqe_com, 0); + /* NVME upper layers will time things out, if needed */ + bf_set(wqe_tmo, &wqe->generic.wqe_com, 0); + + /* Word 10 */ + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_dbde, &wqe->generic.wqe_com, 1); + + /* add the nvme buffer to a post list */ + list_add_tail(&lpfc_ncmd->list, &post_nblist); + spin_lock_irq(&phba->nvme_buf_list_get_lock); + phba->sli4_hba.nvme_xri_cnt++; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + } + lpfc_printf_log(phba, KERN_INFO, LOG_NVME, + "6114 Allocate %d out of %d requested new NVME " + "buffers\n", bcnt, num_to_alloc); + + /* post the list of nvme buffer sgls to port if available */ + if (!list_empty(&post_nblist)) + num_posted = lpfc_post_nvme_sgl_list(phba, + &post_nblist, bcnt); + else + num_posted = 0; + + return num_posted; +} + +/** + * lpfc_get_nvme_buf - Get a nvme buffer from lpfc_nvme_buf_list of the HBA + * @phba: The HBA for which this call is being executed. + * + * This routine removes a nvme buffer from head of @phba lpfc_nvme_buf_list list + * and returns to caller. + * + * Return codes: + * NULL - Error + * Pointer to lpfc_nvme_buf - Success + **/ +static struct lpfc_nvme_buf * +lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + unsigned long iflag = 0; + int found = 0; + + spin_lock_irqsave(&phba->nvme_buf_list_get_lock, iflag); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + if (lpfc_test_rrq_active(phba, ndlp, + lpfc_ncmd->cur_iocbq.sli4_lxritag)) + continue; + list_del(&lpfc_ncmd->list); + found = 1; + break; + } + if (!found) { + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice(&phba->lpfc_nvme_buf_list_put, + &phba->lpfc_nvme_buf_list_get); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + spin_unlock(&phba->nvme_buf_list_put_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + if (lpfc_test_rrq_active( + phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) + continue; + list_del(&lpfc_ncmd->list); + found = 1; + break; + } + } + spin_unlock_irqrestore(&phba->nvme_buf_list_get_lock, iflag); + if (!found) + return NULL; + return lpfc_ncmd; +} + +/** + * lpfc_release_nvme_buf: Return a nvme buffer back to hba nvme buf list. + * @phba: The Hba for which this call is being executed. 
+ * @lpfc_ncmd: The nvme buffer which is being released. + * + * This routine releases @lpfc_ncmd nvme buffer by adding it to tail of @phba + * lpfc_nvme_buf_list list. For SLI4 XRI's are tied to the nvme buffer + * and cannot be reused for at least RA_TOV amount of time if it was + * aborted. + **/ +static void +lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) +{ + unsigned long iflag = 0; + + lpfc_ncmd->nonsg_phys = 0; + if (lpfc_ncmd->exch_busy) { + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + lpfc_ncmd->nvmeCmd = NULL; + list_add_tail(&lpfc_ncmd->list, + &phba->sli4_hba.lpfc_abts_nvme_buf_list); + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + } else { + lpfc_ncmd->nvmeCmd = NULL; + lpfc_ncmd->cur_iocbq.iocb_flag = LPFC_IO_NVME; + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_add_tail(&lpfc_ncmd->list, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + } +} + +/** + * lpfc_nvme_create_localport - Create/Bind an nvme localport instance. + * @pvport - the lpfc_vport instance requesting a localport. + * + * This routine is invoked to create an nvme localport instance to bind + * to the nvme_fc_transport. It is called once during driver load + * like lpfc_create_shost after all other services are initialized. + * It requires a vport, vpi, and wwns at call time. Other localport + * parameters are modified as the driver's FCID and the Fabric WWN + * are established. + * + * Return codes + * 0 - successful + * -ENOMEM - no heap memory available + * other values - from nvme registration upcall + **/ +int +lpfc_nvme_create_localport(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + struct nvme_fc_port_info nfcp_info; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + int len, ret = 0; + + /* Initialize this localport instance. The vport wwn usage ensures + * that NPIV is accounted for. + */ + memset(&nfcp_info, 0, sizeof(struct nvme_fc_port_info)); + nfcp_info.port_role = FC_PORT_ROLE_NVME_INITIATOR; + nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); + nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); + + /* For now need + 1 to get around NVME transport logic */ + lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; + lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; + + /* localport is allocated from the stack, but the registration + * call allocates heap memory as well as the private area. + */ + ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, + &vport->phba->pcidev->dev, &localport); + if (!ret) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC, + "6005 Successfully registered local " + "NVME port num %d, localP %p, private %p, " + "sg_seg %d\n", + localport->port_num, localport, + localport->private, + lpfc_nvme_template.max_sgl_segments); + + /* Private is our lport size declared in the template. */ + lport = (struct lpfc_nvme_lport *)localport->private; + vport->localport = localport; + lport->vport = vport; + INIT_LIST_HEAD(&lport->rport_list); + vport->nvmei_support = 1; + } + + len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); + vport->phba->total_nvme_bufs += len; + return ret; +} + +/** + * lpfc_nvme_destroy_localport - Destroy lpfc_nvme bound to nvme transport. + * @pnvme: pointer to lpfc nvme data structure. + * + * This routine is invoked to destroy all lports bound to the phba. 
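The buffer pool managed by lpfc_get_nvme_buf() and lpfc_release_nvme_buf() above uses a classic two-list scheme: allocations drain a "get" list, frees land on a separate "put" list, and the put list is only spliced back when the get list runs dry, so ordinary get and put traffic rarely take the same lock. A minimal sketch of that idea, with array-backed lists standing in for the driver's list_head machinery and no locking shown:

#include <stdio.h>

#define POOL 4

/* Two-list pool: gets drain get_list[], frees land on put_list[], and the
 * put side is only spliced over when the get side is empty. Locking is
 * omitted; in the driver each list has its own spinlock. */
static int get_list[POOL], get_cnt;
static int put_list[POOL], put_cnt;

static int pool_get(void)
{
    if (!get_cnt) {
        /* Splice the put list over, like the list_splice() in the driver. */
        while (put_cnt)
            get_list[get_cnt++] = put_list[--put_cnt];
    }
    return get_cnt ? get_list[--get_cnt] : -1;
}

static void pool_put(int buf)
{
    if (put_cnt < POOL)
        put_list[put_cnt++] = buf;
}

int main(void)
{
    int i, buf;

    for (i = 0; i < POOL; i++)
        pool_put(i);                /* seed the pool */
    while ((buf = pool_get()) >= 0)
        printf("got buffer %d\n", buf);
    return 0;
}

In the driver the splice happens under nvme_buf_list_put_lock while the get lock is already held, so only the refill path ever holds both locks at once.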
+ * The lport memory was allocated by the nvme fc transport and is + * released there. This routine ensures all rports bound to the + * lport have been disconnected. + * + **/ +void +lpfc_nvme_destroy_localport(struct lpfc_vport *vport) +{ + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; + int ret; + + if (vport->nvmei_support == 0) + return; + + localport = vport->localport; + vport->localport = NULL; + lport = (struct lpfc_nvme_lport *)localport->private; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6011 Destroying NVME localport %p\n", + localport); + + list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) { + /* The last node ref has to get released now before the rport + * private memory area is released by the transport. + */ + list_del(&rport->list); + + init_completion(&rport->rport_unreg_done); + ret = nvme_fc_unregister_remoteport(rport->remoteport); + if (ret) + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6008 rport fail destroy %x\n", ret); + wait_for_completion_timeout(&rport->rport_unreg_done, 5); + } + /* lport's rport list is clear. Unregister + * lport and release resources. + */ + init_completion(&lport->lport_unreg_done); + ret = nvme_fc_unregister_localport(localport); + wait_for_completion_timeout(&lport->lport_unreg_done, 5); + + /* Regardless of the unregister upcall response, clear + * nvmei_support. All rports are unregistered and the + * driver will clean up. + */ + vport->nvmei_support = 0; + if (ret == 0) { + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_DISC, + "6009 Unregistered lport Success\n"); + } else { + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_DISC, + "6010 Unregistered lport " + "Failed, status x%x\n", + ret); + } +} + +void +lpfc_nvme_update_localport(struct lpfc_vport *vport) +{ + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + + localport = vport->localport; + lport = (struct lpfc_nvme_lport *)localport->private; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6012 Update NVME lport %p did x%x\n", + localport, vport->fc_myDID); + + localport->port_id = vport->fc_myDID; + if (localport->port_id == 0) + localport->port_role = FC_PORT_ROLE_NVME_DISCOVERY; + else + localport->port_role = FC_PORT_ROLE_NVME_INITIATOR; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6030 bound lport %p to DID x%06x\n", + lport, localport->port_id); + +} + +int +lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + int ret = 0; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remote_port; + struct nvme_fc_port_info rpinfo; + + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, + "6006 Register NVME PORT. DID x%06x nlptype x%x\n", + ndlp->nlp_DID, ndlp->nlp_type); + + localport = vport->localport; + lport = (struct lpfc_nvme_lport *)localport->private; + + if (ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_INITIATOR)) { + + /* The driver isn't expecting the rport wwn to change + * but it might get a different DID on a different + * fabric. 
+ */ + list_for_each_entry(rport, &lport->rport_list, list) { + if (rport->remoteport->port_name != + wwn_to_u64(ndlp->nlp_portname.u.wwn)) + continue; + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, + "6035 lport %p, found matching rport " + "at wwpn 0x%llx, Data: x%x x%x x%x " + "x%06x\n", + lport, + rport->remoteport->port_name, + rport->remoteport->port_id, + rport->remoteport->port_role, + ndlp->nlp_type, + ndlp->nlp_DID); + remote_port = rport->remoteport; + if ((remote_port->port_id == 0) && + (remote_port->port_role == + FC_PORT_ROLE_NVME_DISCOVERY)) { + remote_port->port_id = ndlp->nlp_DID; + remote_port->port_role &= + ~FC_PORT_ROLE_NVME_DISCOVERY; + if (ndlp->nlp_type & NLP_NVME_TARGET) + remote_port->port_role |= + FC_PORT_ROLE_NVME_TARGET; + if (ndlp->nlp_type & NLP_NVME_INITIATOR) + remote_port->port_role |= + FC_PORT_ROLE_NVME_INITIATOR; + + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NVME_DISC, + "6014 Rebinding lport to " + "rport wwpn 0x%llx, " + "Data: x%x x%x x%x x%06x\n", + remote_port->port_name, + remote_port->port_id, + remote_port->port_role, + ndlp->nlp_type, + ndlp->nlp_DID); + } + return 0; + } + + /* NVME rports are not preserved across devloss. + * Just register this instance. + */ + rpinfo.port_id = ndlp->nlp_DID; + rpinfo.port_role = 0; + if (ndlp->nlp_type & NLP_NVME_TARGET) + rpinfo.port_role |= FC_PORT_ROLE_NVME_TARGET; + if (ndlp->nlp_type & NLP_NVME_INITIATOR) + rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR; + rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); + rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); + + ret = nvme_fc_register_remoteport(localport, &rpinfo, + &remote_port); + if (!ret) { + rport = remote_port->private; + rport->remoteport = remote_port; + rport->lport = lport; + rport->ndlp = lpfc_nlp_get(ndlp); + if (!rport->ndlp) + return -1; + ndlp->nrport = rport; + INIT_LIST_HEAD(&rport->list); + list_add_tail(&rport->list, &lport->rport_list); + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NVME_DISC | LOG_NODE, + "6022 Binding new rport to lport %p " + "Rport WWNN 0x%llx, Rport WWPN 0x%llx " + "DID x%06x Role x%x\n", + lport, + rpinfo.node_name, rpinfo.port_name, + rpinfo.port_id, rpinfo.port_role); + } else { + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE, + "6031 RemotePort Registration failed " + "err: %d, DID x%06x\n", + ret, ndlp->nlp_DID); + } + } else { + ret = -EINVAL; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6027 Unknown nlp_type x%x on DID x%06x " + "ndlp %p. Not Registering nvme rport\n", + ndlp->nlp_type, ndlp->nlp_DID, ndlp); + } + return ret; +} + +/* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. + * + * There is no notion of Devloss or rport recovery from the current + * nvme_transport perspective. Loss of an rport just means IO cannot + * be sent and recovery is completely up to the initator. + * For now, the driver just unbinds the DID and port_role so that + * no further IO can be issued. Changes are planned for later. + * + * Notes - the ndlp reference count is not decremented here since + * since there is no nvme_transport api for devloss. Node ref count + * is only adjusted in driver unload. + */ +void +lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + int ret; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remoteport; + + localport = vport->localport; + + /* This is fundamental error. 
The localport is always + * available until driver unload. Just exit. + */ + if (!localport) + return; + + lport = (struct lpfc_nvme_lport *)localport->private; + if (!lport) + goto input_err; + + rport = ndlp->nrport; + if (!rport) + goto input_err; + + remoteport = rport->remoteport; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6033 Unreg nvme remoteport %p, portname x%llx, " + "port_id x%06x, portstate x%x port type x%x\n", + remoteport, remoteport->port_name, + remoteport->port_id, remoteport->port_state, + ndlp->nlp_type); + + /* Sanity check ndlp type. Only call for NVME ports. Don't + * clear any rport state until the transport calls back. + */ + if (ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_INITIATOR)) { + init_completion(&rport->rport_unreg_done); + ret = nvme_fc_unregister_remoteport(remoteport); + if (ret != 0) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6167 NVME unregister failed %d " + "port_state x%x\n", + ret, remoteport->port_state); + } + + /* Wait for the driver's delete completion routine to finish + * before proceeding. This guarantees the transport and driver + * have completed the unreg process. + */ + ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5); + if (ret == 0) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6169 Unreg nvme wait failed %d\n", + ret); + } + } + return; + + input_err: + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6168: State error: lport %p, rport%p FCID x%06x\n", + vport->localport, ndlp->rport, ndlp->nlp_DID); +} diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 2b167933fd2602..4cc51086a5f703 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -29,6 +29,12 @@ #define LPFC_NVME_ERSP_LEN 0x20 +struct lpfc_nvme_qhandle { + uint32_t index; /* WQ index to use */ + uint32_t qidx; /* queue index passed to create */ + uint32_t cpu_id; /* current cpu id at time of create */ +}; + /* Declare nvme-based local and remote port definitions. */ struct lpfc_nvme_lport { struct lpfc_vport *vport; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 52fa5c77f3bcfc..ccda1f73ae848c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6795,6 +6795,22 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } } + if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && + (phba->nvmet_support == 0)) { + + /* register the allocated nvme sgl pool to the port */ + rc = lpfc_repost_nvme_sgl_list(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6116 Error %d during nvme sgl post " + "operation\n", rc); + /* Some NVME buffers were moved to abort nvme list */ + /* A pci function reset will repost them */ + rc = -ENODEV; + goto out_destroy_queue; + } + } + /* Post the rpi header region to the device. */ rc = lpfc_sli4_post_all_rpi_hdrs(phba); if (unlikely(rc)) { @@ -10492,10 +10508,7 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abtsiocbp->iocb_flag |= LPFC_IO_NVME; abtsiocbp->vport = vport; - /* todo: assign wqe_cmpl to lpfc_nvme_abort_fcreq_cmpl - * subsequent patch will add routine. For now, just skip assignment - * as won't ever be called. 
- */ + abtsiocbp->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl; retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp); if (retval == IOCB_ERROR) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index b5b4f6b843c0b3..9cb8508b4b4b6b 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -753,6 +753,7 @@ int lpfc_sli4_queue_setup(struct lpfc_hba *); void lpfc_sli4_queue_unset(struct lpfc_hba *); int lpfc_sli4_post_sgl(struct lpfc_hba *, dma_addr_t, dma_addr_t, uint16_t); int lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *); +int lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba); uint16_t lpfc_sli4_next_xritag(struct lpfc_hba *); void lpfc_sli4_free_xri(struct lpfc_hba *, int); int lpfc_sli4_post_async_mbox(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 07c7c5f9fdde4f..b0d94f2e5b44a7 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -403,7 +403,21 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable) vport->fdmi_port_mask = phba->pport->fdmi_port_mask; } - /* todo: init: register port with nvme */ + if ((phba->nvmet_support == 0) && + ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))) { + /* Create NVME binding with nvme_fc_transport. This + * ensures the vport is initialized. + */ + rc = lpfc_nvme_create_localport(vport); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6003 %s status x%x\n", + "NVME registration failed, ", + rc); + goto error_out; + } + } /* * In SLI4, the vpi must be activated before it can be used From bd2cdd5e400f5914bc30d5cfb0a0185cf51e4424 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:33 -0800 Subject: [PATCH 0154/1911] scsi: lpfc: NVME Initiator: Add debugfs support NVME Initiator: Add debugfs support Adds debugfs snippets to cover the new NVME initiator functionality Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc.h | 55 ++ drivers/scsi/lpfc/lpfc_crtn.h | 6 +- drivers/scsi/lpfc/lpfc_ct.c | 27 +- drivers/scsi/lpfc/lpfc_debugfs.c | 1160 +++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_debugfs.h | 21 + drivers/scsi/lpfc/lpfc_nvme.c | 143 ++++ drivers/scsi/lpfc/lpfc_nvme.h | 7 + drivers/scsi/lpfc/lpfc_sli.c | 5 + 8 files changed, 1300 insertions(+), 124 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 9ad63a47425be5..fdb314b3282d32 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -458,6 +458,9 @@ struct lpfc_vport { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS struct dentry *debug_disc_trc; struct dentry *debug_nodelist; + struct dentry *debug_nvmestat; + struct dentry *debug_nvmektime; + struct dentry *debug_cpucheck; struct dentry *vport_debugfs_root; struct lpfc_debugfs_trc *disc_trc; atomic_t disc_trc_cnt; @@ -984,6 +987,12 @@ struct lpfc_hba { struct dentry *debug_readApp; /* inject read app_tag errors */ struct dentry *debug_readRef; /* inject read ref_tag errors */ + struct dentry *debug_nvmeio_trc; + struct lpfc_debugfs_nvmeio_trc *nvmeio_trc; + atomic_t nvmeio_trc_cnt; + uint32_t nvmeio_trc_size; + uint32_t nvmeio_trc_output_idx; + /* T10 DIF error injection */ uint32_t lpfc_injerr_wgrd_cnt; uint32_t lpfc_injerr_wapp_cnt; @@ -1011,6 +1020,7 @@ struct lpfc_hba { struct dentry *idiag_ext_acc; uint8_t lpfc_idiag_last_eq; #endif + uint16_t nvmeio_trc_on; /* Used for deferred freeing of ELS data buffers */ struct list_head elsbuf; @@ -1083,6 +1093,51 @@ struct lpfc_hba { #define LPFC_TRANSGRESSION_LOW_RXPOWER 0x4000 uint16_t sfp_alarm; uint16_t sfp_warning; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +#define LPFC_CHECK_CPU_CNT 32 + uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_ccmpl_io[LPFC_CHECK_CPU_CNT]; + uint16_t cpucheck_on; +#define LPFC_CHECK_OFF 0 +#define LPFC_CHECK_NVME_IO 1 + uint16_t ktime_on; + uint64_t ktime_data_samples; + uint64_t ktime_status_samples; + uint64_t ktime_last_cmd; + uint64_t ktime_seg1_total; + uint64_t ktime_seg1_min; + uint64_t ktime_seg1_max; + uint64_t ktime_seg2_total; + uint64_t ktime_seg2_min; + uint64_t ktime_seg2_max; + uint64_t ktime_seg3_total; + uint64_t ktime_seg3_min; + uint64_t ktime_seg3_max; + uint64_t ktime_seg4_total; + uint64_t ktime_seg4_min; + uint64_t ktime_seg4_max; + uint64_t ktime_seg5_total; + uint64_t ktime_seg5_min; + uint64_t ktime_seg5_max; + uint64_t ktime_seg6_total; + uint64_t ktime_seg6_min; + uint64_t ktime_seg6_max; + uint64_t ktime_seg7_total; + uint64_t ktime_seg7_min; + uint64_t ktime_seg7_max; + uint64_t ktime_seg8_total; + uint64_t ktime_seg8_min; + uint64_t ktime_seg8_max; + uint64_t ktime_seg9_total; + uint64_t ktime_seg9_min; + uint64_t ktime_seg9_max; + uint64_t ktime_seg10_total; + uint64_t ktime_seg10_min; + uint64_t ktime_seg10_max; +#endif }; static inline struct Scsi_Host * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index a9c8a0a41b1644..4a0db38959936c 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -393,9 +393,11 @@ void lpfc_host_attrib_init(struct Scsi_Host *); extern void lpfc_debugfs_initialize(struct lpfc_vport *); extern void lpfc_debugfs_terminate(struct lpfc_vport *); extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t, - uint32_t, uint32_t); + uint32_t, uint32_t); extern void lpfc_debugfs_slow_ring_trc(struct lpfc_hba *, char 
*, uint32_t, - uint32_t, uint32_t); + uint32_t, uint32_t); +extern void lpfc_debugfs_nvme_trc(struct lpfc_hba *phba, char *fmt, + uint16_t data1, uint16_t data2, uint32_t data3); extern struct lpfc_hbq_init *lpfc_hbq_defs[]; /* SLI4 if_type 2 externs. */ diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 8cfeffe312e027..2c051369857a13 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -465,6 +465,10 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) ndlp = lpfc_setup_disc_node(vport, Did); if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Parse GID_FTrsp: did:x%x flg:x%x x%x", + Did, ndlp->nlp_flag, vport->fc_flag); + /* By default, the driver expects to support FCP FC4 */ if (fc4_type == FC_TYPE_FCP) ndlp->nlp_fc4_type |= NLP_FC4_FCP; @@ -478,16 +482,24 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) ndlp->nlp_flag, ndlp->nlp_fc4_type, vport->fc_flag, vport->fc_rscn_id_cnt); - } else + } else { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Skip1 GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0239 Skip x%06x NameServer Rsp " "Data: x%x x%x\n", Did, vport->fc_flag, vport->fc_rscn_id_cnt); - + } } else { if (!(vport->fc_flag & FC_RSCN_MODE) || lpfc_rscn_payload_check(vport, Did)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Query GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + /* * This NPortID was previously a FCP target, * Don't even bother to send GFF_ID. @@ -509,12 +521,17 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) else lpfc_setup_disc_node(vport, Did); } - } else + } else { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Skip2 GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0245 Skip x%06x NameServer Rsp " "Data: x%x x%x\n", Did, vport->fc_flag, vport->fc_rscn_id_cnt); + } } } @@ -892,6 +909,10 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, did = ((struct lpfc_sli_ct_request *)inp->virt)->un.gft.PortId; did = be32_to_cpu(did); + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GFT_ID cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], did); + if (irsp->ulpStatus == IOSTAT_SUCCESS) { /* Good status, continue checking */ CTrsp = (struct lpfc_sli_ct_request *)outp->virt; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index d9bbebe8eb37df..3fcba26623eb78 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -34,6 +34,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -41,8 +44,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -99,6 +103,12 @@ module_param(lpfc_debugfs_max_slow_ring_trc, int, S_IRUGO); MODULE_PARM_DESC(lpfc_debugfs_max_slow_ring_trc, "Set debugfs slow ring trace depth"); +/* This MUST be a power of 2 */ +static int lpfc_debugfs_max_nvmeio_trc; +module_param(lpfc_debugfs_max_nvmeio_trc, int, 0444); +MODULE_PARM_DESC(lpfc_debugfs_max_nvmeio_trc, + "Set debugfs NVME IO trace depth"); + static int 
lpfc_debugfs_mask_disc_trc; module_param(lpfc_debugfs_mask_disc_trc, int, S_IRUGO); MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc, @@ -535,6 +545,10 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; unsigned char *statep; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *nrport; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -612,6 +626,358 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock_irq(shost->host_lock); + len += snprintf(buf + len, size - len, + "\nNVME Lport/Rport Entries ...\n"); + + localport = vport->localport; + if (!localport) + goto out_exit; + + spin_lock_irq(shost->host_lock); + lport = (struct lpfc_nvme_lport *)localport->private; + + /* Port state is only one of two values for now. */ + if (localport->port_id) + statep = "ONLINE"; + else + statep = "UNKNOWN "; + + len += snprintf(buf + len, size - len, + "Lport DID x%06x PortState %s\n", + localport->port_id, statep); + + len += snprintf(buf + len, size - len, "\tRport List:\n"); + list_for_each_entry(rport, &lport->rport_list, list) { + /* local short-hand pointer. */ + nrport = rport->remoteport; + + /* Port state is only one of two values for now. */ + switch (nrport->port_state) { + case FC_OBJSTATE_ONLINE: + statep = "ONLINE"; + break; + case FC_OBJSTATE_UNKNOWN: + statep = "UNKNOWN "; + break; + default: + statep = "UNSUPPORTED"; + break; + } + + /* Tab in to show lport ownership. */ + len += snprintf(buf + len, size - len, + "\t%s Port ID:x%06x ", + statep, nrport->port_id); + len += snprintf(buf + len, size - len, "WWPN x%llx ", + nrport->port_name); + len += snprintf(buf + len, size - len, "WWNN x%llx ", + nrport->node_name); + switch (nrport->port_role) { + case FC_PORT_ROLE_NVME_INITIATOR: + len += snprintf(buf + len, size - len, + "NVME INITIATOR "); + break; + case FC_PORT_ROLE_NVME_TARGET: + len += snprintf(buf + len, size - len, + "NVME TARGET "); + break; + case FC_PORT_ROLE_NVME_DISCOVERY: + len += snprintf(buf + len, size - len, + "NVME DISCOVERY "); + break; + default: + len += snprintf(buf + len, size - len, + "UNKNOWN ROLE x%x", + nrport->port_role); + break; + } + + /* Terminate the string. */ + len += snprintf(buf + len, size - len, "\n"); + } + + spin_unlock_irq(shost->host_lock); + out_exit: + return len; +} + +/** + * lpfc_debugfs_nvmestat_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. 
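Aside: the nodelist and nvmestat dumps above all grow their output with the same bounded-append idiom, len += snprintf(buf + len, size - len, ...). The following is a minimal standalone sketch of that idiom, not driver code; dump_stats() and the sample values are purely illustrative. One caveat worth noting: C99 snprintf() returns the length the output would have had, so the driver keeps LPFC_NVMESTAT_SIZE and friends comfortably larger than any dump it produces.

	#include <stdio.h>

	/* Illustrative stand-in for a debugfs *_data routine: append
	 * formatted chunks into buf without writing past size bytes. */
	static int dump_stats(char *buf, int size)
	{
		int len = 0;

		len += snprintf(buf + len, size - len,
				"LS: Xmt %016llx Cmpl %016llx\n", 16ULL, 15ULL);
		len += snprintf(buf + len, size - len,
				"FCP: Rd %016llx Wr %016llx\n", 256ULL, 128ULL);
		return len;	/* bytes available to the debugfs read side */
	}

	int main(void)
	{
		char buf[256];
		int len = dump_stats(buf, sizeof(buf));

		if (len > (int)sizeof(buf))	/* guard for the truncation caveat */
			len = sizeof(buf);
		fwrite(buf, 1, len, stdout);
		return 0;
	}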
+ **/ +static int +lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int len = 0; + + if (phba->nvmet_support == 0) { + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return len; + + len += snprintf(buf + len, size - len, + "\nNVME Lport Statistics\n"); + + len += snprintf(buf + len, size - len, + "LS: Xmt %016llx Cmpl %016llx\n", + phba->fc4NvmeLsRequests, + phba->fc4NvmeLsCmpls); + + len += snprintf(buf + len, size - len, + "FCP: Rd %016llx Wr %016llx IO %016llx\n", + phba->fc4NvmeInputRequests, + phba->fc4NvmeOutputRequests, + phba->fc4NvmeControlRequests); + + len += snprintf(buf + len, size - len, + " Cmpl %016llx\n", phba->fc4NvmeIoCmpls); + } + + return len; +} + + +/** + * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. + **/ +static int +lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int len = 0; + + if (phba->nvmet_support == 0) { + /* NVME Initiator */ + len += snprintf(buf + len, PAGE_SIZE - len, + "ktime %s: Total Samples: %lld\n", + (phba->ktime_on ? "Enabled" : "Disabled"), + phba->ktime_data_samples); + if (phba->ktime_data_samples == 0) + return len; + + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 1: Last NVME Cmd cmpl " + "done -to- Start of next NVME cnd (in driver)\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg1_total / + phba->ktime_data_samples, + phba->ktime_seg1_min, + phba->ktime_seg1_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 2: Driver start of NVME cmd " + "-to- Firmware WQ doorbell\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg2_total / + phba->ktime_data_samples, + phba->ktime_seg2_min, + phba->ktime_seg2_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 3: Firmware WQ doorbell -to- " + "MSI-X ISR cmpl\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg3_total / + phba->ktime_data_samples, + phba->ktime_seg3_min, + phba->ktime_seg3_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 4: MSI-X ISR cmpl -to- " + "NVME cmpl done\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg4_total / + phba->ktime_data_samples, + phba->ktime_seg4_min, + phba->ktime_seg4_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Total IO avg time: %08lld\n", + ((phba->ktime_seg1_total + + phba->ktime_seg2_total + + phba->ktime_seg3_total + + phba->ktime_seg4_total) / + phba->ktime_data_samples)); + return len; + } + + return len; +} + +/** + * lpfc_debugfs_nvmeio_trc_data - Dump NVME IO trace list to a buffer + * @phba: The phba to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME IO trace associated with @phba + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. 
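Aside: the nvmektime report above is plain total/min/max bookkeeping per segment, with the average computed at read time as total divided by the sample count. A small standalone sketch of that bookkeeping follows; struct seg_stat and seg_stat_add() are illustrative names, not the driver's fields, and the min is seeded with a maximum value for the same reason the driver seeds each ktime_segN_min with 0xffffffff.

	#include <stdint.h>
	#include <stdio.h>

	struct seg_stat {
		uint64_t total, min, max, samples;
	};

	static void seg_stat_add(struct seg_stat *s, uint64_t ns)
	{
		s->total += ns;
		s->samples++;
		if (ns < s->min)
			s->min = ns;
		if (ns > s->max)
			s->max = ns;
	}

	int main(void)
	{
		struct seg_stat s = { .min = UINT64_MAX };
		const uint64_t latencies[] = { 1200, 800, 950 };  /* ns, made up */

		for (unsigned int i = 0; i < 3; i++)
			seg_stat_add(&s, latencies[i]);

		printf("avg:%08llu min:%08llu max %08llu\n",
		       (unsigned long long)(s.total / s.samples),
		       (unsigned long long)s.min,
		       (unsigned long long)s.max);
		return 0;
	}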
+ **/ +static int +lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) +{ + struct lpfc_debugfs_nvmeio_trc *dtp; + int i, state, index, skip; + int len = 0; + + state = phba->nvmeio_trc_on; + + index = (atomic_read(&phba->nvmeio_trc_cnt) + 1) & + (phba->nvmeio_trc_size - 1); + skip = phba->nvmeio_trc_output_idx; + + len += snprintf(buf + len, size - len, + "%s IO Trace %s: next_idx %d skip %d size %d\n", + (phba->nvmet_support ? "NVME" : "NVMET"), + (state ? "Enabled" : "Disabled"), + index, skip, phba->nvmeio_trc_size); + + if (!phba->nvmeio_trc || state) + return len; + + /* trace MUST bhe off to continue */ + + for (i = index; i < phba->nvmeio_trc_size; i++) { + if (skip) { + skip--; + continue; + } + dtp = phba->nvmeio_trc + i; + phba->nvmeio_trc_output_idx++; + + if (!dtp->fmt) + continue; + + len += snprintf(buf + len, size - len, dtp->fmt, + dtp->data1, dtp->data2, dtp->data3); + + if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { + phba->nvmeio_trc_output_idx = 0; + len += snprintf(buf + len, size - len, + "Trace Complete\n"); + goto out; + } + + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { + len += snprintf(buf + len, size - len, + "Trace Continue (%d of %d)\n", + phba->nvmeio_trc_output_idx, + phba->nvmeio_trc_size); + goto out; + } + } + for (i = 0; i < index; i++) { + if (skip) { + skip--; + continue; + } + dtp = phba->nvmeio_trc + i; + phba->nvmeio_trc_output_idx++; + + if (!dtp->fmt) + continue; + + len += snprintf(buf + len, size - len, dtp->fmt, + dtp->data1, dtp->data2, dtp->data3); + + if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { + phba->nvmeio_trc_output_idx = 0; + len += snprintf(buf + len, size - len, + "Trace Complete\n"); + goto out; + } + + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { + len += snprintf(buf + len, size - len, + "Trace Continue (%d of %d)\n", + phba->nvmeio_trc_output_idx, + phba->nvmeio_trc_size); + goto out; + } + } + + len += snprintf(buf + len, size - len, + "Trace Done\n"); +out: + return len; +} + +/** + * lpfc_debugfs_cpucheck_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. + **/ +static int +lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int i; + int len = 0; + uint32_t tot_xmt = 0; + uint32_t tot_cmpl = 0; + + if (phba->nvmet_support == 0) { + /* NVME Initiator */ + len += snprintf(buf + len, PAGE_SIZE - len, + "CPUcheck %s\n", + (phba->cpucheck_on & LPFC_CHECK_NVME_IO ? + "Enabled" : "Disabled")); + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%02d: xmit x%08x cmpl x%08x\n", + i, phba->cpucheck_xmt_io[i], + phba->cpucheck_cmpl_io[i]); + tot_xmt += phba->cpucheck_xmt_io[i]; + tot_cmpl += phba->cpucheck_cmpl_io[i]; + } + len += snprintf(buf + len, PAGE_SIZE - len, + "tot:xmit x%08x cmpl x%08x\n", + tot_xmt, tot_cmpl); + return len; + } + return len; } @@ -699,6 +1065,40 @@ lpfc_debugfs_slow_ring_trc(struct lpfc_hba *phba, char *fmt, return; } +/** + * lpfc_debugfs_nvme_trc - Store NVME/NVMET trace log + * @phba: The phba to associate this trace string with for retrieval. 
+ * @fmt: Format string to be displayed when dumping the log. + * @data1: 1st data parameter to be applied to @fmt. + * @data2: 2nd data parameter to be applied to @fmt. + * @data3: 3rd data parameter to be applied to @fmt. + * + * Description: + * This routine is used by the driver code to add a debugfs log entry to the + * nvme trace buffer associated with @phba. @fmt, @data1, @data2, and + * @data3 are used like printf when displaying the log. + **/ +inline void +lpfc_debugfs_nvme_trc(struct lpfc_hba *phba, char *fmt, + uint16_t data1, uint16_t data2, uint32_t data3) +{ +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + struct lpfc_debugfs_nvmeio_trc *dtp; + int index; + + if (!phba->nvmeio_trc_on || !phba->nvmeio_trc) + return; + + index = atomic_inc_return(&phba->nvmeio_trc_cnt) & + (phba->nvmeio_trc_size - 1); + dtp = phba->nvmeio_trc + index; + dtp->fmt = fmt; + dtp->data1 = data1; + dtp->data2 = data2; + dtp->data3 = data3; +#endif +} + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS /** * lpfc_debugfs_disc_trc_open - Open the discovery trace log @@ -1231,6 +1631,356 @@ lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file) return 0; } + +static int +lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMESTAT_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmestat_data(vport, debug->buffer, + LPFC_NVMESTAT_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static int +lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMEKTIME_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmektime_data(vport, debug->buffer, + LPFC_NVMEKTIME_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_nvmektime_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + char mybuf[64]; + char *pbuf; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + phba->ktime_data_samples = 0; + phba->ktime_status_samples = 0; + phba->ktime_seg1_total = 0; + phba->ktime_seg1_max = 0; + phba->ktime_seg1_min = 0xffffffff; + phba->ktime_seg2_total = 0; + phba->ktime_seg2_max = 0; + phba->ktime_seg2_min = 0xffffffff; + phba->ktime_seg3_total = 0; + phba->ktime_seg3_max = 0; + phba->ktime_seg3_min = 0xffffffff; + phba->ktime_seg4_total = 0; + phba->ktime_seg4_max = 0; + phba->ktime_seg4_min = 0xffffffff; + phba->ktime_seg5_total = 0; + phba->ktime_seg5_max = 0; + phba->ktime_seg5_min = 0xffffffff; + phba->ktime_seg6_total = 0; + 
phba->ktime_seg6_max = 0; + phba->ktime_seg6_min = 0xffffffff; + phba->ktime_seg7_total = 0; + phba->ktime_seg7_max = 0; + phba->ktime_seg7_min = 0xffffffff; + phba->ktime_seg8_total = 0; + phba->ktime_seg8_max = 0; + phba->ktime_seg8_min = 0xffffffff; + phba->ktime_seg9_total = 0; + phba->ktime_seg9_max = 0; + phba->ktime_seg9_min = 0xffffffff; + phba->ktime_seg10_total = 0; + phba->ktime_seg10_max = 0; + phba->ktime_seg10_min = 0xffffffff; + + phba->ktime_on = 1; + return strlen(pbuf); + } else if ((strncmp(pbuf, "off", + sizeof("off") - 1) == 0)) { + phba->ktime_on = 0; + return strlen(pbuf); + } else if ((strncmp(pbuf, "zero", + sizeof("zero") - 1) == 0)) { + phba->ktime_data_samples = 0; + phba->ktime_status_samples = 0; + phba->ktime_seg1_total = 0; + phba->ktime_seg1_max = 0; + phba->ktime_seg1_min = 0xffffffff; + phba->ktime_seg2_total = 0; + phba->ktime_seg2_max = 0; + phba->ktime_seg2_min = 0xffffffff; + phba->ktime_seg3_total = 0; + phba->ktime_seg3_max = 0; + phba->ktime_seg3_min = 0xffffffff; + phba->ktime_seg4_total = 0; + phba->ktime_seg4_max = 0; + phba->ktime_seg4_min = 0xffffffff; + phba->ktime_seg5_total = 0; + phba->ktime_seg5_max = 0; + phba->ktime_seg5_min = 0xffffffff; + phba->ktime_seg6_total = 0; + phba->ktime_seg6_max = 0; + phba->ktime_seg6_min = 0xffffffff; + phba->ktime_seg7_total = 0; + phba->ktime_seg7_max = 0; + phba->ktime_seg7_min = 0xffffffff; + phba->ktime_seg8_total = 0; + phba->ktime_seg8_max = 0; + phba->ktime_seg8_min = 0xffffffff; + phba->ktime_seg9_total = 0; + phba->ktime_seg9_max = 0; + phba->ktime_seg9_min = 0xffffffff; + phba->ktime_seg10_total = 0; + phba->ktime_seg10_max = 0; + phba->ktime_seg10_min = 0xffffffff; + return strlen(pbuf); + } + return -EINVAL; +} + +static int +lpfc_debugfs_nvmeio_trc_open(struct inode *inode, struct file *file) +{ + struct lpfc_hba *phba = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMEIO_TRC_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmeio_trc_data(phba, debug->buffer, + LPFC_NVMEIO_TRC_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; + int i; + unsigned long sz; + char mybuf[64]; + char *pbuf; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "off", sizeof("off") - 1) == 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0570 nvmeio_trc_off\n"); + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc_on = 0; + return strlen(pbuf); + } else if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0571 nvmeio_trc_on\n"); + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc_on = 1; + return strlen(pbuf); + } + + /* We must be off to allocate the trace buffer */ + if (phba->nvmeio_trc_on != 0) + return -EINVAL; + + /* If not on or off, the parameter is the trace buffer size */ + i = kstrtoul(pbuf, 0, &sz); + if (i) + return -EINVAL; + 
phba->nvmeio_trc_size = (uint32_t)sz; + + /* It must be a power of 2 - round down */ + i = 0; + while (sz > 1) { + sz = sz >> 1; + i++; + } + sz = (1 << i); + if (phba->nvmeio_trc_size != sz) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0572 nvmeio_trc_size changed to %ld\n", + sz); + phba->nvmeio_trc_size = (uint32_t)sz; + + /* If one previously exists, free it */ + kfree(phba->nvmeio_trc); + + /* Allocate new trace buffer and initialize */ + phba->nvmeio_trc = kmalloc((sizeof(struct lpfc_debugfs_nvmeio_trc) * + sz), GFP_KERNEL); + if (!phba->nvmeio_trc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0573 Cannot create debugfs " + "nvmeio_trc buffer\n"); + return -ENOMEM; + } + memset(phba->nvmeio_trc, 0, + (sizeof(struct lpfc_debugfs_nvmeio_trc) * sz)); + atomic_set(&phba->nvmeio_trc_cnt, 0); + phba->nvmeio_trc_on = 0; + phba->nvmeio_trc_output_idx = 0; + + return strlen(pbuf); +} + +static int +lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_CPUCHECK_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer, + LPFC_NVMEKTIME_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + char mybuf[64]; + char *pbuf; + int i; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + phba->cpucheck_on |= LPFC_CHECK_NVME_IO; + return strlen(pbuf); + } else if ((strncmp(pbuf, "rcv", + sizeof("rcv") - 1) == 0)) { + return -EINVAL; + } else if ((strncmp(pbuf, "off", + sizeof("off") - 1) == 0)) { + phba->cpucheck_on = LPFC_CHECK_OFF; + return strlen(pbuf); + } else if ((strncmp(pbuf, "zero", + sizeof("zero") - 1) == 0)) { + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + phba->cpucheck_rcv_io[i] = 0; + phba->cpucheck_xmt_io[i] = 0; + phba->cpucheck_cmpl_io[i] = 0; + phba->cpucheck_ccmpl_io[i] = 0; + } + return strlen(pbuf); + } + return -EINVAL; +} + /* * --------------------------------- * iDiag debugfs file access methods @@ -2581,6 +3331,17 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_cq; goto pass_check; } + /* NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq && + phba->sli4_hba.nvmels_cq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_cq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_cq; + goto pass_check; + } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -2597,6 +3358,25 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, } } } + /* NVME complete queue */ + if (phba->sli4_hba.nvme_cq) { + 
qidx = 0; + do { + if (phba->sli4_hba.nvme_cq[qidx] && + phba->sli4_hba.nvme_cq[qidx]->queue_id == + queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvme_cq[qidx], + index, count); + if (rc) + goto error_out; + idiag.ptr_private = + phba->sli4_hba.nvme_cq[qidx]; + goto pass_check; + } + } while (++qidx < phba->cfg_nvme_io_channel); + } goto error_out; break; case LPFC_IDIAG_MQ: @@ -2636,6 +3416,17 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_wq; goto pass_check; } + /* NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq && + phba->sli4_hba.nvmels_wq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_wq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_wq; + goto pass_check; + } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -2668,6 +3459,27 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, } } } + + /* NVME work queues */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; + qidx++) { + if (!phba->sli4_hba.nvme_wq[qidx]) + continue; + if (phba->sli4_hba.nvme_wq[qidx]->queue_id == + queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvme_wq[qidx], + index, count); + if (rc) + goto error_out; + idiag.ptr_private = + phba->sli4_hba.nvme_wq[qidx]; + goto pass_check; + } + } + } goto error_out; break; case LPFC_IDIAG_RQ: @@ -3651,6 +4463,45 @@ static const struct file_operations lpfc_debugfs_op_dumpHostSlim = { .release = lpfc_debugfs_release, }; +#undef lpfc_debugfs_op_nvmestat +static const struct file_operations lpfc_debugfs_op_nvmestat = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmestat_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_nvmektime +static const struct file_operations lpfc_debugfs_op_nvmektime = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmektime_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmektime_write, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_nvmeio_trc +static const struct file_operations lpfc_debugfs_op_nvmeio_trc = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmeio_trc_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmeio_trc_write, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_cpucheck +static const struct file_operations lpfc_debugfs_op_cpucheck = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_cpucheck_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_cpucheck_write, + .release = lpfc_debugfs_release, +}; + #undef lpfc_debugfs_op_dumpData static const struct file_operations lpfc_debugfs_op_dumpData = { .owner = THIS_MODULE, @@ -4237,6 +5088,60 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) lpfc_debugfs_max_slow_ring_trc)); } + snprintf(name, sizeof(name), "nvmeio_trc"); + phba->debug_nvmeio_trc = + debugfs_create_file(name, 0644, + phba->hba_debugfs_root, + phba, &lpfc_debugfs_op_nvmeio_trc); + if (!phba->debug_nvmeio_trc) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0574 No create debugfs nvmeio_trc\n"); + goto debug_failed; + } + + atomic_set(&phba->nvmeio_trc_cnt, 0); + if (lpfc_debugfs_max_nvmeio_trc) { + num = lpfc_debugfs_max_nvmeio_trc - 1; + if (num & 
lpfc_debugfs_max_disc_trc) { + /* Change to be a power of 2 */ + num = lpfc_debugfs_max_nvmeio_trc; + i = 0; + while (num > 1) { + num = num >> 1; + i++; + } + lpfc_debugfs_max_nvmeio_trc = (1 << i); + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0575 lpfc_debugfs_max_nvmeio_trc " + "changed to %d\n", + lpfc_debugfs_max_nvmeio_trc); + } + phba->nvmeio_trc_size = lpfc_debugfs_max_nvmeio_trc; + + /* Allocate trace buffer and initialize */ + phba->nvmeio_trc = kmalloc( + (sizeof(struct lpfc_debugfs_nvmeio_trc) * + phba->nvmeio_trc_size), GFP_KERNEL); + + if (!phba->nvmeio_trc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0576 Cannot create debugfs " + "nvmeio_trc buffer\n"); + goto nvmeio_off; + } + memset(phba->nvmeio_trc, 0, + (sizeof(struct lpfc_debugfs_nvmeio_trc) * + phba->nvmeio_trc_size)); + phba->nvmeio_trc_on = 1; + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc = NULL; + } else { +nvmeio_off: + phba->nvmeio_trc_size = 0; + phba->nvmeio_trc_on = 0; + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc = NULL; + } } snprintf(name, sizeof(name), "vport%d", vport->vpi); @@ -4301,6 +5206,39 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) goto debug_failed; } + snprintf(name, sizeof(name), "nvmestat"); + vport->debug_nvmestat = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_nvmestat); + if (!vport->debug_nvmestat) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0811 Cannot create debugfs nvmestat\n"); + goto debug_failed; + } + + snprintf(name, sizeof(name), "nvmektime"); + vport->debug_nvmektime = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_nvmektime); + if (!vport->debug_nvmektime) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0815 Cannot create debugfs nvmektime\n"); + goto debug_failed; + } + + snprintf(name, sizeof(name), "cpucheck"); + vport->debug_cpucheck = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_cpucheck); + if (!vport->debug_cpucheck) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0819 Cannot create debugfs cpucheck\n"); + goto debug_failed; + } + /* * The following section is for additional directories/files for the * physical port. 
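Aside: both the lpfc_debugfs_max_nvmeio_trc module parameter and the nvmeio_trc debugfs write handler above force the trace depth to a power of two, because the producer (lpfc_debugfs_nvme_trc) and the dump routine wrap the ring with "counter & (size - 1)" rather than a modulo. A standalone sketch of that rounding and wrap follows; round_down_pow2() is an illustrative helper, not a driver function, though its shift loop mirrors the one in the write handler.

	#include <stdio.h>

	/* Round a requested depth down to a power of two. */
	static unsigned int round_down_pow2(unsigned int sz)
	{
		unsigned int bits = 0;

		while (sz > 1) {
			sz >>= 1;
			bits++;
		}
		return 1U << bits;
	}

	int main(void)
	{
		unsigned int req = 3000;	/* example requested depth   */
		unsigned int size = round_down_pow2(req);
		unsigned int counter = 4100;	/* example running trc count */

		/* With size a power of two, masking replaces the modulo. */
		printf("requested %u -> ring size %u, entry lands in slot %u\n",
		       req, size, counter & (size - 1));
		return 0;
	}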
@@ -4465,140 +5403,126 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) kfree(vport->disc_trc); vport->disc_trc = NULL; } - if (vport->debug_disc_trc) { - debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ - vport->debug_disc_trc = NULL; - } - if (vport->debug_nodelist) { - debugfs_remove(vport->debug_nodelist); /* nodelist */ - vport->debug_nodelist = NULL; - } + + debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ + vport->debug_disc_trc = NULL; + + debugfs_remove(vport->debug_nodelist); /* nodelist */ + vport->debug_nodelist = NULL; + + debugfs_remove(vport->debug_nvmestat); /* nvmestat */ + vport->debug_nvmestat = NULL; + + debugfs_remove(vport->debug_nvmektime); /* nvmektime */ + vport->debug_nvmektime = NULL; + + debugfs_remove(vport->debug_cpucheck); /* cpucheck */ + vport->debug_cpucheck = NULL; + if (vport->vport_debugfs_root) { debugfs_remove(vport->vport_debugfs_root); /* vportX */ vport->vport_debugfs_root = NULL; atomic_dec(&phba->debugfs_vport_count); } + if (atomic_read(&phba->debugfs_vport_count) == 0) { - if (phba->debug_hbqinfo) { - debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ - phba->debug_hbqinfo = NULL; - } - if (phba->debug_dumpHBASlim) { - debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */ - phba->debug_dumpHBASlim = NULL; - } - if (phba->debug_dumpHostSlim) { - debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */ - phba->debug_dumpHostSlim = NULL; - } - if (phba->debug_dumpData) { - debugfs_remove(phba->debug_dumpData); /* dumpData */ - phba->debug_dumpData = NULL; - } + debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ + phba->debug_hbqinfo = NULL; - if (phba->debug_dumpDif) { - debugfs_remove(phba->debug_dumpDif); /* dumpDif */ - phba->debug_dumpDif = NULL; - } - if (phba->debug_InjErrLBA) { - debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */ - phba->debug_InjErrLBA = NULL; - } - if (phba->debug_InjErrNPortID) { /* InjErrNPortID */ - debugfs_remove(phba->debug_InjErrNPortID); - phba->debug_InjErrNPortID = NULL; - } - if (phba->debug_InjErrWWPN) { - debugfs_remove(phba->debug_InjErrWWPN); /* InjErrWWPN */ - phba->debug_InjErrWWPN = NULL; - } - if (phba->debug_writeGuard) { - debugfs_remove(phba->debug_writeGuard); /* writeGuard */ - phba->debug_writeGuard = NULL; - } - if (phba->debug_writeApp) { - debugfs_remove(phba->debug_writeApp); /* writeApp */ - phba->debug_writeApp = NULL; - } - if (phba->debug_writeRef) { - debugfs_remove(phba->debug_writeRef); /* writeRef */ - phba->debug_writeRef = NULL; - } - if (phba->debug_readGuard) { - debugfs_remove(phba->debug_readGuard); /* readGuard */ - phba->debug_readGuard = NULL; - } - if (phba->debug_readApp) { - debugfs_remove(phba->debug_readApp); /* readApp */ - phba->debug_readApp = NULL; - } - if (phba->debug_readRef) { - debugfs_remove(phba->debug_readRef); /* readRef */ - phba->debug_readRef = NULL; - } + debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */ + phba->debug_dumpHBASlim = NULL; + + debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */ + phba->debug_dumpHostSlim = NULL; + + debugfs_remove(phba->debug_dumpData); /* dumpData */ + phba->debug_dumpData = NULL; + + debugfs_remove(phba->debug_dumpDif); /* dumpDif */ + phba->debug_dumpDif = NULL; + + debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */ + phba->debug_InjErrLBA = NULL; + + debugfs_remove(phba->debug_InjErrNPortID); + phba->debug_InjErrNPortID = NULL; + + debugfs_remove(phba->debug_InjErrWWPN); /* InjErrWWPN */ + phba->debug_InjErrWWPN = NULL; + + debugfs_remove(phba->debug_writeGuard); /* 
writeGuard */ + phba->debug_writeGuard = NULL; + + debugfs_remove(phba->debug_writeApp); /* writeApp */ + phba->debug_writeApp = NULL; + + debugfs_remove(phba->debug_writeRef); /* writeRef */ + phba->debug_writeRef = NULL; + + debugfs_remove(phba->debug_readGuard); /* readGuard */ + phba->debug_readGuard = NULL; + + debugfs_remove(phba->debug_readApp); /* readApp */ + phba->debug_readApp = NULL; + + debugfs_remove(phba->debug_readRef); /* readRef */ + phba->debug_readRef = NULL; if (phba->slow_ring_trc) { kfree(phba->slow_ring_trc); phba->slow_ring_trc = NULL; } - if (phba->debug_slow_ring_trc) { - /* slow_ring_trace */ - debugfs_remove(phba->debug_slow_ring_trc); - phba->debug_slow_ring_trc = NULL; - } + + /* slow_ring_trace */ + debugfs_remove(phba->debug_slow_ring_trc); + phba->debug_slow_ring_trc = NULL; + + debugfs_remove(phba->debug_nvmeio_trc); + phba->debug_nvmeio_trc = NULL; + + kfree(phba->nvmeio_trc); + phba->nvmeio_trc = NULL; /* * iDiag release */ if (phba->sli_rev == LPFC_SLI_REV4) { - if (phba->idiag_ext_acc) { - /* iDiag extAcc */ - debugfs_remove(phba->idiag_ext_acc); - phba->idiag_ext_acc = NULL; - } - if (phba->idiag_mbx_acc) { - /* iDiag mbxAcc */ - debugfs_remove(phba->idiag_mbx_acc); - phba->idiag_mbx_acc = NULL; - } - if (phba->idiag_ctl_acc) { - /* iDiag ctlAcc */ - debugfs_remove(phba->idiag_ctl_acc); - phba->idiag_ctl_acc = NULL; - } - if (phba->idiag_drb_acc) { - /* iDiag drbAcc */ - debugfs_remove(phba->idiag_drb_acc); - phba->idiag_drb_acc = NULL; - } - if (phba->idiag_que_acc) { - /* iDiag queAcc */ - debugfs_remove(phba->idiag_que_acc); - phba->idiag_que_acc = NULL; - } - if (phba->idiag_que_info) { - /* iDiag queInfo */ - debugfs_remove(phba->idiag_que_info); - phba->idiag_que_info = NULL; - } - if (phba->idiag_bar_acc) { - /* iDiag barAcc */ - debugfs_remove(phba->idiag_bar_acc); - phba->idiag_bar_acc = NULL; - } - if (phba->idiag_pci_cfg) { - /* iDiag pciCfg */ - debugfs_remove(phba->idiag_pci_cfg); - phba->idiag_pci_cfg = NULL; - } + /* iDiag extAcc */ + debugfs_remove(phba->idiag_ext_acc); + phba->idiag_ext_acc = NULL; + + /* iDiag mbxAcc */ + debugfs_remove(phba->idiag_mbx_acc); + phba->idiag_mbx_acc = NULL; + + /* iDiag ctlAcc */ + debugfs_remove(phba->idiag_ctl_acc); + phba->idiag_ctl_acc = NULL; + + /* iDiag drbAcc */ + debugfs_remove(phba->idiag_drb_acc); + phba->idiag_drb_acc = NULL; + + /* iDiag queAcc */ + debugfs_remove(phba->idiag_que_acc); + phba->idiag_que_acc = NULL; + + /* iDiag queInfo */ + debugfs_remove(phba->idiag_que_info); + phba->idiag_que_info = NULL; + + /* iDiag barAcc */ + debugfs_remove(phba->idiag_bar_acc); + phba->idiag_bar_acc = NULL; + + /* iDiag pciCfg */ + debugfs_remove(phba->idiag_pci_cfg); + phba->idiag_pci_cfg = NULL; /* Finally remove the iDiag debugfs root */ - if (phba->idiag_root) { - /* iDiag root */ - debugfs_remove(phba->idiag_root); - phba->idiag_root = NULL; - } + debugfs_remove(phba->idiag_root); + phba->idiag_root = NULL; } if (phba->hba_debugfs_root) { @@ -4607,10 +5531,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) atomic_dec(&lpfc_debugfs_hba_count); } - if (atomic_read(&lpfc_debugfs_hba_count) == 0) { - debugfs_remove(lpfc_debugfs_root); /* lpfc */ - lpfc_debugfs_root = NULL; - } + debugfs_remove(lpfc_debugfs_root); /* lpfc */ + lpfc_debugfs_root = NULL; } #endif return; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 98814d65123904..5312e0f9deb64a 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -50,6 +50,14 @@ enum { 
DUMP_NVMELS, }; +/* nvmestat output buffer size */ +#define LPFC_NVMESTAT_SIZE 8192 +#define LPFC_NVMEKTIME_SIZE 8192 +#define LPFC_CPUCHECK_SIZE 8192 +#define LPFC_NVMEIO_TRC_SIZE 8192 + +#define LPFC_DEBUG_OUT_LINE_SZ 80 + /* * For SLI4 iDiag debugfs diagnostics tool */ @@ -196,6 +204,12 @@ enum { #define SIZE_U16 sizeof(uint16_t) #define SIZE_U32 sizeof(uint32_t) +#define lpfc_nvmeio_data(phba, fmt, arg...) \ + { \ + if (phba->nvmeio_trc_on) \ + lpfc_debugfs_nvme_trc(phba, fmt, ##arg); \ + } + struct lpfc_debug { char *i_private; char op; @@ -214,6 +228,13 @@ struct lpfc_debugfs_trc { unsigned long jif; }; +struct lpfc_debugfs_nvmeio_trc { + char *fmt; + uint16_t data1; + uint16_t data2; + uint32_t data3; +}; + struct lpfc_idiag_offset { uint32_t last_rd; }; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 729803dacf1561..56b4b94a372e52 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -50,6 +50,7 @@ #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" +#include "lpfc_debugfs.h" /* NVME initiator-based functions */ @@ -222,6 +223,9 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, cmdwqe->sli4_xritag, status, cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); + lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n", + cmdwqe->sli4_xritag, status, wcqe->parameter); + if (cmdwqe->context3) { buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); @@ -355,6 +359,9 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, genwqe->vport = vport; genwqe->retry = retry; + lpfc_nvmeio_data(phba, "NVME LS XMIT: xri x%x iotag x%x to x%06x\n", + genwqe->sli4_xritag, genwqe->iotag, ndlp->nlp_DID); + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, genwqe); if (rc == WQE_ERROR) { lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -637,6 +644,79 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, *wptr = *dptr; /* Word 23 */ } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +static void +lpfc_nvme_ktime(struct lpfc_hba *phba, + struct lpfc_nvme_buf *lpfc_ncmd) +{ + uint64_t seg1, seg2, seg3, seg4; + + if (!phba->ktime_on) + return; + if (!lpfc_ncmd->ts_last_cmd || + !lpfc_ncmd->ts_cmd_start || + !lpfc_ncmd->ts_cmd_wqput || + !lpfc_ncmd->ts_isr_cmpl || + !lpfc_ncmd->ts_data_nvme) + return; + if (lpfc_ncmd->ts_cmd_start < lpfc_ncmd->ts_last_cmd) + return; + if (lpfc_ncmd->ts_cmd_wqput < lpfc_ncmd->ts_cmd_start) + return; + if (lpfc_ncmd->ts_isr_cmpl < lpfc_ncmd->ts_cmd_wqput) + return; + if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_isr_cmpl) + return; + /* + * Segment 1 - Time from Last FCP command cmpl is handed + * off to NVME Layer to start of next command. + * Segment 2 - Time from Driver receives a IO cmd start + * from NVME Layer to WQ put is done on IO cmd. + * Segment 3 - Time from Driver WQ put is done on IO cmd + * to MSI-X ISR for IO cmpl. + * Segment 4 - Time from MSI-X ISR for IO cmpl to when + * cmpl is handled off to the NVME Layer. 
+ */ + seg1 = lpfc_ncmd->ts_cmd_start - lpfc_ncmd->ts_last_cmd; + if (seg1 > 5000000) /* 5 ms - for sequential IOs */ + return; + + /* Calculate times relative to start of IO */ + seg2 = (lpfc_ncmd->ts_cmd_wqput - lpfc_ncmd->ts_cmd_start); + seg3 = (lpfc_ncmd->ts_isr_cmpl - + lpfc_ncmd->ts_cmd_start) - seg2; + seg4 = (lpfc_ncmd->ts_data_nvme - + lpfc_ncmd->ts_cmd_start) - seg2 - seg3; + phba->ktime_data_samples++; + phba->ktime_seg1_total += seg1; + if (seg1 < phba->ktime_seg1_min) + phba->ktime_seg1_min = seg1; + else if (seg1 > phba->ktime_seg1_max) + phba->ktime_seg1_max = seg1; + phba->ktime_seg2_total += seg2; + if (seg2 < phba->ktime_seg2_min) + phba->ktime_seg2_min = seg2; + else if (seg2 > phba->ktime_seg2_max) + phba->ktime_seg2_max = seg2; + phba->ktime_seg3_total += seg3; + if (seg3 < phba->ktime_seg3_min) + phba->ktime_seg3_min = seg3; + else if (seg3 > phba->ktime_seg3_max) + phba->ktime_seg3_max = seg3; + phba->ktime_seg4_total += seg4; + if (seg4 < phba->ktime_seg4_min) + phba->ktime_seg4_min = seg4; + else if (seg4 > phba->ktime_seg4_max) + phba->ktime_seg4_max = seg4; + + lpfc_ncmd->ts_last_cmd = 0; + lpfc_ncmd->ts_cmd_start = 0; + lpfc_ncmd->ts_cmd_wqput = 0; + lpfc_ncmd->ts_isr_cmpl = 0; + lpfc_ncmd->ts_data_nvme = 0; +} +#endif + /** * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO * @lpfc_pnvme: Pointer to the driver's nvme instance data @@ -680,6 +760,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, nCmd = lpfc_ncmd->nvmeCmd; rport = lpfc_ncmd->nrport; + lpfc_nvmeio_data(phba, "NVME FCP CMPL: xri x%x stat x%x parm x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + bf_get(lpfc_wcqe_c_status, wcqe), wcqe->parameter); /* * Catch race where our node has transitioned, but the * transport is still transitioning. @@ -798,6 +881,23 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, * no need for dma unprep because the nvme_transport * owns the dma address. 
*/ +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + lpfc_ncmd->ts_isr_cmpl = pwqeIn->isr_timestamp; + lpfc_ncmd->ts_data_nvme = ktime_get_ns(); + phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme; + lpfc_nvme_ktime(phba, lpfc_ncmd); + } + if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) { + if (lpfc_ncmd->cpu != smp_processor_id()) + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6701 CPU Check cmpl: " + "cpu %d expect %d\n", + smp_processor_id(), lpfc_ncmd->cpu); + if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++; + } +#endif nCmd->done(nCmd); spin_lock_irqsave(&phba->hbalock, flags); @@ -1103,11 +1203,18 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_ncmd; struct lpfc_nvme_rport *rport; struct lpfc_nvme_qhandle *lpfc_queue_info; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t start = 0; +#endif lport = (struct lpfc_nvme_lport *)pnvme_lport->private; vport = lport->vport; phba = vport->phba; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + start = ktime_get_ns(); +#endif rport = (struct lpfc_nvme_rport *)pnvme_rport->private; lpfc_queue_info = (struct lpfc_nvme_qhandle *)hw_queue_handle; @@ -1161,6 +1268,12 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, ret = -ENOMEM; goto out_fail; } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + lpfc_ncmd->ts_cmd_start = start; + lpfc_ncmd->ts_last_cmd = phba->ktime_last_cmd; + } +#endif /* * Store the data needed by the driver to issue, abort, and complete @@ -1192,6 +1305,10 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, */ lpfc_ncmd->cur_iocbq.hba_wqidx = lpfc_queue_info->index; + lpfc_nvmeio_data(phba, "NVME FCP XMIT: xri x%x idx %d to %06x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + lpfc_queue_info->index, ndlp->nlp_DID); + ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq); if (ret) { atomic_dec(&ndlp->cmd_pending); @@ -1204,6 +1321,28 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, goto out_free_nvme_buf; } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + lpfc_ncmd->ts_cmd_wqput = ktime_get_ns(); + + if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) { + lpfc_ncmd->cpu = smp_processor_id(); + if (lpfc_ncmd->cpu != lpfc_queue_info->index) { + /* Check for admin queue */ + if (lpfc_queue_info->qidx) { + lpfc_printf_vlog(vport, + KERN_ERR, LOG_NVME_IOERR, + "6702 CPU Check cmd: " + "cpu %d wq %d\n", + lpfc_ncmd->cpu, + lpfc_queue_info->index); + } + lpfc_ncmd->cpu = lpfc_queue_info->index; + } + if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_xmt_io[lpfc_ncmd->cpu]++; + } +#endif return 0; out_free_nvme_buf: @@ -1377,6 +1516,10 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } + lpfc_nvmeio_data(phba, "NVME FCP ABORT: xri x%x idx %d to %06x\n", + nvmereq_wqe->sli4_xritag, + nvmereq_wqe->hba_wqidx, ndlp->nlp_DID); + /* Outstanding abort is in progress */ if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 4cc51086a5f703..85961c1ddecfd0 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -91,4 +91,11 @@ struct lpfc_nvme_buf { wait_queue_head_t *waitq; unsigned long start_time; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_cmd_start; + uint64_t ts_last_cmd; + uint64_t ts_cmd_wqput; + uint64_t ts_isr_cmpl; + uint64_t ts_data_nvme; +#endif }; diff --git 
a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index ccda1f73ae848c..5f6ad67d5a73a1 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13299,6 +13299,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) if (unlikely(!fpeq)) return IRQ_NONE; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + fpeq->isr_timestamp = ktime_get_ns(); +#endif + if (lpfc_fcp_look_ahead) { if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) lpfc_sli4_eq_clr_intr(fpeq); From f358dd0ca26c152a5e0922e269996268dcb98a9d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:34 -0800 Subject: [PATCH 0155/1911] scsi: lpfc: NVME Target: Base modifications NVME Target: Base modifications This set of patches adds the base modifications for NVME target support The base modifications consist of: - Additional module parameters or configuration tuning - Enablement of configuration mode for NVME target. Ties into the queueing model put into place by the initiator basemods patches. - Target-specific buffer pools, dma pools, sgl pools [mkp: fixed space at end of file] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 6 + drivers/scsi/lpfc/lpfc_attr.c | 131 ++++++++++++++++- drivers/scsi/lpfc/lpfc_crtn.h | 10 ++ drivers/scsi/lpfc/lpfc_debugfs.c | 7 +- drivers/scsi/lpfc/lpfc_disc.h | 1 + drivers/scsi/lpfc/lpfc_hw4.h | 45 ++++++ drivers/scsi/lpfc/lpfc_init.c | 244 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_mem.c | 167 +++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvmet.h | 101 +++++++++++++ drivers/scsi/lpfc/lpfc_sli.c | 107 +++++++++++++- drivers/scsi/lpfc/lpfc_sli.h | 2 + drivers/scsi/lpfc/lpfc_sli4.h | 6 + 12 files changed, 807 insertions(+), 20 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvmet.h diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index fdb314b3282d32..b7361474880e77 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -741,6 +741,7 @@ struct lpfc_hba { uint8_t fcp_embed_io; uint8_t nvme_support; /* Firmware supports NVME */ uint8_t nvmet_support; /* driver supports NVMET */ +#define LPFC_NVMET_MAX_PORTS 32 uint8_t mds_diags_support; /* HBA Config Parameters */ @@ -766,8 +767,10 @@ struct lpfc_hba { uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; uint32_t cfg_fcp_io_channel; + uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; + uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; @@ -820,6 +823,7 @@ struct lpfc_hba { #define LPFC_ENABLE_NVME 2 #define LPFC_ENABLE_BOTH 3 uint32_t io_channel_irqs; /* number of irqs for io channels */ + struct nvmet_fc_target_port *targetport; lpfc_vpd_t vpd; /* vital product data */ struct pci_dev *pcidev; @@ -1103,6 +1107,8 @@ struct lpfc_hba { uint16_t cpucheck_on; #define LPFC_CHECK_OFF 0 #define LPFC_CHECK_NVME_IO 1 +#define LPFC_CHECK_NVMET_RCV 2 +#define LPFC_CHECK_NVMET_IO 4 uint16_t ktime_on; uint64_t ktime_data_samples; uint64_t ktime_status_samples; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b91b4bb1062df5..835c6c1f4cd4f2 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -46,6 +46,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" @@ -139,6 +140,7 @@ lpfc_nvme_info_show(struct 
device *dev, struct device_attribute *attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = shost_priv(shost); struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; @@ -150,6 +152,92 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, len += snprintf(buf, PAGE_SIZE, "NVME Disabled\n"); return len; } + if (phba->nvmet_support) { + if (!phba->targetport) { + len = snprintf(buf, PAGE_SIZE, + "NVME Target: x%llx is not allocated\n", + wwn_to_u64(vport->fc_portname.u.wwn)); + return len; + } + /* Port state is only one of two values for now. */ + if (phba->targetport->port_id) + statep = "REGISTERED"; + else + statep = "INIT"; + len += snprintf(buf + len, PAGE_SIZE - len, + "NVME Target: Enabled State %s\n", + statep); + len += snprintf(buf + len, PAGE_SIZE - len, + "%s%d WWPN x%llx WWNN x%llx DID x%06x\n", + "NVME Target: lpfc", + phba->brd_no, + wwn_to_u64(vport->fc_portname.u.wwn), + wwn_to_u64(vport->fc_nodename.u.wwn), + phba->targetport->port_id); + + len += snprintf(buf + len, PAGE_SIZE, + "\nNVME Target: Statistics\n"); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Rcv %08x Drop %08x Abort %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_drop), + atomic_read(&tgtp->xmt_ls_abort)); + if (atomic_read(&tgtp->rcv_ls_req_in) != + atomic_read(&tgtp->rcv_ls_req_out)) { + len += snprintf(buf+len, PAGE_SIZE-len, + "Rcv LS: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_out)); + } + + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", + atomic_read(&tgtp->xmt_ls_rsp), + atomic_read(&tgtp->xmt_ls_drop), + atomic_read(&tgtp->xmt_ls_rsp_cmpl), + atomic_read(&tgtp->xmt_ls_rsp_error)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP: Rcv %08x Drop %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_drop)); + + if (atomic_read(&tgtp->rcv_fcp_cmd_in) != + atomic_read(&tgtp->rcv_fcp_cmd_out)) { + len += snprintf(buf+len, PAGE_SIZE-len, + "Rcv FCP: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out)); + } + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n", + atomic_read(&tgtp->xmt_fcp_read), + atomic_read(&tgtp->xmt_fcp_read_rsp), + atomic_read(&tgtp->xmt_fcp_write), + atomic_read(&tgtp->xmt_fcp_rsp)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp: abort %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_drop)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp Cmpl: %08x err %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_rsp_cmpl), + atomic_read(&tgtp->xmt_fcp_rsp_error), + atomic_read(&tgtp->xmt_fcp_rsp_drop)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "ABORT: Xmt %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_rsp), + atomic_read(&tgtp->xmt_abort_rsp_error), + atomic_read(&tgtp->xmt_abort_cmpl)); + + len += snprintf(buf+len, PAGE_SIZE-len, "\n"); + return len; + } localport = vport->localport; if (!localport) { @@ -2899,6 +2987,13 @@ lpfc_oas_lun_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(lpfc_xlane_lun, S_IRUGO | S_IWUSR, lpfc_oas_lun_show, lpfc_oas_lun_store); +int lpfc_enable_nvmet_cnt; +unsigned long long lpfc_enable_nvmet[LPFC_NVMET_MAX_PORTS] = 
{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +module_param_array(lpfc_enable_nvmet, ullong, &lpfc_enable_nvmet_cnt, 0444); +MODULE_PARM_DESC(lpfc_enable_nvmet, "Enable HBA port(s) WWPN as a NVME Target"); + static int lpfc_poll = 0; module_param(lpfc_poll, int, S_IRUGO); MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" @@ -3177,6 +3272,15 @@ lpfc_vport_param_store(devloss_tmo) static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); +/* + * lpfc_suppress_rsp: Enable suppress rsp feature is firmware supports it + * lpfc_suppress_rsp = 0 Disable + * lpfc_suppress_rsp = 1 Enable (default) + * + */ +LPFC_ATTR_R(suppress_rsp, 1, 0, 1, + "Enable suppress rsp feature is firmware supports it"); + /* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP @@ -3190,7 +3294,8 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, /* * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME * This parameter is only used if: - * lpfc_enable_fc4_type is 3 - register both FCP and NVME + * lpfc_enable_fc4_type is 3 - register both FCP and NVME and + * port is not configured for NVMET. * * ELS/CT always get 10% of XRIs, up to a maximum of 250 * The remaining XRIs get split up based on lpfc_xri_split per port: @@ -4754,7 +4859,7 @@ LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or " "MSI-X (2), if possible"); /* - * lpfc_nvme_oas: Use the oas bit when sending NVME IOs + * lpfc_nvme_oas: Use the oas bit when sending NVME/NVMET IOs * * 0 = NVME OAS disabled * 1 = NVME OAS enabled @@ -4992,6 +5097,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_fcp_io_channel, + &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_enable_bg, @@ -6027,6 +6133,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) phba->cfg_poll = 0; else phba->cfg_poll = lpfc_poll; + lpfc_suppress_rsp_init(phba, lpfc_suppress_rsp); lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); @@ -6046,17 +6153,17 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) } /* A value of 0 means use the number of CPUs found in the system */ - if (phba->cfg_nvme_io_channel == 0) - phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; if (phba->cfg_fcp_io_channel == 0) phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; - - if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) - phba->cfg_nvme_io_channel = 0; + if (phba->cfg_nvme_io_channel == 0) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) phba->cfg_fcp_io_channel = 0; + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) + phba->cfg_nvme_io_channel = 0; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; else @@ -6088,12 +6195,20 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) { - phba->nvmet_support = 0; if (phba->cfg_nvme_io_channel > phba->sli4_hba.num_present_cpu) phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel > phba->sli4_hba.num_present_cpu) phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME && + phba->nvmet_support) { + phba->cfg_enable_fc4_type &= ~LPFC_ENABLE_FCP; + phba->cfg_fcp_io_channel = 0; + } else + /* Not NVME 
Target mode. Turn off Target parameters. */ + phba->nvmet_support = 0; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; else diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 4a0db38959936c..1e97b1a106c560 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -240,6 +240,8 @@ struct hbq_dmabuf *lpfc_els_hbq_alloc(struct lpfc_hba *); void lpfc_els_hbq_free(struct lpfc_hba *, struct hbq_dmabuf *); struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); +struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); +void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, @@ -304,6 +306,8 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t rnum, struct lpfc_iocbq *iocbq); struct lpfc_sglq *__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xri); +struct lpfc_sglq *__lpfc_sli_get_nvmet_sglq(struct lpfc_hba *phba, + struct lpfc_iocbq *piocbq); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); void lpfc_sli_bemem_bcopy(void *, void *, uint32_t); void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); @@ -353,6 +357,9 @@ void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *); void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *); void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); +void *lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int flags, + dma_addr_t *handle); +void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma); void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); /* Function prototypes. 
*/ @@ -492,6 +499,7 @@ int lpfc_selective_reset(struct lpfc_hba *); int lpfc_sli4_read_config(struct lpfc_hba *); void lpfc_sli4_node_prep(struct lpfc_hba *); int lpfc_sli4_els_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba); int lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba); int lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba); void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *); @@ -531,3 +539,5 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_wcqe_complete *abts_cmpl); +extern int lpfc_enable_nvmet_cnt; +extern unsigned long long lpfc_enable_nvmet[]; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 3fcba26623eb78..e54307dff8be3b 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -828,7 +828,6 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_data_samples)); return len; } - return len; } @@ -1961,7 +1960,11 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, return strlen(pbuf); } else if ((strncmp(pbuf, "rcv", sizeof("rcv") - 1) == 0)) { - return -EINVAL; + if (phba->nvmet_support) + phba->cpucheck_on |= LPFC_CHECK_NVMET_RCV; + else + return -EINVAL; + return strlen(pbuf); } else if ((strncmp(pbuf, "off", sizeof("off") - 1) == 0)) { phba->cpucheck_on = LPFC_CHECK_OFF; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 13cc1d19b33653..e305e97e05a94a 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -148,6 +148,7 @@ struct lpfc_node_rrq { /* Defines for nlp_flag (uint32) */ #define NLP_IGNR_REG_CMPL 0x00000001 /* Rcvd rscn before we cmpl reg login */ #define NLP_REG_LOGIN_SEND 0x00000002 /* sent reglogin to adapter */ +#define NLP_SUPPRESS_RSP 0x00000010 /* Remote NPort supports suppress rsp */ #define NLP_PLOGI_SND 0x00000020 /* sent PLOGI request for this entry */ #define NLP_PRLI_SND 0x00000040 /* sent PRLI request for this entry */ #define NLP_ADISC_SND 0x00000080 /* sent ADISC request for this entry */ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index fcc083cc00e0f5..9bee888b7dc4cb 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3742,9 +3742,18 @@ struct wqe_common { #define LPFC_ELS_ID_FDISC 2 #define LPFC_ELS_ID_LOGO 1 #define LPFC_ELS_ID_DEFAULT 0 +#define wqe_irsp_SHIFT 4 +#define wqe_irsp_MASK 0x00000001 +#define wqe_irsp_WORD word11 +#define wqe_sup_SHIFT 6 +#define wqe_sup_MASK 0x00000001 +#define wqe_sup_WORD word11 #define wqe_wqec_SHIFT 7 #define wqe_wqec_MASK 0x00000001 #define wqe_wqec_WORD word11 +#define wqe_irsplen_SHIFT 8 +#define wqe_irsplen_MASK 0x0000000f +#define wqe_irsplen_WORD word11 #define wqe_cqid_SHIFT 16 #define wqe_cqid_MASK 0x0000ffff #define wqe_cqid_WORD word11 @@ -4037,6 +4046,35 @@ struct fcp_icmnd64_wqe { uint32_t rsvd_12_15[4]; /* word 12-15 */ }; +struct fcp_trsp64_wqe { + struct ulp_bde64 bde; + uint32_t response_len; + uint32_t rsvd_4_5[2]; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t rsvd_12_15[4]; /* word 12-15 */ +}; + +struct fcp_tsend64_wqe { + struct ulp_bde64 bde; + uint32_t payload_offset_len; + uint32_t relative_offset; + uint32_t reserved; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fcp_data_len; /* word 12 */ + uint32_t rsvd_13_15[3]; /* word 13-15 */ +}; + +struct fcp_treceive64_wqe { + struct 
ulp_bde64 bde; + uint32_t payload_offset_len; + uint32_t relative_offset; + uint32_t reserved; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fcp_data_len; /* word 12 */ + uint32_t rsvd_13_15[3]; /* word 13-15 */ +}; +#define TXRDY_PAYLOAD_LEN 12 + union lpfc_wqe { uint32_t words[16]; @@ -4052,6 +4090,10 @@ union lpfc_wqe { struct xmit_els_rsp64_wqe xmit_els_rsp; struct els_request64_wqe els_req; struct gen_req64_wqe gen_req; + struct fcp_trsp64_wqe fcp_trsp; + struct fcp_tsend64_wqe fcp_tsend; + struct fcp_treceive64_wqe fcp_treceive; + }; union lpfc_wqe128 { @@ -4060,6 +4102,9 @@ union lpfc_wqe128 { struct fcp_icmnd64_wqe fcp_icmd; struct fcp_iread64_wqe fcp_iread; struct fcp_iwrite64_wqe fcp_iwrite; + struct fcp_trsp64_wqe fcp_trsp; + struct fcp_tsend64_wqe fcp_tsend; + struct fcp_treceive64_wqe fcp_treceive; struct xmit_seq64_wqe xmit_sequence; struct gen_req64_wqe gen_req; }; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 474bafc63055a1..03fa3f02e33e58 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -73,6 +73,7 @@ static int lpfc_create_bootstrap_mbox(struct lpfc_hba *); static int lpfc_setup_endian_order(struct lpfc_hba *); static void lpfc_destroy_bootstrap_mbox(struct lpfc_hba *); static void lpfc_free_els_sgl_list(struct lpfc_hba *); +static void lpfc_free_nvmet_sgl_list(struct lpfc_hba *); static void lpfc_init_sgl_list(struct lpfc_hba *); static int lpfc_init_active_sgl_array(struct lpfc_hba *); static void lpfc_free_active_sgl(struct lpfc_hba *); @@ -88,6 +89,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; static DEFINE_IDR(lpfc_hba_index); +#define LPFC_NVMET_BUF_POST 254 /** * lpfc_config_port_prep - Perform lpfc initialization prior to config port @@ -1023,10 +1025,17 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; + list_for_each_entry(sglq_entry, + &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list) + sglq_entry->state = SGL_FREED; list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, &phba->sli4_hba.lpfc_els_sgl_list); + if (phba->sli4_hba.nvme_wq) + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this * list. @@ -3320,6 +3329,128 @@ lpfc_sli4_els_sgl_update(struct lpfc_hba *phba) return rc; } +/** + * lpfc_sli4_nvmet_sgl_update - update xri-sgl sizing and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. 
+ * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; + uint16_t i, lxri, xri_cnt, els_xri_cnt; + uint16_t nvmet_xri_cnt, tot_cnt; + LIST_HEAD(nvmet_sgl_list); + int rc; + + /* + * update on pci function's nvmet xri-sgl list + */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + nvmet_xri_cnt = 0; + tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + + if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { + /* els xri-sgl expanded */ + xri_cnt = nvmet_xri_cnt - phba->sli4_hba.nvmet_xri_cnt; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6302 NVMET xri-sgl cnt grew from %d to %d\n", + phba->sli4_hba.nvmet_xri_cnt, nvmet_xri_cnt); + /* allocate the additional nvmet sgls */ + for (i = 0; i < xri_cnt; i++) { + sglq_entry = kzalloc(sizeof(struct lpfc_sglq), + GFP_KERNEL); + if (sglq_entry == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6303 Failure to allocate an " + "NVMET sgl entry:%d\n", i); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->buff_type = NVMET_BUFF_TYPE; + sglq_entry->virt = lpfc_nvmet_buf_alloc(phba, 0, + &sglq_entry->phys); + if (sglq_entry->virt == NULL) { + kfree(sglq_entry); + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6304 Failure to allocate an " + "NVMET buf:%d\n", i); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->sgl = sglq_entry->virt; + memset(sglq_entry->sgl, 0, + phba->cfg_sg_dma_buf_size); + sglq_entry->state = SGL_FREED; + list_add_tail(&sglq_entry->list, &nvmet_sgl_list); + } + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + } else if (nvmet_xri_cnt < phba->sli4_hba.nvmet_xri_cnt) { + /* nvmet xri-sgl shrunk */ + xri_cnt = phba->sli4_hba.nvmet_xri_cnt - nvmet_xri_cnt; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6305 NVMET xri-sgl count decreased from " + "%d to %d\n", phba->sli4_hba.nvmet_xri_cnt, + nvmet_xri_cnt); + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_nvmet_sgl_list, + &nvmet_sgl_list); + /* release extra nvmet sgls from list */ + for (i = 0; i < xri_cnt; i++) { + list_remove_head(&nvmet_sgl_list, + sglq_entry, struct lpfc_sglq, list); + if (sglq_entry) { + lpfc_nvmet_buf_free(phba, sglq_entry->virt, + sglq_entry->phys); + kfree(sglq_entry); + } + } + list_splice_init(&nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + } else + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6306 NVMET xri-sgl count unchanged: %d\n", + nvmet_xri_cnt); + phba->sli4_hba.nvmet_xri_cnt = nvmet_xri_cnt; + + /* update xris to nvmet sgls on the list */ + sglq_entry = NULL; + sglq_entry_next = NULL; + list_for_each_entry_safe(sglq_entry, sglq_entry_next, + &phba->sli4_hba.lpfc_nvmet_sgl_list, list) { + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6307 Failed to allocate xri for " + "NVMET sgl\n"); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->sli4_lxritag = lxri; + sglq_entry->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + } + return 0; + +out_free_mem: + lpfc_free_nvmet_sgl_list(phba); + return rc; +} + /** * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to 
lpfc hba data structure. @@ -5228,11 +5359,12 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) init_waitqueue_head(&phba->work_waitq); lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "1403 Protocols supported %s %s\n", + "1403 Protocols supported %s %s %s\n", ((phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ? "SCSI" : " "), ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) ? - "NVME" : " ")); + "NVME" : " "), + (phba->nvmet_support ? "NVMET" : " ")); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { /* Initialize the scsi buffer list used by driver for scsi IO */ @@ -5447,11 +5579,13 @@ static int lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) { LPFC_MBOXQ_t *mboxq; + MAILBOX_t *mb; int rc, i, max_buf_size; uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0}; struct lpfc_mqe *mqe; int longs; int fof_vectors = 0; + uint64_t wwn; phba->sli4_hba.num_online_cpu = num_online_cpus(); phba->sli4_hba.num_present_cpu = lpfc_present_cpu; @@ -5597,6 +5731,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* This abort list used by worker thread */ spin_lock_init(&phba->sli4_hba.sgl_list_lock); + spin_lock_init(&phba->sli4_hba.nvmet_io_lock); /* * Initialize driver internal slow-path work queues @@ -5673,7 +5808,43 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_bsmbx; } + /* Check for NVMET being configured */ phba->nvmet_support = 0; + if (lpfc_enable_nvmet_cnt) { + + /* First get WWN of HBA instance */ + lpfc_read_nv(phba, mboxq); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6016 Mailbox failed , mbxCmd x%x " + "READ_NV, mbxStatus x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + rc = -EIO; + goto out_free_bsmbx; + } + mb = &mboxq->u.mb; + memcpy(&wwn, (char *)mb->un.varRDnvp.nodename, + sizeof(uint64_t)); + wwn = cpu_to_be64(wwn); + phba->sli4_hba.wwnn.u.name = wwn; + memcpy(&wwn, (char *)mb->un.varRDnvp.portname, + sizeof(uint64_t)); + /* wwn is WWPN of HBA instance */ + wwn = cpu_to_be64(wwn); + phba->sli4_hba.wwpn.u.name = wwn; + + /* Check to see if it matches any module parameter */ + for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { + if (wwn == lpfc_enable_nvmet[i]) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6017 NVME Target %016llx\n", + wwn); + phba->nvmet_support = 1; /* a match */ + } + } + } lpfc_nvme_mod_param_dep(phba); @@ -5869,6 +6040,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) /* Free the ELS sgl list */ lpfc_free_active_sgl(phba); lpfc_free_els_sgl_list(phba); + lpfc_free_nvmet_sgl_list(phba); /* Free the completion queue EQ event pool */ lpfc_sli4_cq_event_release_all(phba); @@ -6089,6 +6261,33 @@ lpfc_free_els_sgl_list(struct lpfc_hba *phba) lpfc_free_sgl_list(phba, &sglq_list); } +/** + * lpfc_free_nvmet_sgl_list - Free nvmet sgl list. + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked to free the driver's nvmet sgl list and memory. 
+ **/ +static void +lpfc_free_nvmet_sgl_list(struct lpfc_hba *phba) +{ + struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL; + LIST_HEAD(sglq_list); + + /* Retrieve all nvmet sgls from driver list */ + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_nvmet_sgl_list, &sglq_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + + /* Now free the sgl list */ + list_for_each_entry_safe(sglq_entry, sglq_next, &sglq_list, list) { + list_del(&sglq_entry->list); + lpfc_nvmet_buf_free(phba, sglq_entry->virt, sglq_entry->phys); + kfree(sglq_entry); + } +} + /** * lpfc_init_active_sgl_array - Allocate the buf to track active ELS XRIs. * @phba: pointer to lpfc hba data structure. @@ -6138,6 +6337,8 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) /* Initialize and populate the sglq list per host/VF. */ INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); /* els xri-sgl book keeping */ phba->sli4_hba.els_xri_cnt = 0; @@ -6416,6 +6617,22 @@ lpfc_create_shost(struct lpfc_hba *phba) shost = lpfc_shost_from_vport(vport); phba->pport = vport; + if (phba->nvmet_support) { + /* Only 1 vport (pport) will support NVME target */ + if (phba->txrdy_payload_pool == NULL) { + phba->txrdy_payload_pool = pci_pool_create( + "txrdy_pool", phba->pcidev, + TXRDY_PAYLOAD_LEN, 16, 0); + if (phba->txrdy_payload_pool) { + phba->targetport = NULL; + phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; + lpfc_printf_log(phba, KERN_INFO, + LOG_INIT | LOG_NVME_DISC, + "6076 NVME Target Found\n"); + } + } + } + lpfc_debugfs_initialize(vport); /* Put reference to SCSI host to driver's device private data */ pci_set_drvdata(phba->pcidev, shost); @@ -7459,7 +7676,7 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) phba->cfg_nvme_io_channel = io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 IRQs: %d, IO Channels: fcp %d nvme %d\n", + "2574 IO channels: irqs %d fcp %d nvme %d\n", phba->io_channel_irqs, phba->cfg_fcp_io_channel, phba->cfg_nvme_io_channel); @@ -9164,8 +9381,9 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) if (phba->cfg_fof) vectors++; - rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + rc = pci_alloc_irq_vectors(phba->pcidev, + (phba->nvmet_support) ? 
1 : 2, + vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9447,6 +9665,8 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) int nvme_xri_cmpl = 1; int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); + int nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = @@ -9455,7 +9675,8 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) nvme_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); - while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl) { + while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl || + !nvmet_xri_cmpl) { if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) { if (!nvme_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -9488,6 +9709,9 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); + + nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); } } @@ -9725,6 +9949,9 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; } + if (bf_get(cfg_xib, mbx_sli4_parameters) && phba->cfg_suppress_rsp) + phba->sli.sli_flag |= LPFC_SLI_SUPPRESS_RSP; + /* Make sure that sge_supp_len can be handled by the driver */ if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE) sli4_params->sge_supp_len = LPFC_MAX_SGE_SIZE; @@ -10376,13 +10603,15 @@ lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba) * lpfc_sli4_get_iocb_cnt - Calculate the # of total IOCBs to reserve * @phba: pointer to lpfc hba data structure. * - * returns the number of ELS/CT + * returns the number of ELS/CT + NVMET IOCBs to reserve **/ int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) { int max_xri = lpfc_sli4_get_els_iocb_cnt(phba); + if (phba->nvmet_support) + max_xri += LPFC_NVMET_BUF_POST; return max_xri; } @@ -10755,6 +10984,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); + /* todo: tgt: remove targetport */ /* Perform ndlp cleanup on the physical port. The nvme localport * is destroyed after to ensure all rports are io-disabled. diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index c65a1ec3d2e48f..32db255f521647 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -40,6 +40,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" @@ -441,6 +442,44 @@ lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) return; } +/** + * lpfc_nvmet_buf_alloc - Allocate an nvmet_buf from the + * lpfc_sg_dma_buf_pool PCI pool + * @phba: HBA which owns the pool to allocate from + * @mem_flags: indicates if this is a priority (MEM_PRI) allocation + * @handle: used to return the DMA-mapped address of the nvmet_buf + * + * Description: Allocates a DMA-mapped buffer from the lpfc_sg_dma_buf_pool + * PCI pool. Allocates from generic pci_pool_alloc function. 
+ * + * Returns: + * pointer to the allocated nvmet_buf on success + * NULL on failure + **/ +void * +lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) +{ + void *ret; + + ret = pci_pool_alloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, handle); + return ret; +} + +/** + * lpfc_nvmet_buf_free - Free an nvmet_buf from the lpfc_sg_dma_buf_pool + * PCI pool + * @phba: HBA which owns the pool to return to + * @virt: nvmet_buf to free + * @dma: the DMA-mapped address of the lpfc_sg_dma_buf_pool to be freed + * + * Returns: None + **/ +void +lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma) +{ + pci_pool_free(phba->lpfc_sg_dma_buf_pool, virt, dma); +} + /** * lpfc_els_hbq_alloc - Allocate an HBQ buffer * @phba: HBA to allocate HBQ buffer for @@ -553,6 +592,134 @@ lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab) kfree(dmab); } +/** + * lpfc_sli4_nvmet_alloc - Allocate an SLI4 Receive buffer + * @phba: HBA to allocate a receive buffer for + * + * Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI + * pool along a non-DMA-mapped container for it. + * + * Notes: Not interrupt-safe. Must be called with no locks held. + * + * Returns: + * pointer to HBQ on success + * NULL on failure + **/ +struct rqb_dmabuf * +lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) +{ + struct rqb_dmabuf *dma_buf; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe128 *wqe; + + dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL); + if (!dma_buf) + return NULL; + + dma_buf->hbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL, + &dma_buf->hbuf.phys); + if (!dma_buf->hbuf.virt) { + kfree(dma_buf); + return NULL; + } + dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, + &dma_buf->dbuf.phys); + if (!dma_buf->dbuf.virt) { + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + return NULL; + } + dma_buf->total_size = LPFC_DATA_BUF_SIZE; + + dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), + GFP_KERNEL); + if (!dma_buf->context) { + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + return NULL; + } + + dma_buf->iocbq = lpfc_sli_get_iocbq(phba); + dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; + if (!dma_buf->iocbq) { + kfree(dma_buf->context); + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "2621 Ran out of nvmet iocb/WQEs\n"); + return NULL; + } + nvmewqe = dma_buf->iocbq; + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + /* Initialize WQE */ + memset(wqe, 0, sizeof(union lpfc_wqe)); + /* Word 7 */ + bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->generic.wqe_com, 1); + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); + + dma_buf->iocbq->context1 = NULL; + spin_lock(&phba->sli4_hba.sgl_list_lock); + dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + if (!dma_buf->sglq) { + lpfc_sli_release_iocbq(phba, dma_buf->iocbq); + kfree(dma_buf->context); + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + 
pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6132 Ran out of nvmet XRIs\n"); + return NULL; + } + return dma_buf; +} + +/** + * lpfc_sli4_nvmet_free - Frees a receive buffer + * @phba: HBA buffer was allocated for + * @dmab: DMA Buffer container returned by lpfc_sli4_rbq_alloc + * + * Description: Frees both the container and the DMA-mapped buffers returned by + * lpfc_sli4_nvmet_alloc. + * + * Notes: Can be called with or without locks held. + * + * Returns: None + **/ +void +lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab) +{ + unsigned long flags; + + __lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag); + dmab->sglq->state = SGL_FREED; + dmab->sglq->ndlp = NULL; + + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); + list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags); + + lpfc_sli_release_iocbq(phba, dmab->iocbq); + kfree(dmab->context); + pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); + pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); + kfree(dmab); +} + /** * lpfc_in_buf_free - Free a DMA buffer * @phba: HBA buffer is associated with diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h new file mode 100644 index 00000000000000..f7b6a3f374a451 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -0,0 +1,101 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + ********************************************************************/ + +#define LPFC_NVMET_MIN_SEGS 16 +#define LPFC_NVMET_DEFAULT_SEGS 64 /* 256K IOs */ +#define LPFC_NVMET_MAX_SEGS 510 +#define LPFC_NVMET_SUCCESS_LEN 12 + +/* Used for NVME Target */ +struct lpfc_nvmet_tgtport { + struct lpfc_hba *phba; + struct completion tport_unreg_done; + + /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ + atomic_t rcv_ls_req_in; + atomic_t rcv_ls_req_out; + atomic_t rcv_ls_req_drop; + atomic_t xmt_ls_abort; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ + atomic_t xmt_ls_rsp; + atomic_t xmt_ls_drop; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ + atomic_t xmt_ls_rsp_error; + atomic_t xmt_ls_rsp_cmpl; + + /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ + atomic_t rcv_fcp_cmd_in; + atomic_t rcv_fcp_cmd_out; + atomic_t rcv_fcp_cmd_drop; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op */ + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_drop; + atomic_t xmt_fcp_read_rsp; + atomic_t xmt_fcp_read; + atomic_t xmt_fcp_write; + atomic_t xmt_fcp_rsp; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ + atomic_t xmt_fcp_rsp_cmpl; + atomic_t xmt_fcp_rsp_error; + atomic_t xmt_fcp_rsp_drop; + + + /* Stats counters - lpfc_nvmet_unsol_issue_abort */ + atomic_t xmt_abort_rsp; + atomic_t xmt_abort_rsp_error; + + /* Stats counters - lpfc_nvmet_xmt_abort_cmp */ + atomic_t xmt_abort_cmpl; +}; + +struct lpfc_nvmet_rcv_ctx { + union { + struct nvmefc_tgt_ls_req ls_req; + struct nvmefc_tgt_fcp_req fcp_req; + } ctx; + struct lpfc_hba *phba; + struct lpfc_iocbq *wqeq; + struct lpfc_iocbq *abort_wqeq; + dma_addr_t txrdy_phys; + uint32_t *txrdy; + uint32_t sid; + uint32_t offset; + uint16_t oxid; + uint16_t size; + uint16_t entry_cnt; + uint16_t cpu; + uint16_t state; + /* States */ +#define LPFC_NVMET_STE_FREE 0 +#define LPFC_NVMET_STE_RCV 1 +#define LPFC_NVMET_STE_DATA 2 +#define LPFC_NVMET_STE_ABORT 3 +#define LPFC_NVMET_STE_RSP 4 +#define LPFC_NVMET_STE_DONE 5 + uint16_t flag; +#define LPFC_NVMET_IO_INP 1 +#define LPFC_NVMET_ABORT_OP 2 + struct rqb_dmabuf *rqb_buffer; +}; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 5f6ad67d5a73a1..f2cd0f37e03f7f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -45,6 +45,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" @@ -975,6 +976,34 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) return sglq; } +/** + * __lpfc_sli_get_nvmet_sglq - Allocates an iocb object from sgl pool + * @phba: Pointer to HBA context object. + * @piocb: Pointer to the iocbq. + * + * This function is called with the sgl_list lock held. This function + * gets a new driver sglq object from the sglq list. If the + * list is not empty then it is successful, it returns pointer to the newly + * allocated sglq object else it returns NULL. 
+ **/ +struct lpfc_sglq * +__lpfc_sli_get_nvmet_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) +{ + struct list_head *lpfc_nvmet_sgl_list; + struct lpfc_sglq *sglq = NULL; + + lpfc_nvmet_sgl_list = &phba->sli4_hba.lpfc_nvmet_sgl_list; + + lockdep_assert_held(&phba->sli4_hba.sgl_list_lock); + + list_remove_head(lpfc_nvmet_sgl_list, sglq, struct lpfc_sglq, list); + if (!sglq) + return NULL; + phba->sli4_hba.lpfc_sglq_active_list[sglq->sli4_lxritag] = sglq; + sglq->state = SGL_ALLOCATED; + return sglq; +} + /** * lpfc_sli_get_iocbq - Allocates an iocb object from iocb pool * @phba: Pointer to HBA context object. @@ -1031,6 +1060,18 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) if (sglq) { + if (iocbq->iocb_flag & LPFC_IO_NVMET) { + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); + sglq->state = SGL_FREED; + sglq->ndlp = NULL; + list_add_tail(&sglq->list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore( + &phba->sli4_hba.sgl_list_lock, iflag); + goto out; + } + pring = phba->sli4_hba.els_wq->pring; if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && (sglq->state != SGL_XRI_ABORTED)) { @@ -1056,13 +1097,15 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) } } +out: /* * Clean all volatile data fields, preserve iotag and node struct. */ memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); iocbq->sli4_lxritag = NO_XRI; iocbq->sli4_xritag = NO_XRI; - iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVME_LS); + iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | + LPFC_IO_NVME_LS); list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } @@ -2450,6 +2493,14 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { int i; + switch (fch_type) { + case FC_TYPE_NVME: + /* todo: tgt: forward NVME LS to transport */ + return 1; + default: + break; + } + /* unSolicited Responses */ if (pring->prt[0].profile) { if (pring->prt[0].lpfc_sli_rcv_unsol_event) @@ -6761,7 +6812,31 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } phba->sli4_hba.els_xri_cnt = rc; - if (phba->nvmet_support == 0) { + if (phba->nvmet_support) { + /* update host nvmet xri-sgl sizes and mappings */ + rc = lpfc_sli4_nvmet_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6308 Failed to update nvmet-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + + /* register the nvmet sgl pool to the port */ + rc = lpfc_sli4_repost_sgl_list( + phba, + &phba->sli4_hba.lpfc_nvmet_sgl_list, + phba->sli4_hba.nvmet_xri_cnt); + if (unlikely(rc < 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "3117 Error %d during nvmet " + "sgl post\n", rc); + rc = -ENODEV; + goto out_destroy_queue; + } + phba->sli4_hba.nvmet_xri_cnt = rc; + /* todo: tgt: create targetport */ + } else { /* update host scsi xri-sgl sizes and mappings */ rc = lpfc_sli4_scsi_sgl_update(phba); if (unlikely(rc)) { @@ -13006,7 +13081,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, if (phba->sli4_hba.nvme_cq_map && (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { - /* Process NVME command completion */ + /* Process NVME / NVMET command completion */ cq = phba->sli4_hba.nvme_cq[qidx]; goto process_cq; } @@ -17912,6 +17987,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *pwqe) { union lpfc_wqe *wqe = &pwqe->wqe; + struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_queue *wq; struct lpfc_sglq *sglq; struct lpfc_sli_ring *pring; @@ 
-17961,5 +18037,30 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, return 0; } + /* NVMET requests */ + if (pwqe->iocb_flag & LPFC_IO_NVMET) { + /* Get the IO distribution (hba_wqidx) for WQ assignment. */ + pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring; + + spin_lock_irqsave(&pring->ring_lock, iflags); + ctxp = pwqe->context2; + sglq = ctxp->rqb_buffer->sglq; + if (pwqe->sli4_xritag == NO_XRI) { + pwqe->sli4_lxritag = sglq->sli4_lxritag; + pwqe->sli4_xritag = sglq->sli4_xritag; + } + bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com, + pwqe->sli4_xritag); + wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]; + bf_set(wqe_cqid, &wqe->generic.wqe_com, + phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id); + if (lpfc_sli4_wq_put(wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } return WQE_ERROR; } diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 72520125251b5f..f6cea02adc7c2e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -93,6 +93,7 @@ struct lpfc_iocbq { #define LPFC_PRLI_FCP_REQ 0x100000 /* This is an NVME PRLI. */ #define LPFC_IO_NVME 0x200000 /* NVME FCP command */ #define LPFC_IO_NVME_LS 0x400000 /* NVME LS command */ +#define LPFC_IO_NVMET 0x800000 /* NVMET command */ uint32_t drvrTimeout; /* driver timeout in seconds */ struct lpfc_vport *vport;/* virtual port pointer */ @@ -317,6 +318,7 @@ struct lpfc_sli { #define LPFC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */ #define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */ #define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */ +#define LPFC_SLI_SUPPRESS_RSP 0x4000 /* Suppress RSP feature is supported */ struct lpfc_sli_ring *sli3_ring; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 9cb8508b4b4b6b..99546eaef087b5 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -109,6 +109,7 @@ enum lpfc_sli4_queue_subtype { LPFC_FCP, LPFC_ELS, LPFC_NVME, + LPFC_NVMET, LPFC_NVME_LS, LPFC_USOL }; @@ -610,8 +611,11 @@ struct lpfc_sli4_hba { uint16_t scsi_xri_cnt; uint16_t scsi_xri_start; uint16_t els_xri_cnt; + uint16_t nvmet_xri_cnt; struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; + struct list_head lpfc_nvmet_sgl_list; + struct list_head lpfc_abts_nvmet_sgl_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; @@ -643,6 +647,7 @@ struct lpfc_sli4_hba { spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t sgl_list_lock; /* list of aborted els IOs */ + spinlock_t nvmet_io_lock; uint32_t physical_port; /* CPU to vector mapping information */ @@ -655,6 +660,7 @@ struct lpfc_sli4_hba { enum lpfc_sge_type { GEN_BUFF_TYPE, SCSI_BUFF_TYPE, + NVMET_BUFF_TYPE }; enum lpfc_sgl_state { From 2d7dbc4c2775eb30df97be00090adbfcc7fc5086 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:35 -0800 Subject: [PATCH 0156/1911] scsi: lpfc: NVME Target: Receive buffer updates NVME Target: Receive buffer updates Allocates buffer pools and configures adapter interfaces to handle receive buffer (asynchronous FCP CMD ius, first burst data) from the adapter. Splits by protocol, etc. 
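[Editor's note - illustration, not part of the patch above or below.] The description mentions allocating buffer pools sized by the new lpfc_nvmet_mrq and lpfc_nvmet_mrq_post parameters; the hunks further down (lpfc_sli4_nvmet_sgl_update and lpfc_sli4_queue_verify) clamp that budget so it never exceeds the XRIs left after the ELS reservation, and so the MRQ count never exceeds the NVME IO channel count. The standalone C sketch below mirrors that arithmetic outside the driver. The function name nvmet_buffer_budget, struct budget, and the numeric inputs in main() are invented for illustration only and do not exist in lpfc.

/*
 * Editorial sketch of the NVMET receive-buffer budget arithmetic this
 * patch applies:
 *   nvmet_xri_cnt = lpfc_nvmet_mrq * lpfc_nvmet_mrq_post,
 * with lpfc_nvmet_mrq capped at the NVME IO channel count and
 * lpfc_nvmet_mrq_post shrunk if the XRI pool (max_xri - els_xri_cnt)
 * is too small.  Not driver code; names and inputs are hypothetical.
 */
#include <stdio.h>

struct budget {
	unsigned int nvmet_mrq;       /* effective lpfc_nvmet_mrq        */
	unsigned int nvmet_mrq_post;  /* effective lpfc_nvmet_mrq_post   */
	unsigned int nvmet_xri_cnt;   /* resulting nvmet xri-sgl budget  */
};

static struct budget nvmet_buffer_budget(unsigned int max_xri,
					 unsigned int els_xri_cnt,
					 unsigned int nvme_io_channel,
					 unsigned int mrq,
					 unsigned int mrq_post)
{
	struct budget b;
	unsigned int tot_cnt = max_xri - els_xri_cnt;

	/* MRQ pairs cannot outnumber the NVME IO channels (EQ/CQ pairs);
	 * the driver guarantees both values are at least 1 here. */
	if (mrq > nvme_io_channel)
		mrq = nvme_io_channel;

	/* Shrink the per-RQ post count if the remaining XRI pool cannot
	 * back mrq * mrq_post receive buffers. */
	if (mrq * mrq_post > tot_cnt)
		mrq_post = tot_cnt / mrq;

	b.nvmet_mrq = mrq;
	b.nvmet_mrq_post = mrq_post;
	b.nvmet_xri_cnt = mrq * mrq_post;
	return b;
}

int main(void)
{
	/* Example only: 1024 XRIs, 64 reserved for ELS, 4 IO channels,
	 * module parameters requesting 16 RQ pairs of 256 buffers each. */
	struct budget b = nvmet_buffer_budget(1024, 64, 4, 16, 256);

	printf("mrq=%u post=%u nvmet_xri_cnt=%u\n",
	       b.nvmet_mrq, b.nvmet_mrq_post, b.nvmet_xri_cnt);
	return 0;
}

With these inputs the sketch yields mrq=4, post=240, nvmet_xri_cnt=960, i.e. the request is scaled down to what the adapter's XRI pool can actually back, which is the behavior the log messages 6301 and 6018 in the hunks below report.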
Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 3 + drivers/scsi/lpfc/lpfc_attr.c | 81 +++- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_debugfs.c | 25 +- drivers/scsi/lpfc/lpfc_hw4.h | 347 +++++++++++++++++- drivers/scsi/lpfc/lpfc_init.c | 233 +++++++++++- drivers/scsi/lpfc/lpfc_mbox.c | 87 +++++ drivers/scsi/lpfc/lpfc_sli.c | 608 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 11 + 9 files changed, 1374 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index b7361474880e77..8a4090c6771c8f 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -770,8 +770,11 @@ struct lpfc_hba { uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; + uint32_t cfg_nvmet_mrq; + uint32_t cfg_nvmet_mrq_post; uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; + uint32_t cfg_nvmet_fb_size; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 835c6c1f4cd4f2..700a68f303f377 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -58,6 +58,10 @@ #define LPFC_MIN_DEVLOSS_TMO 1 #define LPFC_MAX_DEVLOSS_TMO 255 +#define LPFC_DEF_MRQ_POST 256 +#define LPFC_MIN_MRQ_POST 32 +#define LPFC_MAX_MRQ_POST 512 + /* * Write key size should be multiple of 4. If write key is changed * make sure that library write key is also changed. @@ -3281,6 +3285,24 @@ static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, LPFC_ATTR_R(suppress_rsp, 1, 0, 1, "Enable suppress rsp feature is firmware supports it"); +/* + * lpfc_nvmet_mrq: Specify number of RQ pairs for processing NVMET cmds + * lpfc_nvmet_mrq = 1 use a single RQ pair + * lpfc_nvmet_mrq >= 2 use specified RQ pairs for MRQ + * + */ +LPFC_ATTR_R(nvmet_mrq, + 1, 1, 16, + "Specify number of RQ pairs for processing NVMET cmds"); + +/* + * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ + * + */ +LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, + LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST, + "Specify number of buffers to post on every MRQ"); + /* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP @@ -4657,13 +4679,28 @@ LPFC_VPORT_ATTR_RW(first_burst_size, 0, 0, 65536, "First burst size for Targets that support first burst"); /* -* lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. -* For the Initiator (I), enabling this parameter means that an NVME -* PRLI response with FBA enabled and an FB_SIZE set to a nonzero value -* will be processed by the initiator for subsequent NVME FCP IO. +* lpfc_nvmet_fb_size: NVME Target mode supported first burst size. +* When the driver is configured as an NVME target, this value is +* communicated to the NVME initiator in the PRLI response. It is +* used only when the lpfc_nvme_enable_fb and lpfc_nvmet_support +* parameters are set and the target is sending the PRLI RSP. * Parameter supported on physical port only - no NPIV support. -* Value range is [0,1]. Default value is 0 (disabled). +* Value range is [0,65536]. Default value is 0. */ +LPFC_ATTR_RW(nvmet_fb_size, 0, 0, 65536, + "NVME Target mode first burst size in 512B increments."); + +/* + * lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. 
+ * For the Initiator (I), enabling this parameter means that an NVMET + * PRLI response with FBA enabled and an FB_SIZE set to a nonzero value will be + * processed by the initiator for subsequent NVME FCP IO. For the target + * function (T), enabling this parameter qualifies the lpfc_nvmet_fb_size + * driver parameter as the target function's first burst size returned to the + * initiator in the target's NVME PRLI response. Parameter supported on physical + * port only - no NPIV support. + * Value range is [0,1]. Default value is 0 (disabled). + */ LPFC_ATTR_RW(nvme_enable_fb, 0, 0, 1, "Enable First Burst feature on I and T functions."); @@ -5099,7 +5136,10 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_io_channel, &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, + &dev_attr_lpfc_nvmet_mrq, + &dev_attr_lpfc_nvmet_mrq_post, &dev_attr_lpfc_nvme_enable_fb, + &dev_attr_lpfc_nvmet_fb_size, &dev_attr_lpfc_enable_bg, &dev_attr_lpfc_soft_wwnn, &dev_attr_lpfc_soft_wwpn, @@ -6136,9 +6176,12 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_suppress_rsp_init(phba, lpfc_suppress_rsp); lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); + lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq); + lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post); /* Initialize first burst. Target vs Initiator are different. */ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); + lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size); lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel); @@ -6205,9 +6248,35 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) phba->nvmet_support) { phba->cfg_enable_fc4_type &= ~LPFC_ENABLE_FCP; phba->cfg_fcp_io_channel = 0; - } else + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6013 %s x%x fb_size x%x, fb_max x%x\n", + "NVME Target PRLI ACC enable_fb ", + phba->cfg_nvme_enable_fb, + phba->cfg_nvmet_fb_size, + LPFC_NVMET_FB_SZ_MAX); + + if (phba->cfg_nvme_enable_fb == 0) + phba->cfg_nvmet_fb_size = 0; + else { + if (phba->cfg_nvmet_fb_size > LPFC_NVMET_FB_SZ_MAX) + phba->cfg_nvmet_fb_size = LPFC_NVMET_FB_SZ_MAX; + } + + /* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */ + if (phba->cfg_nvmet_mrq > phba->cfg_nvme_io_channel) { + phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel; + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6018 Adjust lpfc_nvmet_mrq to %d\n", + phba->cfg_nvmet_mrq); + } + } else { /* Not NVME Target mode. Turn off Target parameters. 
*/ phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; + phba->cfg_nvmet_mrq_post = 0; + phba->cfg_nvmet_fb_size = 0; + } if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 1e97b1a106c560..2e7a7f7f43b287 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -229,6 +229,7 @@ void lpfc_reg_vfi(struct lpfcMboxq *, struct lpfc_vport *, dma_addr_t); void lpfc_init_vpi(struct lpfc_hba *, struct lpfcMboxq *, uint16_t); void lpfc_unreg_vfi(struct lpfcMboxq *, struct lpfc_vport *); void lpfc_reg_fcfi(struct lpfc_hba *, struct lpfcMboxq *); +void lpfc_reg_fcfi_mrq(struct lpfc_hba *phba, struct lpfcMboxq *mbox, int mode); void lpfc_unreg_fcfi(struct lpfcMboxq *, uint16_t); void lpfc_resume_rpi(struct lpfcMboxq *, struct lpfc_nodelist *); int lpfc_check_pending_fcoe_event(struct lpfc_hba *, uint8_t); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index e54307dff8be3b..abc39f605960e9 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2837,7 +2837,7 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, static int lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, - int *len, int max_cnt, int eq_id) + int *len, int max_cnt, int eqidx, int eq_id) { struct lpfc_queue *qp; int qidx, rc; @@ -2880,6 +2880,27 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } + if (phba->cfg_nvmet_mrq > eqidx) { + /* NVMET CQset */ + qp = phba->sli4_hba.nvmet_cqset[eqidx]; + *len = __lpfc_idiag_print_cq(qp, "NVMET CQset", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + /* RQ header */ + qp = phba->sli4_hba.nvmet_mrq_hdr[eqidx]; + *len = __lpfc_idiag_print_rqpair(qp, + phba->sli4_hba.nvmet_mrq_data[eqidx], + "NVMET MRQ", pbuffer, *len); + + if (*len >= max_cnt) + return 1; + } + return 0; } @@ -2977,7 +2998,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, /* will dump both fcp and nvme cqs/wqs for the eq */ rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, - max_cnt, qp->queue_id); + max_cnt, x, qp->queue_id); if (rc) goto too_big; diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 9bee888b7dc4cb..0fddb23178752f 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -962,6 +962,7 @@ struct mbox_header { #define LPFC_MBOX_OPCODE_FCOE_DELETE_FCF 0x0A #define LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE 0x0B #define LPFC_MBOX_OPCODE_FCOE_REDISCOVER_FCF 0x10 +#define LPFC_MBOX_OPCODE_FCOE_CQ_CREATE_SET 0x1D #define LPFC_MBOX_OPCODE_FCOE_SET_FCLINK_SETTINGS 0x21 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_STATE 0x22 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_LOOPBACK 0x23 @@ -1143,6 +1144,116 @@ struct lpfc_mbx_cq_create { } u; }; +struct lpfc_mbx_cq_create_set { + union lpfc_sli4_cfg_shdr cfg_shdr; + union { + struct { + uint32_t word0; +#define lpfc_mbx_cq_create_set_page_size_SHIFT 16 /* Version 2 Only */ +#define lpfc_mbx_cq_create_set_page_size_MASK 0x000000FF +#define lpfc_mbx_cq_create_set_page_size_WORD word0 +#define lpfc_mbx_cq_create_set_num_pages_SHIFT 0 +#define lpfc_mbx_cq_create_set_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_pages_WORD word0 + uint32_t word1; +#define lpfc_mbx_cq_create_set_evt_SHIFT 31 +#define lpfc_mbx_cq_create_set_evt_MASK 0x00000001 +#define 
lpfc_mbx_cq_create_set_evt_WORD word1 +#define lpfc_mbx_cq_create_set_valid_SHIFT 29 +#define lpfc_mbx_cq_create_set_valid_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_valid_WORD word1 +#define lpfc_mbx_cq_create_set_cqe_cnt_SHIFT 27 +#define lpfc_mbx_cq_create_set_cqe_cnt_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_cqe_cnt_WORD word1 +#define lpfc_mbx_cq_create_set_cqe_size_SHIFT 25 +#define lpfc_mbx_cq_create_set_cqe_size_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_cqe_size_WORD word1 +#define lpfc_mbx_cq_create_set_auto_SHIFT 15 +#define lpfc_mbx_cq_create_set_auto_MASK 0x0000001 +#define lpfc_mbx_cq_create_set_auto_WORD word1 +#define lpfc_mbx_cq_create_set_nodelay_SHIFT 14 +#define lpfc_mbx_cq_create_set_nodelay_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_nodelay_WORD word1 +#define lpfc_mbx_cq_create_set_clswm_SHIFT 12 +#define lpfc_mbx_cq_create_set_clswm_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_clswm_WORD word1 + uint32_t word2; +#define lpfc_mbx_cq_create_set_arm_SHIFT 31 +#define lpfc_mbx_cq_create_set_arm_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_arm_WORD word2 +#define lpfc_mbx_cq_create_set_num_cq_SHIFT 0 +#define lpfc_mbx_cq_create_set_num_cq_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_cq_WORD word2 + uint32_t word3; +#define lpfc_mbx_cq_create_set_eq_id1_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id1_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id1_WORD word3 +#define lpfc_mbx_cq_create_set_eq_id0_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id0_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id0_WORD word3 + uint32_t word4; +#define lpfc_mbx_cq_create_set_eq_id3_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id3_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id3_WORD word4 +#define lpfc_mbx_cq_create_set_eq_id2_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id2_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id2_WORD word4 + uint32_t word5; +#define lpfc_mbx_cq_create_set_eq_id5_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id5_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id5_WORD word5 +#define lpfc_mbx_cq_create_set_eq_id4_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id4_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id4_WORD word5 + uint32_t word6; +#define lpfc_mbx_cq_create_set_eq_id7_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id7_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id7_WORD word6 +#define lpfc_mbx_cq_create_set_eq_id6_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id6_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id6_WORD word6 + uint32_t word7; +#define lpfc_mbx_cq_create_set_eq_id9_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id9_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id9_WORD word7 +#define lpfc_mbx_cq_create_set_eq_id8_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id8_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id8_WORD word7 + uint32_t word8; +#define lpfc_mbx_cq_create_set_eq_id11_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id11_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id11_WORD word8 +#define lpfc_mbx_cq_create_set_eq_id10_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id10_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id10_WORD word8 + uint32_t word9; +#define lpfc_mbx_cq_create_set_eq_id13_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id13_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id13_WORD word9 +#define lpfc_mbx_cq_create_set_eq_id12_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id12_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id12_WORD word9 + uint32_t 
word10; +#define lpfc_mbx_cq_create_set_eq_id15_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id15_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id15_WORD word10 +#define lpfc_mbx_cq_create_set_eq_id14_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id14_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id14_WORD word10 + struct dma_address page[1]; + } request; + struct { + uint32_t word0; +#define lpfc_mbx_cq_create_set_num_alloc_SHIFT 16 +#define lpfc_mbx_cq_create_set_num_alloc_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_alloc_WORD word0 +#define lpfc_mbx_cq_create_set_base_id_SHIFT 0 +#define lpfc_mbx_cq_create_set_base_id_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_base_id_WORD word0 + } response; + } u; +}; + struct lpfc_mbx_cq_destroy { struct mbox_header header; union { @@ -1252,10 +1363,10 @@ struct rq_context { #define LPFC_RQ_RING_SIZE_1024 10 /* 1024 entries */ #define LPFC_RQ_RING_SIZE_2048 11 /* 2048 entries */ #define LPFC_RQ_RING_SIZE_4096 12 /* 4096 entries */ -#define lpfc_rq_context_rqe_count_1_SHIFT 16 /* Version 1 Only */ +#define lpfc_rq_context_rqe_count_1_SHIFT 16 /* Version 1-2 Only */ #define lpfc_rq_context_rqe_count_1_MASK 0x0000FFFF #define lpfc_rq_context_rqe_count_1_WORD word0 -#define lpfc_rq_context_rqe_size_SHIFT 8 /* Version 1 Only */ +#define lpfc_rq_context_rqe_size_SHIFT 8 /* Version 1-2 Only */ #define lpfc_rq_context_rqe_size_MASK 0x0000000F #define lpfc_rq_context_rqe_size_WORD word0 #define LPFC_RQE_SIZE_8 2 @@ -1267,7 +1378,13 @@ struct rq_context { #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 #define LPFC_RQ_PAGE_SIZE_4096 0x1 - uint32_t reserved1; + uint32_t word1; +#define lpfc_rq_context_data_size_SHIFT 16 /* Version 2 Only */ +#define lpfc_rq_context_data_size_MASK 0x0000FFFF +#define lpfc_rq_context_data_size_WORD word1 +#define lpfc_rq_context_hdr_size_SHIFT 0 /* Version 2 Only */ +#define lpfc_rq_context_hdr_size_MASK 0x0000FFFF +#define lpfc_rq_context_hdr_size_WORD word1 uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 #define lpfc_rq_context_cq_id_MASK 0x000003FF @@ -1275,6 +1392,9 @@ struct rq_context { #define lpfc_rq_context_buf_size_SHIFT 0 #define lpfc_rq_context_buf_size_MASK 0x0000FFFF #define lpfc_rq_context_buf_size_WORD word2 +#define lpfc_rq_context_base_cq_SHIFT 0 /* Version 2 Only */ +#define lpfc_rq_context_base_cq_MASK 0x0000FFFF +#define lpfc_rq_context_base_cq_WORD word2 uint32_t buffer_size; /* Version 1 Only */ }; @@ -1296,10 +1416,65 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF #define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_RQ_PAGE]; } request; struct { uint32_t word0; +#define lpfc_mbx_rq_create_q_cnt_v2_SHIFT 16 +#define lpfc_mbx_rq_create_q_cnt_v2_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_cnt_v2_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 + } response; + } u; +}; + +struct lpfc_mbx_rq_create_v2 { + union lpfc_sli4_cfg_shdr cfg_shdr; + union { + struct { + uint32_t word0; 
+#define lpfc_mbx_rq_create_num_pages_SHIFT 0 +#define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_rq_cnt_SHIFT 16 +#define lpfc_mbx_rq_create_rq_cnt_MASK 0x000000FF +#define lpfc_mbx_rq_create_rq_cnt_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 +#define lpfc_mbx_rq_create_dim_SHIFT 29 +#define lpfc_mbx_rq_create_dim_MASK 0x00000001 +#define lpfc_mbx_rq_create_dim_WORD word0 +#define lpfc_mbx_rq_create_dfd_SHIFT 30 +#define lpfc_mbx_rq_create_dfd_MASK 0x00000001 +#define lpfc_mbx_rq_create_dfd_WORD word0 +#define lpfc_mbx_rq_create_dnb_SHIFT 31 +#define lpfc_mbx_rq_create_dnb_MASK 0x00000001 +#define lpfc_mbx_rq_create_dnb_WORD word0 + struct rq_context context; + struct dma_address page[1]; + } request; + struct { + uint32_t word0; +#define lpfc_mbx_rq_create_q_cnt_v2_SHIFT 16 +#define lpfc_mbx_rq_create_q_cnt_v2_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_cnt_v2_WORD word0 #define lpfc_mbx_rq_create_q_id_SHIFT 0 #define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_rq_create_q_id_WORD word0 @@ -2213,6 +2388,160 @@ struct lpfc_mbx_reg_fcfi { #define lpfc_reg_fcfi_vlan_tag_WORD word8 }; +struct lpfc_mbx_reg_fcfi_mrq { + uint32_t word1; +#define lpfc_reg_fcfi_mrq_info_index_SHIFT 0 +#define lpfc_reg_fcfi_mrq_info_index_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_info_index_WORD word1 +#define lpfc_reg_fcfi_mrq_fcfi_SHIFT 16 +#define lpfc_reg_fcfi_mrq_fcfi_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_fcfi_WORD word1 + uint32_t word2; +#define lpfc_reg_fcfi_mrq_rq_id1_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rq_id1_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id1_WORD word2 +#define lpfc_reg_fcfi_mrq_rq_id0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_rq_id0_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id0_WORD word2 + uint32_t word3; +#define lpfc_reg_fcfi_mrq_rq_id3_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rq_id3_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id3_WORD word3 +#define lpfc_reg_fcfi_mrq_rq_id2_SHIFT 16 +#define lpfc_reg_fcfi_mrq_rq_id2_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id2_WORD word3 + uint32_t word4; +#define lpfc_reg_fcfi_mrq_type_match0_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match0_WORD word4 +#define lpfc_reg_fcfi_mrq_type_mask0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask0_WORD word4 +#define lpfc_reg_fcfi_mrq_rctl_match0_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match0_WORD word4 +#define lpfc_reg_fcfi_mrq_rctl_mask0_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask0_WORD word4 + uint32_t word5; +#define lpfc_reg_fcfi_mrq_type_match1_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match1_WORD word5 +#define lpfc_reg_fcfi_mrq_type_mask1_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask1_WORD word5 +#define lpfc_reg_fcfi_mrq_rctl_match1_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match1_MASK 0x000000FF +#define 
lpfc_reg_fcfi_mrq_rctl_match1_WORD word5 +#define lpfc_reg_fcfi_mrq_rctl_mask1_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask1_WORD word5 + uint32_t word6; +#define lpfc_reg_fcfi_mrq_type_match2_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match2_WORD word6 +#define lpfc_reg_fcfi_mrq_type_mask2_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask2_WORD word6 +#define lpfc_reg_fcfi_mrq_rctl_match2_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match2_WORD word6 +#define lpfc_reg_fcfi_mrq_rctl_mask2_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask2_WORD word6 + uint32_t word7; +#define lpfc_reg_fcfi_mrq_type_match3_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match3_WORD word7 +#define lpfc_reg_fcfi_mrq_type_mask3_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask3_WORD word7 +#define lpfc_reg_fcfi_mrq_rctl_match3_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match3_WORD word7 +#define lpfc_reg_fcfi_mrq_rctl_mask3_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask3_WORD word7 + uint32_t word8; +#define lpfc_reg_fcfi_mrq_ptc7_SHIFT 31 +#define lpfc_reg_fcfi_mrq_ptc7_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc7_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc6_SHIFT 30 +#define lpfc_reg_fcfi_mrq_ptc6_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc6_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc5_SHIFT 29 +#define lpfc_reg_fcfi_mrq_ptc5_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc5_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc4_SHIFT 28 +#define lpfc_reg_fcfi_mrq_ptc4_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc4_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc3_SHIFT 27 +#define lpfc_reg_fcfi_mrq_ptc3_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc3_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc2_SHIFT 26 +#define lpfc_reg_fcfi_mrq_ptc2_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc2_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc1_SHIFT 25 +#define lpfc_reg_fcfi_mrq_ptc1_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc1_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc0_SHIFT 24 +#define lpfc_reg_fcfi_mrq_ptc0_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc0_WORD word8 +#define lpfc_reg_fcfi_mrq_pt7_SHIFT 23 +#define lpfc_reg_fcfi_mrq_pt7_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt7_WORD word8 +#define lpfc_reg_fcfi_mrq_pt6_SHIFT 22 +#define lpfc_reg_fcfi_mrq_pt6_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt6_WORD word8 +#define lpfc_reg_fcfi_mrq_pt5_SHIFT 21 +#define lpfc_reg_fcfi_mrq_pt5_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt5_WORD word8 +#define lpfc_reg_fcfi_mrq_pt4_SHIFT 20 +#define lpfc_reg_fcfi_mrq_pt4_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt4_WORD word8 +#define lpfc_reg_fcfi_mrq_pt3_SHIFT 19 +#define lpfc_reg_fcfi_mrq_pt3_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt3_WORD word8 +#define lpfc_reg_fcfi_mrq_pt2_SHIFT 18 +#define lpfc_reg_fcfi_mrq_pt2_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt2_WORD word8 +#define lpfc_reg_fcfi_mrq_pt1_SHIFT 17 +#define lpfc_reg_fcfi_mrq_pt1_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt1_WORD word8 +#define lpfc_reg_fcfi_mrq_pt0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_pt0_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt0_WORD word8 +#define 
lpfc_reg_fcfi_mrq_xmv_SHIFT 15 +#define lpfc_reg_fcfi_mrq_xmv_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_xmv_WORD word8 +#define lpfc_reg_fcfi_mrq_mode_SHIFT 13 +#define lpfc_reg_fcfi_mrq_mode_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_mode_WORD word8 +#define lpfc_reg_fcfi_mrq_vv_SHIFT 12 +#define lpfc_reg_fcfi_mrq_vv_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_vv_WORD word8 +#define lpfc_reg_fcfi_mrq_vlan_tag_SHIFT 0 +#define lpfc_reg_fcfi_mrq_vlan_tag_MASK 0x00000FFF +#define lpfc_reg_fcfi_mrq_vlan_tag_WORD word8 + uint32_t word9; +#define lpfc_reg_fcfi_mrq_policy_SHIFT 12 +#define lpfc_reg_fcfi_mrq_policy_MASK 0x0000000F +#define lpfc_reg_fcfi_mrq_policy_WORD word9 +#define lpfc_reg_fcfi_mrq_filter_SHIFT 8 +#define lpfc_reg_fcfi_mrq_filter_MASK 0x0000000F +#define lpfc_reg_fcfi_mrq_filter_WORD word9 +#define lpfc_reg_fcfi_mrq_npairs_SHIFT 0 +#define lpfc_reg_fcfi_mrq_npairs_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_npairs_WORD word9 + uint32_t word10; + uint32_t word11; + uint32_t word12; + uint32_t word13; + uint32_t word14; + uint32_t word15; + uint32_t word16; +}; + struct lpfc_mbx_unreg_fcfi { uint32_t word1_rsv; uint32_t word2; @@ -2392,6 +2721,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rq_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_perfh_WORD word2 +#define lpfc_mbx_rq_ftr_rq_mrqp_SHIFT 16 +#define lpfc_mbx_rq_ftr_rq_mrqp_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rq_mrqp_WORD word2 uint32_t word3; #define lpfc_mbx_rq_ftr_rsp_iaab_SHIFT 0 #define lpfc_mbx_rq_ftr_rsp_iaab_MASK 0x00000001 @@ -2420,6 +2752,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rsp_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rsp_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rsp_perfh_WORD word3 +#define lpfc_mbx_rq_ftr_rsp_mrqp_SHIFT 16 +#define lpfc_mbx_rq_ftr_rsp_mrqp_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rsp_mrqp_WORD word3 }; struct lpfc_mbx_supp_pages { @@ -3312,14 +3647,17 @@ struct lpfc_mqe { struct lpfc_mbx_del_fcf_tbl_entry del_fcf_entry; struct lpfc_mbx_redisc_fcf_tbl redisc_fcf_tbl; struct lpfc_mbx_reg_fcfi reg_fcfi; + struct lpfc_mbx_reg_fcfi_mrq reg_fcfi_mrq; struct lpfc_mbx_unreg_fcfi unreg_fcfi; struct lpfc_mbx_mq_create mq_create; struct lpfc_mbx_mq_create_ext mq_create_ext; struct lpfc_mbx_eq_create eq_create; struct lpfc_mbx_modify_eq_delay eq_delay; struct lpfc_mbx_cq_create cq_create; + struct lpfc_mbx_cq_create_set cq_create_set; struct lpfc_mbx_wq_create wq_create; struct lpfc_mbx_rq_create rq_create; + struct lpfc_mbx_rq_create_v2 rq_create_v2; struct lpfc_mbx_mq_destroy mq_destroy; struct lpfc_mbx_eq_destroy eq_destroy; struct lpfc_mbx_cq_destroy cq_destroy; @@ -3972,6 +4310,7 @@ struct lpfc_nvme_prli { #define prli_fb_sz_SHIFT 0 #define prli_fb_sz_MASK 0x0000ffff #define prli_fb_sz_WORD word5 +#define LPFC_NVMET_FB_SZ_MAX 65536 /* Driver target mode only. 
*/ }; struct create_xri_wqe { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 03fa3f02e33e58..e6cbc0bac029ef 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3354,8 +3354,15 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) * update on pci function's nvmet xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); - nvmet_xri_cnt = 0; + nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + if (nvmet_xri_cnt > tot_cnt) { + phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq; + nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6301 NVMET post-sgl count changed to %d\n", + phba->cfg_nvmet_mrq_post); + } if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { /* els xri-sgl expanded */ @@ -7674,11 +7681,13 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) phba->cfg_fcp_io_channel = io_channel; if (phba->cfg_nvme_io_channel > io_channel) phba->cfg_nvme_io_channel = io_channel; + if (phba->cfg_nvme_io_channel < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 IO channels: irqs %d fcp %d nvme %d\n", + "2574 IO channels: irqs %d fcp %d nvme %d MRQ: %d\n", phba->io_channel_irqs, phba->cfg_fcp_io_channel, - phba->cfg_nvme_io_channel); + phba->cfg_nvme_io_channel, phba->cfg_nvmet_mrq); /* Get EQ depth from module parameter, fake the default for now */ phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; @@ -7768,7 +7777,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, io_channel; + int idx, io_channel, max; /* * Create HBA Record arrays. @@ -7845,7 +7854,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } - phba->sli4_hba.nvme_wq = kcalloc(phba->cfg_nvme_io_channel, sizeof(struct lpfc_queue *), GFP_KERNEL); @@ -7870,6 +7878,39 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "fast-path CQ map\n"); goto out_error; } + + if (phba->nvmet_support) { + phba->sli4_hba.nvmet_cqset = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_cqset) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3121 Fail allocate memory for " + "fast-path CQ set array\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_hdr = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_mrq_hdr) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3122 Fail allocate memory for " + "fast-path RQ set hdr array\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_data = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_mrq_data) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3124 Fail allocate memory for " + "fast-path RQ set data array\n"); + goto out_error; + } + } } INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); @@ -7897,6 +7938,30 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; + /* allocate MRQ CQs */ + max = phba->cfg_nvme_io_channel; + if (max < phba->cfg_nvmet_mrq) + max = phba->cfg_nvmet_mrq; + + for (idx = 0; idx < max; idx++) + if (lpfc_alloc_nvme_wq_cq(phba, idx)) + goto out_error; + + if (phba->nvmet_support) { + for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + 
lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3142 Failed allocate NVME " + "CQ Set (%d)\n", idx); + goto out_error; + } + phba->sli4_hba.nvmet_cqset[idx] = qdesc; + } + } + /* * Create Slow Path Completion Queues (CQs) */ @@ -7999,6 +8064,44 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } phba->sli4_hba.dat_rq = qdesc; + if (phba->nvmet_support) { + for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + /* Create NVMET Receive Queue for header */ + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.rq_esize, + phba->sli4_hba.rq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3146 Failed allocate " + "receive HRQ\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc; + + /* Only needed for header of RQ pair */ + qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb), + GFP_KERNEL); + if (qdesc->rqbp == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6131 Failed allocate " + "Header RQBP\n"); + goto out_error; + } + + /* Create NVMET Receive Queue for data */ + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.rq_esize, + phba->sli4_hba.rq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3156 Failed allocate " + "receive DRQ\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_data[idx] = qdesc; + } + } + /* Create the Queues needed for Flash Optimized Fabric operations */ if (phba->cfg_fof) lpfc_fof_queue_create(phba); @@ -8085,6 +8188,14 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) /* Release NVME CQ mapping array */ lpfc_sli4_release_queue_map(&phba->sli4_hba.nvme_cq_map); + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset, + phba->cfg_nvmet_mrq); + + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_mrq_hdr, + phba->cfg_nvmet_mrq); + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_mrq_data, + phba->cfg_nvmet_mrq); + /* Release mailbox command work queue */ __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_wq); @@ -8422,6 +8533,44 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) (uint32_t)rc); goto out_destroy; } + if (phba->nvmet_support) { + if (!phba->sli4_hba.nvmet_cqset) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3165 Fast-path NVME CQ Set " + "array not allocated\n"); + rc = -ENOMEM; + goto out_destroy; + } + if (phba->cfg_nvmet_mrq > 1) { + rc = lpfc_cq_create_set(phba, + phba->sli4_hba.nvmet_cqset, + phba->sli4_hba.hba_eq, + LPFC_WCQ, LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3164 Failed setup of NVME CQ " + "Set, rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + } else { + /* Set up NVMET Receive Complete Queue */ + rc = lpfc_cq_create(phba, phba->sli4_hba.nvmet_cqset[0], + phba->sli4_hba.hba_eq[0], + LPFC_WCQ, LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6089 Failed setup NVMET CQ: " + "rc = 0x%x\n", (uint32_t)rc); + goto out_destroy; + } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6090 NVMET CQ setup: cq-id=%d, " + "parent eq-id=%d\n", + phba->sli4_hba.nvmet_cqset[0]->queue_id, + phba->sli4_hba.hba_eq[0]->queue_id); + } + } /* Set up slow-path ELS WQ/CQ */ if (!phba->sli4_hba.els_cq || !phba->sli4_hba.els_wq) { @@ -8473,6 +8622,58 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) phba->sli4_hba.nvmels_cq->queue_id); } + /* + * Create NVMET Receive Queue (RQ) + */ + if (phba->nvmet_support) { + if ((!phba->sli4_hba.nvmet_cqset) || + (!phba->sli4_hba.nvmet_mrq_hdr) || + (!phba->sli4_hba.nvmet_mrq_data)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6130 MRQ CQ Queues not " + "allocated\n"); + rc = -ENOMEM; + goto out_destroy; + } + if 
(phba->cfg_nvmet_mrq > 1) { + rc = lpfc_mrq_create(phba, + phba->sli4_hba.nvmet_mrq_hdr, + phba->sli4_hba.nvmet_mrq_data, + phba->sli4_hba.nvmet_cqset, + LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6098 Failed setup of NVMET " + "MRQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + + } else { + rc = lpfc_rq_create(phba, + phba->sli4_hba.nvmet_mrq_hdr[0], + phba->sli4_hba.nvmet_mrq_data[0], + phba->sli4_hba.nvmet_cqset[0], + LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6057 Failed setup of NVMET " + "Receive Queue: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + + lpfc_printf_log( + phba, KERN_INFO, LOG_INIT, + "6099 NVMET RQ setup: hdr-rq-id=%d, " + "dat-rq-id=%d parent cq-id=%d\n", + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id, + phba->sli4_hba.nvmet_mrq_data[0]->queue_id, + phba->sli4_hba.nvmet_cqset[0]->queue_id); + + } + } + if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0540 Receive Queue not allocated\n"); @@ -8589,6 +8790,21 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) lpfc_cq_destroy(phba, phba->sli4_hba.nvme_cq[qidx]); + /* Unset NVMET MRQ queue */ + if (phba->sli4_hba.nvmet_mrq_hdr) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) + lpfc_rq_destroy(phba, + phba->sli4_hba.nvmet_mrq_hdr[qidx], + phba->sli4_hba.nvmet_mrq_data[qidx]); + } + + /* Unset NVMET CQ Set complete queue */ + if (phba->sli4_hba.nvmet_cqset) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) + lpfc_cq_destroy(phba, + phba->sli4_hba.nvmet_cqset[qidx]); + } + /* Unset FCP response complete queue */ if (phba->sli4_hba.fcp_cq) for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) @@ -9935,6 +10151,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) !phba->nvme_support) { phba->nvme_support = 0; phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; phba->cfg_nvme_io_channel = 0; phba->io_channel_irqs = phba->cfg_fcp_io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, @@ -10875,12 +11092,14 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (phba->intr_type != MSIX) { if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) phba->cfg_fcp_io_channel = 1; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { phba->cfg_nvme_io_channel = 1; + if (phba->nvmet_support) + phba->cfg_nvmet_mrq = 1; + } phba->io_channel_irqs = 1; } - /* Set up SLI-4 HBA */ if (lpfc_sli4_hba_setup(phba)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index c20dc25a1e79a6..8f4bfdfd9910eb 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2081,6 +2081,9 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq) if (phba->max_vpi && phba->cfg_enable_npiv) bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1); + if (phba->nvmet_support) + bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1); + return; } @@ -2448,6 +2451,26 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) /* addr mode is bit wise inverted value of fcf addr_mode */ bf_set(lpfc_reg_fcfi_mam, reg_fcfi, (~phba->fcf.addr_mode) & 0x3); + } else { + /* This is ONLY for NVMET MRQ == 1 */ + if (phba->cfg_nvmet_mrq != 1) + return; + + bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id); + /* Match type FCP - rq_id0 
*/ + bf_set(lpfc_reg_fcfi_type_match0, reg_fcfi, FC_TYPE_FCP); + bf_set(lpfc_reg_fcfi_type_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_rctl_match0, reg_fcfi, + FC_RCTL_DD_UNSOL_CMD); + + bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything else - rq_id1 */ + bf_set(lpfc_reg_fcfi_type_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_type_mask1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_mask1, reg_fcfi, 0); } bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); @@ -2460,6 +2483,70 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) } } +/** + * lpfc_reg_fcfi_mrq - Initialize the REG_FCFI_MRQ mailbox command + * @phba: pointer to the hba structure containing the FCF index and RQ ID. + * @mbox: pointer to lpfc mbox command to initialize. + * @mode: 0 to register FCFI, 1 to register MRQs + * + * The REG_FCFI_MRQ mailbox command supports Fibre Channel Forwarders (FCFs). + * The SLI Host uses the command to activate an FCF after it has acquired FCF + * information via a READ_FCF mailbox command. This mailbox command also is used + * to indicate where received unsolicited frames from this FCF will be sent. By + * default this routine will set up the FCF to forward all unsolicited frames + * the the RQ ID passed in the @phba. This can be overridden by the caller for + * more complicated setups. + **/ +void +lpfc_reg_fcfi_mrq(struct lpfc_hba *phba, struct lpfcMboxq *mbox, int mode) +{ + struct lpfc_mbx_reg_fcfi_mrq *reg_fcfi; + + /* This is ONLY for MRQ */ + if (phba->cfg_nvmet_mrq <= 1) + return; + + memset(mbox, 0, sizeof(*mbox)); + reg_fcfi = &mbox->u.mqe.un.reg_fcfi_mrq; + bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI_MRQ); + if (mode == 0) { + bf_set(lpfc_reg_fcfi_mrq_info_index, reg_fcfi, + phba->fcf.current_rec.fcf_indx); + if (phba->fcf.current_rec.vlan_id != LPFC_FCOE_NULL_VID) { + bf_set(lpfc_reg_fcfi_mrq_vv, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_vlan_tag, reg_fcfi, + phba->fcf.current_rec.vlan_id); + } + return; + } + + bf_set(lpfc_reg_fcfi_mrq_rq_id0, reg_fcfi, + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id); + /* Match NVME frames of type FCP (protocol NVME) - rq_id0 */ + bf_set(lpfc_reg_fcfi_mrq_type_match0, reg_fcfi, FC_TYPE_FCP); + bf_set(lpfc_reg_fcfi_mrq_type_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_mrq_rctl_match0, reg_fcfi, FC_RCTL_DD_UNSOL_CMD); + bf_set(lpfc_reg_fcfi_mrq_rctl_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_mrq_ptc0, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_pt0, reg_fcfi, 1); + + bf_set(lpfc_reg_fcfi_mrq_policy, reg_fcfi, 3); /* NVME connection id */ + bf_set(lpfc_reg_fcfi_mrq_mode, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_filter, reg_fcfi, 1); /* rq_id0 */ + bf_set(lpfc_reg_fcfi_mrq_npairs, reg_fcfi, phba->cfg_nvmet_mrq); + + bf_set(lpfc_reg_fcfi_mrq_rq_id1, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything - rq_id1 */ + bf_set(lpfc_reg_fcfi_mrq_type_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_type_mask1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_rctl_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_rctl_mask1, reg_fcfi, 0); + + bf_set(lpfc_reg_fcfi_mrq_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); + bf_set(lpfc_reg_fcfi_mrq_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); +} + /** * lpfc_unreg_fcfi - Initialize the UNREG_FCFI mailbox command * @mbox: pointer to lpfc mbox command to initialize. 
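The mailbox programming above leans entirely on lpfc's field-accessor convention: each register field is described by a *_SHIFT/_MASK/_WORD triplet, and bf_set()/bf_get() pack or extract the field within the named 32-bit word. The following is a minimal standalone sketch of that packing scheme, not the driver's actual macros; the mrq_set/mrq_get helpers and the demo_* field names are illustrative stand-ins modeled on the word9 layout (policy, filter, npairs) defined earlier in this patch.

#include <stdint.h>
#include <stdio.h>

/* Field layout mirroring lpfc_reg_fcfi_mrq word9: policy[15:12], filter[11:8], npairs[7:0] */
#define demo_policy_SHIFT  12
#define demo_policy_MASK   0x0000000F
#define demo_filter_SHIFT  8
#define demo_filter_MASK   0x0000000F
#define demo_npairs_SHIFT  0
#define demo_npairs_MASK   0x000000FF

/* Simplified set/get in the spirit of the driver's bf_set()/bf_get():
 * mask the value, shift it into place, and preserve the other bits of the word.
 */
#define mrq_set(word, name, val) \
	((word) = (((val) & name##_MASK) << name##_SHIFT) | \
		  ((word) & ~(name##_MASK << name##_SHIFT)))
#define mrq_get(word, name) \
	(((word) >> name##_SHIFT) & name##_MASK)

int main(void)
{
	uint32_t word9 = 0;

	mrq_set(word9, demo_policy, 3);	/* e.g. NVME connection-id policy */
	mrq_set(word9, demo_filter, 1);	/* filter on rq_id0 */
	mrq_set(word9, demo_npairs, 4);	/* four header/data RQ pairs */

	printf("word9 = 0x%08x, npairs = %u\n",
	       word9, mrq_get(word9, demo_npairs));
	return 0;
}

The same pattern applies to every lpfc_reg_fcfi_mrq_* and lpfc_mbx_cq_create_set_* field touched in these hunks: only the word and the shift/mask pair change.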
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f2cd0f37e03f7f..72164acb6656ae 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2027,6 +2027,29 @@ lpfc_sli_hbqbuf_get(struct list_head *rb_list) return container_of(d_buf, struct hbq_dmabuf, dbuf); } +/** + * lpfc_sli_rqbuf_get - Remove the first dma buffer off of an RQ list + * @phba: Pointer to HBA context object. + * @hbqno: HBQ number. + * + * This function removes the first RQ buffer on an RQ buffer list and returns a + * pointer to that buffer. If it finds no buffers on the list it returns NULL. + **/ +static struct rqb_dmabuf * +lpfc_sli_rqbuf_get(struct lpfc_hba *phba, struct lpfc_queue *hrq) +{ + struct lpfc_dmabuf *h_buf; + struct lpfc_rqb *rqbp; + + rqbp = hrq->rqbp; + list_remove_head(&rqbp->rqb_buffer_list, h_buf, + struct lpfc_dmabuf, list); + if (!h_buf) + return NULL; + rqbp->buffer_count--; + return container_of(h_buf, struct rqb_dmabuf, hbuf); +} + /** * lpfc_sli_hbqbuf_find - Find the hbq buffer associated with a tag * @phba: Pointer to HBA context object. @@ -5271,6 +5294,14 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[qidx], LPFC_QUEUE_REARM); + if (phba->nvmet_support) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) { + lpfc_sli4_cq_release( + phba->sli4_hba.nvmet_cqset[qidx], + LPFC_QUEUE_REARM); + } + } + if (phba->cfg_fof) lpfc_sli4_eq_release(phba->sli4_hba.fof_eq, LPFC_QUEUE_REARM); } @@ -6485,7 +6516,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc; + int rc, i; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@ -6494,6 +6525,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport); struct lpfc_vport *vport = phba->pport; struct lpfc_dmabuf *mp; + struct lpfc_rqb *rqbp; /* Perform a PCI function reset to start from clean */ rc = lpfc_pci_function_reset(phba); @@ -6856,6 +6888,29 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } } + if (phba->nvmet_support && phba->cfg_nvmet_mrq) { + + /* Post initial buffers to all RQs created */ + for (i = 0; i < phba->cfg_nvmet_mrq; i++) { + rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp; + INIT_LIST_HEAD(&rqbp->rqb_buffer_list); + rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc; + rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free; + rqbp->entry_count = 256; + rqbp->buffer_count = 0; + + /* Divide by 4 and round down to multiple of 16 */ + rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8; + phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc; + phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc; + + lpfc_post_rq_buffer( + phba, phba->sli4_hba.nvmet_mrq_hdr[i], + phba->sli4_hba.nvmet_mrq_data[i], + phba->cfg_nvmet_mrq_post); + } + } + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { /* register the allocated scsi sgl pool to the port */ rc = lpfc_sli4_repost_scsi_sgl_list(phba); @@ -6898,7 +6953,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) lpfc_sli4_node_prep(phba); if (!(phba->hba_flag & HBA_FCOE_MODE)) { - if (phba->nvmet_support == 0) { + if ((phba->nvmet_support == 0) || (phba->cfg_nvmet_mrq == 1)) { /* * The FC Port needs to register FCFI (index 0) */ @@ -6910,6 +6965,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = 0; phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, &mboxq->u.mqe.un.reg_fcfi); + } else { + /* We are a NVME Target mode with MRQ > 1 */ + + /* First register the FCFI */ + lpfc_reg_fcfi_mrq(phba, mboxq, 0); + 
mboxq->vport = phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; + phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_mrq_fcfi, + &mboxq->u.mqe.un.reg_fcfi_mrq); + + /* Next register the MRQs */ + lpfc_reg_fcfi_mrq(phba, mboxq, 1); + mboxq->vport = phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; } /* Check if the port is configured to be disabled */ lpfc_sli_read_link_ste(phba); @@ -12987,6 +13062,101 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, "miss-matched qid: wcqe-qid=x%x\n", hba_wqid); } +/** + * lpfc_sli4_nvmet_handle_rcqe - Process a receive-queue completion queue entry + * @phba: Pointer to HBA context object. + * @rcqe: Pointer to receive-queue completion queue entry. + * + * This routine process a receive-queue completion queue entry. + * + * Return: true if work posted to worker thread, otherwise false. + **/ +static bool +lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, + struct lpfc_rcqe *rcqe) +{ + bool workposted = false; + struct lpfc_queue *hrq; + struct lpfc_queue *drq; + struct rqb_dmabuf *dma_buf; + struct fc_frame_header *fc_hdr; + uint32_t status, rq_id; + unsigned long iflags; + uint32_t fctl, idx; + + if ((phba->nvmet_support == 0) || + (phba->sli4_hba.nvmet_cqset == NULL)) + return workposted; + + idx = cq->queue_id - phba->sli4_hba.nvmet_cqset[0]->queue_id; + hrq = phba->sli4_hba.nvmet_mrq_hdr[idx]; + drq = phba->sli4_hba.nvmet_mrq_data[idx]; + + /* sanity check on queue memory */ + if (unlikely(!hrq) || unlikely(!drq)) + return workposted; + + if (bf_get(lpfc_cqe_code, rcqe) == CQE_CODE_RECEIVE_V1) + rq_id = bf_get(lpfc_rcqe_rq_id_v1, rcqe); + else + rq_id = bf_get(lpfc_rcqe_rq_id, rcqe); + + if ((phba->nvmet_support == 0) || + (rq_id != hrq->queue_id)) + return workposted; + + status = bf_get(lpfc_rcqe_status, rcqe); + switch (status) { + case FC_STATUS_RQ_BUF_LEN_EXCEEDED: + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6126 Receive Frame Truncated!!\n"); + hrq->RQ_buf_trunc++; + break; + case FC_STATUS_RQ_SUCCESS: + lpfc_sli4_rq_release(hrq, drq); + spin_lock_irqsave(&phba->hbalock, iflags); + dma_buf = lpfc_sli_rqbuf_get(phba, hrq); + if (!dma_buf) { + hrq->RQ_no_buf_found++; + spin_unlock_irqrestore(&phba->hbalock, iflags); + goto out; + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + hrq->RQ_rcv_buf++; + fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; + + /* Just some basic sanity checks on FCP Command frame */ + fctl = (fc_hdr->fh_f_ctl[0] << 16 | + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); + if (((fctl & + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) != + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) || + (fc_hdr->fh_seq_cnt != 0)) /* 0 byte swapped is still 0 */ + goto drop; + + if (fc_hdr->fh_type == FC_TYPE_FCP) { + dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); + /* todo: tgt: forward cmd iu to transport */ + return false; + } +drop: + lpfc_in_buf_free(phba, &dma_buf->dbuf); + break; + case FC_STATUS_INSUFF_BUF_NEED_BUF: + case FC_STATUS_INSUFF_BUF_FRM_DISC: + hrq->RQ_no_posted_buf++; + /* Post more buffers if possible */ + spin_lock_irqsave(&phba->hbalock, iflags); + phba->hba_flag |= HBA_POST_RECEIVE_BUFFER; + spin_unlock_irqrestore(&phba->hbalock, iflags); + workposted = true; + break; + } +out: + return workposted; +} + /** * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry * @cq: Pointer to the 
completion queue. @@ -13035,6 +13205,10 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case CQE_CODE_RECEIVE_V1: case CQE_CODE_RECEIVE: phba->last_completion_time = jiffies; + if (cq->subtype == LPFC_NVMET) { + workposted = lpfc_sli4_nvmet_handle_rcqe( + phba, cq, (struct lpfc_rcqe *)&wcqe); + } break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -13064,7 +13238,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, struct lpfc_queue *cq = NULL; struct lpfc_cqe *cqe; bool workposted = false; - uint16_t cqid; + uint16_t cqid, id; int ecount = 0; if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) { @@ -13079,6 +13253,15 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Get the reference to the corresponding CQ */ cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); + if (phba->cfg_nvmet_mrq && phba->sli4_hba.nvmet_cqset) { + id = phba->sli4_hba.nvmet_cqset[0]->queue_id; + if ((cqid >= id) && (cqid < (id + phba->cfg_nvmet_mrq))) { + /* Process NVMET unsol rcv */ + cq = phba->sli4_hba.nvmet_cqset[cqid - id]; + goto process_cq; + } + } + if (phba->sli4_hba.nvme_cq_map && (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { /* Process NVME / NVMET command completion */ @@ -13962,6 +14145,234 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, return status; } +/** + * lpfc_cq_create_set - Create a set of Completion Queues on the HBA for MRQ + * @phba: HBA structure that indicates port to create a queue on. + * @cqp: The queue structure array to use to create the completion queues. + * @eqp: The event queue array to bind these completion queues to. + * + * This function creates a set of completion queue, s to support MRQ + * as detailed in @cqp, on a port, + * described by @phba by sending a CREATE_CQ_SET mailbox command to the HBA. + * + * The @phba struct is used to send mailbox command to HBA. The @cq struct + * is used to get the entry count and entry size that are necessary to + * determine the number of pages to allocate and use for this queue. The @eq + * is used to indicate which event queue to bind this completion queue to. This + * function will send the CREATE_CQ_SET mailbox command to the HBA to setup the + * completion queue. This function is asynchronous and will wait for the mailbox + * command to finish before continuing. + * + * On success this function will return a zero. If unable to allocate enough + * memory this function will return -ENOMEM. If the queue create mailbox command + * fails this function will return -ENXIO. 
+ **/ +int +lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, + struct lpfc_queue **eqp, uint32_t type, uint32_t subtype) +{ + struct lpfc_queue *cq; + struct lpfc_queue *eq; + struct lpfc_mbx_cq_create_set *cq_set; + struct lpfc_dmabuf *dmabuf; + LPFC_MBOXQ_t *mbox; + int rc, length, alloclen, status = 0; + int cnt, idx, numcq, page_idx = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + + /* sanity check on queue memory */ + numcq = phba->cfg_nvmet_mrq; + if (!cqp || !eqp || !numcq) + return -ENODEV; + if (!phba->sli4_hba.pc_sli4_params.supported) + hw_page_size = SLI4_PAGE_SIZE; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + length = sizeof(struct lpfc_mbx_cq_create_set); + length += ((numcq * cqp[0]->page_count) * + sizeof(struct dma_address)); + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_CQ_CREATE_SET, length, + LPFC_SLI4_MBX_NEMBED); + if (alloclen < length) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3098 Allocated DMA memory size (%d) is " + "less than the requested DMA memory size " + "(%d)\n", alloclen, length); + status = -ENOMEM; + goto out; + } + cq_set = mbox->sge_array->addr[0]; + shdr = (union lpfc_sli4_cfg_shdr *)&cq_set->cfg_shdr; + bf_set(lpfc_mbox_hdr_version, &shdr->request, 0); + + for (idx = 0; idx < numcq; idx++) { + cq = cqp[idx]; + eq = eqp[idx]; + if (!cq || !eq) { + status = -ENOMEM; + goto out; + } + + switch (idx) { + case 0: + bf_set(lpfc_mbx_cq_create_set_page_size, + &cq_set->u.request, + (hw_page_size / SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_cq_create_set_num_pages, + &cq_set->u.request, cq->page_count); + bf_set(lpfc_mbx_cq_create_set_evt, + &cq_set->u.request, 1); + bf_set(lpfc_mbx_cq_create_set_valid, + &cq_set->u.request, 1); + bf_set(lpfc_mbx_cq_create_set_cqe_size, + &cq_set->u.request, 0); + bf_set(lpfc_mbx_cq_create_set_num_cq, + &cq_set->u.request, numcq); + switch (cq->entry_count) { + default: + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3118 Bad CQ count. 
(%d)\n", + cq->entry_count); + if (cq->entry_count < 256) { + status = -EINVAL; + goto out; + } + /* otherwise default to smallest (drop thru) */ + case 256: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_256); + break; + case 512: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_512); + break; + case 1024: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_1024); + break; + } + bf_set(lpfc_mbx_cq_create_set_eq_id0, + &cq_set->u.request, eq->queue_id); + break; + case 1: + bf_set(lpfc_mbx_cq_create_set_eq_id1, + &cq_set->u.request, eq->queue_id); + break; + case 2: + bf_set(lpfc_mbx_cq_create_set_eq_id2, + &cq_set->u.request, eq->queue_id); + break; + case 3: + bf_set(lpfc_mbx_cq_create_set_eq_id3, + &cq_set->u.request, eq->queue_id); + break; + case 4: + bf_set(lpfc_mbx_cq_create_set_eq_id4, + &cq_set->u.request, eq->queue_id); + break; + case 5: + bf_set(lpfc_mbx_cq_create_set_eq_id5, + &cq_set->u.request, eq->queue_id); + break; + case 6: + bf_set(lpfc_mbx_cq_create_set_eq_id6, + &cq_set->u.request, eq->queue_id); + break; + case 7: + bf_set(lpfc_mbx_cq_create_set_eq_id7, + &cq_set->u.request, eq->queue_id); + break; + case 8: + bf_set(lpfc_mbx_cq_create_set_eq_id8, + &cq_set->u.request, eq->queue_id); + break; + case 9: + bf_set(lpfc_mbx_cq_create_set_eq_id9, + &cq_set->u.request, eq->queue_id); + break; + case 10: + bf_set(lpfc_mbx_cq_create_set_eq_id10, + &cq_set->u.request, eq->queue_id); + break; + case 11: + bf_set(lpfc_mbx_cq_create_set_eq_id11, + &cq_set->u.request, eq->queue_id); + break; + case 12: + bf_set(lpfc_mbx_cq_create_set_eq_id12, + &cq_set->u.request, eq->queue_id); + break; + case 13: + bf_set(lpfc_mbx_cq_create_set_eq_id13, + &cq_set->u.request, eq->queue_id); + break; + case 14: + bf_set(lpfc_mbx_cq_create_set_eq_id14, + &cq_set->u.request, eq->queue_id); + break; + case 15: + bf_set(lpfc_mbx_cq_create_set_eq_id15, + &cq_set->u.request, eq->queue_id); + break; + } + + /* link the cq onto the parent eq child list */ + list_add_tail(&cq->list, &eq->child_list); + /* Set up completion queue's type and subtype */ + cq->type = type; + cq->subtype = subtype; + cq->assoc_qid = eq->queue_id; + cq->host_index = 0; + cq->hba_index = 0; + + rc = 0; + list_for_each_entry(dmabuf, &cq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + cq_set->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + cq_set->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + } + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); + + /* The IOCTL status is embedded in the mailbox subheader. */ + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3119 CQ_CREATE_SET mailbox failed with " + "status x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + status = -ENXIO; + goto out; + } + rc = bf_get(lpfc_mbx_cq_create_set_base_id, &cq_set->u.response); + if (rc == 0xFFFF) { + status = -ENXIO; + goto out; + } + + for (idx = 0; idx < numcq; idx++) { + cq = cqp[idx]; + cq->queue_id = rc + idx; + } + +out: + lpfc_sli4_mbox_cmd_free(phba, mbox); + return status; +} + /** * lpfc_mq_create_fb_init - Send MCC_CREATE without async events registration * @phba: HBA structure that indicates port to create a queue on. 
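A property worth calling out in lpfc_cq_create_set() above: the CQ_CREATE_SET response returns a single base queue id and the driver numbers the set contiguously (cq->queue_id = base + idx). The EQE and RCQE handlers earlier in this patch depend on exactly that, recovering the MRQ index by range-checking the completion's cq id against the first CQ of the set and subtracting. A small standalone sketch of that id arithmetic follows; the demo_cq struct and the base id value are hypothetical, used only to restate the mapping.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NVMET_MRQ	4	/* stands in for phba->cfg_nvmet_mrq */

struct demo_cq {
	uint16_t queue_id;
};

int main(void)
{
	struct demo_cq cqset[DEMO_NVMET_MRQ];
	uint16_t base_id = 0x120;	/* would come from the CQ_CREATE_SET response */
	uint16_t cqid, idx;

	/* Creation side: ids are handed out contiguously from the base. */
	for (idx = 0; idx < DEMO_NVMET_MRQ; idx++)
		cqset[idx].queue_id = base_id + idx;

	/* Completion side: an EQE carries only the cq id; recover the MRQ index
	 * by range-checking against the first CQ of the set and subtracting.
	 */
	cqid = 0x122;	/* example resource id pulled from an EQE */
	if (cqid >= cqset[0].queue_id &&
	    cqid < cqset[0].queue_id + DEMO_NVMET_MRQ) {
		idx = cqid - cqset[0].queue_id;
		printf("cqid 0x%x -> nvmet_cqset[%u] -> nvmet_mrq_hdr/data[%u]\n",
		       cqid, idx, idx);
	}
	return 0;
}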
@@ -14692,6 +15103,197 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, return status; } +/** + * lpfc_mrq_create - Create MRQ Receive Queues on the HBA + * @phba: HBA structure that indicates port to create a queue on. + * @hrqp: The queue structure array to use to create the header receive queues. + * @drqp: The queue structure array to use to create the data receive queues. + * @cqp: The completion queue array to bind these receive queues to. + * + * This function creates a receive buffer queue pair , as detailed in @hrq and + * @drq, on a port, described by @phba by sending a RQ_CREATE mailbox command + * to the HBA. + * + * The @phba struct is used to send mailbox command to HBA. The @drq and @hrq + * struct is used to get the entry count that is necessary to determine the + * number of pages to use for this queue. The @cq is used to indicate which + * completion queue to bind received buffers that are posted to these queues to. + * This function will send the RQ_CREATE mailbox command to the HBA to setup the + * receive queue pair. This function is asynchronous and will wait for the + * mailbox command to finish before continuing. + * + * On success this function will return a zero. If unable to allocate enough + * memory this function will return -ENOMEM. If the queue create mailbox command + * fails this function will return -ENXIO. + **/ +int +lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, + struct lpfc_queue **drqp, struct lpfc_queue **cqp, + uint32_t subtype) +{ + struct lpfc_queue *hrq, *drq, *cq; + struct lpfc_mbx_rq_create_v2 *rq_create; + struct lpfc_dmabuf *dmabuf; + LPFC_MBOXQ_t *mbox; + int rc, length, alloclen, status = 0; + int cnt, idx, numrq, page_idx = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + + numrq = phba->cfg_nvmet_mrq; + /* sanity check on array memory */ + if (!hrqp || !drqp || !cqp || !numrq) + return -ENODEV; + if (!phba->sli4_hba.pc_sli4_params.supported) + hw_page_size = SLI4_PAGE_SIZE; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + length = sizeof(struct lpfc_mbx_rq_create_v2); + length += ((2 * numrq * hrqp[0]->page_count) * + sizeof(struct dma_address)); + + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_RQ_CREATE, length, + LPFC_SLI4_MBX_NEMBED); + if (alloclen < length) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3099 Allocated DMA memory size (%d) is " + "less than the requested DMA memory size " + "(%d)\n", alloclen, length); + status = -ENOMEM; + goto out; + } + + + + rq_create = mbox->sge_array->addr[0]; + shdr = (union lpfc_sli4_cfg_shdr *)&rq_create->cfg_shdr; + + bf_set(lpfc_mbox_hdr_version, &shdr->request, LPFC_Q_CREATE_VERSION_2); + cnt = 0; + + for (idx = 0; idx < numrq; idx++) { + hrq = hrqp[idx]; + drq = drqp[idx]; + cq = cqp[idx]; + + if (hrq->entry_count != drq->entry_count) { + status = -EINVAL; + goto out; + } + + /* sanity check on queue memory */ + if (!hrq || !drq || !cq) { + status = -ENODEV; + goto out; + } + + if (idx == 0) { + bf_set(lpfc_mbx_rq_create_num_pages, + &rq_create->u.request, + hrq->page_count); + bf_set(lpfc_mbx_rq_create_rq_cnt, + &rq_create->u.request, (numrq * 2)); + bf_set(lpfc_mbx_rq_create_dnb, &rq_create->u.request, + 1); + bf_set(lpfc_rq_context_base_cq, + &rq_create->u.request.context, + cq->queue_id); + bf_set(lpfc_rq_context_data_size, + &rq_create->u.request.context, + 
LPFC_DATA_BUF_SIZE); + bf_set(lpfc_rq_context_hdr_size, + &rq_create->u.request.context, + LPFC_HDR_BUF_SIZE); + bf_set(lpfc_rq_context_rqe_count_1, + &rq_create->u.request.context, + hrq->entry_count); + bf_set(lpfc_rq_context_rqe_size, + &rq_create->u.request.context, + LPFC_RQE_SIZE_8); + bf_set(lpfc_rq_context_page_size, + &rq_create->u.request.context, + (PAGE_SIZE/SLI4_PAGE_SIZE)); + } + rc = 0; + list_for_each_entry(dmabuf, &hrq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + rq_create->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + rq_create->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + + rc = 0; + list_for_each_entry(dmabuf, &drq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + rq_create->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + rq_create->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + hrq->type = LPFC_HRQ; + hrq->assoc_qid = cq->queue_id; + hrq->subtype = subtype; + hrq->host_index = 0; + hrq->hba_index = 0; + + drq->db_format = LPFC_DB_RING_FORMAT; + drq->db_regaddr = phba->sli4_hba.RQDBregaddr; + drq->type = LPFC_DRQ; + drq->assoc_qid = cq->queue_id; + drq->subtype = subtype; + drq->host_index = 0; + drq->hba_index = 0; + + list_add_tail(&hrq->list, &cq->child_list); + list_add_tail(&drq->list, &cq->child_list); + } + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); + /* The IOCTL status is embedded in the mailbox subheader. */ + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3120 RQ_CREATE mailbox failed with " + "status x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + status = -ENXIO; + goto out; + } + rc = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response); + if (rc == 0xFFFF) { + status = -ENXIO; + goto out; + } + + /* Initialize all RQs with associated queue id */ + for (idx = 0; idx < numrq; idx++) { + hrq = hrqp[idx]; + hrq->queue_id = rc + (2 * idx); + drq = drqp[idx]; + drq->queue_id = rc + (2 * idx) + 1; + } + +out: + lpfc_sli4_mbox_cmd_free(phba, mbox); + return status; +} + /** * lpfc_eq_destroy - Destroy an event Queue on the HBA * @eq: The queue structure associated with the queue to destroy. 
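lpfc_mrq_create() above programs all of the header/data RQ pairs with one version-2 RQ_CREATE mailbox: the response carries a single base queue id, and the pairs are numbered from it in header/data order (hrq gets base + 2*idx, drq gets base + 2*idx + 1). The sketch below merely restates that interleaved assignment as standalone C; the demo_rq struct and the base id are illustrative only, not driver types.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NVMET_MRQ	4	/* number of header/data RQ pairs */

struct demo_rq {
	uint16_t queue_id;
};

int main(void)
{
	struct demo_rq hdr[DEMO_NVMET_MRQ], dat[DEMO_NVMET_MRQ];
	uint16_t base_id = 0x200;	/* would come from the RQ_CREATE response */
	int idx;

	/* Header and data queues of each pair get adjacent ids:
	 * hdr0, dat0, hdr1, dat1, ... counting up from the base id.
	 */
	for (idx = 0; idx < DEMO_NVMET_MRQ; idx++) {
		hdr[idx].queue_id = base_id + (2 * idx);
		dat[idx].queue_id = base_id + (2 * idx) + 1;
		printf("pair %d: hdr-rq-id=0x%x dat-rq-id=0x%x\n",
		       idx, hdr[idx].queue_id, dat[idx].queue_id);
	}
	return 0;
}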
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 99546eaef087b5..3bfdff1a4c53cd 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -550,6 +550,9 @@ struct lpfc_sli4_hba { struct lpfc_queue **hba_eq; /* Event queues for HBA */ struct lpfc_queue **fcp_cq; /* Fast-path FCP compl queue */ struct lpfc_queue **nvme_cq; /* Fast-path NVME compl queue */ + struct lpfc_queue **nvmet_cqset; /* Fast-path NVMET CQ Set queues */ + struct lpfc_queue **nvmet_mrq_hdr; /* Fast-path NVMET hdr MRQs */ + struct lpfc_queue **nvmet_mrq_data; /* Fast-path NVMET data MRQs */ struct lpfc_queue **fcp_wq; /* Fast-path FCP work queue */ struct lpfc_queue **nvme_wq; /* Fast-path NVME work queue */ uint16_t *fcp_cq_map; @@ -655,6 +658,8 @@ struct lpfc_sli4_hba { uint16_t num_online_cpu; uint16_t num_present_cpu; uint16_t curr_disp_cpu; + + uint16_t nvmet_mrq_post_idx; }; enum lpfc_sge_type { @@ -742,12 +747,18 @@ int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); int lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq); int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t, uint32_t); +int lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, + struct lpfc_queue **eqp, uint32_t type, + uint32_t subtype); int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); int lpfc_wq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); +int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, + struct lpfc_queue **drqp, struct lpfc_queue **cqp, + uint32_t subtype); void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int); int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *); From 8c258641e01cfcc5620c4fb191300bea224bcc99 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:36 -0800 Subject: [PATCH 0157/1911] scsi: lpfc: NVME Target: Merge into FC discovery NVME Target: Merge into FC discovery Adds NVME PRLI handling and Nameserver registrations for NVME Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 8 +++- drivers/scsi/lpfc/lpfc_els.c | 61 ++++++++++++++++++++++-- drivers/scsi/lpfc/lpfc_hbadisc.c | 27 +++++++++-- drivers/scsi/lpfc/lpfc_hw.h | 10 +++- drivers/scsi/lpfc/lpfc_nportdisc.c | 75 ++++++++++++++++++++++++++++-- 5 files changed, 167 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2c051369857a13..d576da4d3afbf9 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1433,7 +1433,13 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && (context == FC_TYPE_NVME)) { - lpfc_nvme_update_localport(vport); + if ((vport == phba->pport) && phba->nvmet_support) { + CtReq->un.rff.fbits = (FC4_FEATURE_TARGET | + FC4_FEATURE_NVME_DISC); + /* todo: update targetport attributes */ + } else { + lpfc_nvme_update_localport(vport); + } CtReq->un.rff.type_code = context; } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 23546b3c950c4c..5ee3ae88a080b7 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2001,11 +2001,21 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) sp->cmn.fcphHigh = FC_PH3; sp->cmn.valid_vendor_ver_level = 0; + memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion)); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PLOGI: did:x%x", did, 0, 0); + /* If our firmware supports this feature, convey that + * information to the target using the vendor specific field. + */ + if (phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) { + sp->cmn.valid_vendor_ver_level = 1; + sp->un.vv.vid = cpu_to_be32(LPFC_VV_EMLX_ID); + sp->un.vv.flags = cpu_to_be32(LPFC_VV_SUPPRESS_RSP); + } + phba->fc_stat.elsXmitPLOGI++; elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi; ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); @@ -2207,7 +2217,13 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, !phba->nvmet_support) bf_set(prli_fba, npr_nvme, 1); - bf_set(prli_init, npr_nvme, 1); + if (phba->nvmet_support) { + bf_set(prli_tgt, npr_nvme, 1); + bf_set(prli_disc, npr_nvme, 1); + + } else { + bf_set(prli_init, npr_nvme, 1); + } npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); elsiocb->iocb_flag |= LPFC_PRLI_NVME_REQ; @@ -2619,8 +2635,11 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->pport->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(phba->pport); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(phba->pport); + /* todo: tgt: update targetport attributes */ + } mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { @@ -4074,10 +4093,25 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, sizeof(struct lpfc_name)); memcpy(&sp->nodeName, &vport->fc_sparam.nodeName, sizeof(struct lpfc_name)); - } else + } else { memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); + sp->cmn.valid_vendor_ver_level = 0; + memset(sp->un.vendorVersion, 0, + sizeof(sp->un.vendorVersion)); + + /* If our firmware supports this feature, convey that + * info to the target using the vendor specific field. 
+ */ + if (phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) { + sp->cmn.valid_vendor_ver_level = 1; + sp->un.vv.vid = cpu_to_be32(LPFC_VV_EMLX_ID); + sp->un.vv.flags = + cpu_to_be32(LPFC_VV_SUPPRESS_RSP); + } + } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC FLOGI/PLOGI: did:x%x flg:x%x", ndlp->nlp_DID, ndlp->nlp_flag, 0); @@ -4397,7 +4431,22 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ bf_set(prli_acc_rsp_code, npr_nvme, PRLI_REQ_EXECUTED); - bf_set(prli_init, npr_nvme, 1); + if (phba->nvmet_support) { + bf_set(prli_tgt, npr_nvme, 1); + bf_set(prli_disc, npr_nvme, 1); + if (phba->cfg_nvme_enable_fb) { + bf_set(prli_fba, npr_nvme, 1); + + /* TBD. Target mode needs to post buffers + * that support the configured first burst + * byte size. + */ + bf_set(prli_fb_sz, npr_nvme, + phba->cfg_nvmet_fb_size); + } + } else { + bf_set(prli_init, npr_nvme, 1); + } lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6015 NVME issue PRLI ACC word1 x%08x " @@ -5815,6 +5864,8 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) (ndlp->nlp_state == NLP_STE_UNUSED_NODE) || !lpfc_rscn_payload_check(vport, ndlp->nlp_DID)) continue; + if (vport->phba->nvmet_support) + continue; lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8936f5d91c873e..6bbb988dd8daab 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -910,8 +910,11 @@ lpfc_linkdown(struct lpfc_hba *phba) vports[i]->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(vports[i]); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(vports[i]); + /* todo: tgt: update targetport attributes */ + } } } lpfc_destroy_vport_work_array(phba, vports); @@ -3583,8 +3586,11 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) vport->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(vport); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(vport); + /* todo: update targetport attributes */ + } goto out; } @@ -4175,6 +4181,11 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, */ vport->phba->nport_event_cnt++; lpfc_nvme_register_port(vport, ndlp); + } else { + /* Just take an NDLP ref count since the + * target does not register rports. 
+ */ + lpfc_nlp_get(ndlp); } } } @@ -5096,6 +5107,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) return NULL; lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5104,6 +5117,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE); if (!ndlp) return NULL; + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5123,6 +5138,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) * delay timeout is not needed. */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5138,6 +5155,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp->nlp_flag & NLP_RCV_PLOGI) return NULL; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 883e6d2a7bc7bd..c2221377e246cf 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -515,7 +515,15 @@ struct serv_parm { /* Structure is in Big Endian format */ struct class_parms cls2; struct class_parms cls3; struct class_parms cls4; - uint8_t vendorVersion[16]; + union { + uint8_t vendorVersion[16]; + struct { + uint32_t vid; +#define LPFC_VV_EMLX_ID 0x454d4c58 /* EMLX */ + uint32_t flags; +#define LPFC_VV_SUPPRESS_RSP 1 + } vv; + } un; }; /* diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 470f9586192f7a..0716818f269f13 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -288,6 +288,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t ed_tov; LPFC_MBOXQ_t *mbox; struct ls_rjt stat; + uint32_t vid, flag; int rc; memset(&stat, 0, sizeof (struct ls_rjt)); @@ -423,6 +424,15 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_can_disctmo(vport); } + ndlp->nlp_flag &= ~NLP_SUPPRESS_RSP; + if ((phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) && + sp->cmn.valid_vendor_ver_level) { + vid = be32_to_cpu(sp->un.vv.vid); + flag = be32_to_cpu(sp->un.vv.flags); + if ((vid == LPFC_VV_EMLX_ID) && (flag & LPFC_VV_SUPPRESS_RSP)) + ndlp->nlp_flag |= NLP_SUPPRESS_RSP; + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) goto out; @@ -744,6 +754,14 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, } if (npr->Retry) ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + /* If this driver is in nvme target mode, set the ndlp's fc4 + * type to NVME provided the PRLI response claims NVME FC4 + * type. Target mode does not issue gft_id so doesn't get + * the fc4 type set until now. 
+ */ + if ((phba->nvmet_support) && (npr->prliType == PRLI_NVME_TYPE)) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; } if (rport) { /* We need to update the rport role values */ @@ -1041,6 +1059,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, *rspiocb; struct lpfc_dmabuf *pcmd, *prsp, *mp; uint32_t *lp; + uint32_t vid, flag; IOCB_t *irsp; struct serv_parm *sp; uint32_t ed_tov; @@ -1109,6 +1128,16 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, ed_tov = (phba->fc_edtov + 999999) / 1000000; } + ndlp->nlp_flag &= ~NLP_SUPPRESS_RSP; + if ((phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) && + sp->cmn.valid_vendor_ver_level) { + vid = be32_to_cpu(sp->un.vv.vid); + flag = be32_to_cpu(sp->un.vv.flags); + if ((vid == LPFC_VV_EMLX_ID) && + (flag & LPFC_VV_SUPPRESS_RSP)) + ndlp->nlp_flag |= NLP_SUPPRESS_RSP; + } + /* * Use the larger EDTOV * RATOV = 2 * EDTOV for pt-to-pt @@ -1504,9 +1533,37 @@ lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, uint32_t evt) { struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + struct ls_rjt stat; - /* Initiator mode. */ - lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + if (vport->phba->nvmet_support) { + /* NVME Target mode. Handle and respond to the PRLI and + * transition to UNMAPPED provided the RPI has completed + * registration. + */ + if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { + lpfc_rcv_prli(vport, ndlp, cmdiocb); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } else { + /* RPI registration has not completed. Reject the PRLI + * to prevent an illegal state transition when the + * rpi registration does complete. + */ + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME_DISC, + "6115 NVMET ndlp rpi %d state " + "unknown, state x%x flags x%08x\n", + ndlp->nlp_rpi, ndlp->nlp_state, + ndlp->nlp_flag); + memset(&stat, 0, sizeof(struct ls_rjt)); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, + ndlp, NULL); + } + } else { + /* Initiator mode. */ + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + } return ndlp->nlp_state; } @@ -1668,7 +1725,12 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else { - /* Only Fabric ports should transition */ + if ((vport->fc_flag & FC_PT2PT) && phba->nvmet_support) + phba->targetport->port_id = vport->fc_myDID; + + /* Only Fabric ports should transition. NVME target + * must complete PRLI. + */ if (ndlp->nlp_type & NLP_FABRIC) { ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); @@ -1714,6 +1776,13 @@ lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); + /* If we are a target we won't immediately transition into PRLI, + * so if REG_LOGIN already completed we don't need to ignore it. 
+ */ + if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED) || + !vport->phba->nvmet_support) + ndlp->nlp_flag |= NLP_IGNR_REG_CMPL; + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); spin_unlock_irq(shost->host_lock); lpfc_disc_set_adisc(vport, ndlp); From d613b6a7aa922690e341c9ff0faba66ae299b5ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:37 -0800 Subject: [PATCH 0158/1911] scsi: lpfc: NVME Target: bind to nvmet_fc api NVME Target: Tie in to NVME Fabrics nvmet_fc LLDD target api Adds the routines to: - register and deregister the FC port as a nvmet-fc targetport - binding of nvme queues to adapter WQs - receipt and passing of NVME LS's to transport, sending transport response - receipt of NVME FCP CMD IUs, processing FCP target io data transmission commands; transmission of FCP io response - Abort operations for tgt io exchanges [mkp: fixed space at end of file warning] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 2 +- drivers/scsi/lpfc/lpfc_crtn.h | 10 + drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 5 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 10 +- drivers/scsi/lpfc/lpfc_init.c | 14 +- drivers/scsi/lpfc/lpfc_mem.c | 45 + drivers/scsi/lpfc/lpfc_nvmet.c | 1671 ++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_sli.c | 8 +- 9 files changed, 1752 insertions(+), 15 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvmet.c diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index cd7e1fcf52c6f0..30a6a35abafdc9 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -31,4 +31,4 @@ obj-$(CONFIG_SCSI_LPFC) := lpfc.o lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o \ lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o \ lpfc_scsi.o lpfc_attr.o lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o \ - lpfc_nvme.o + lpfc_nvme.o lpfc_nvmet.o diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 2e7a7f7f43b287..f56015b7852598 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -363,6 +363,8 @@ void *lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int flags, void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma); void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); +void lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp); + /* Function prototypes. 
*/ const char* lpfc_info(struct Scsi_Host *); int lpfc_scan_finished(struct Scsi_Host *, unsigned long); @@ -536,6 +538,14 @@ int lpfc_nvme_register_port(struct lpfc_vport *vport, int lpfc_nvme_create_localport(struct lpfc_vport *vport); void lpfc_nvme_destroy_localport(struct lpfc_vport *vport); void lpfc_nvme_update_localport(struct lpfc_vport *vport); +int lpfc_nvmet_create_targetport(struct lpfc_hba *phba); +int lpfc_nvmet_update_targetport(struct lpfc_hba *phba); +void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); +void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); +void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, uint64_t isr_ts); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index d576da4d3afbf9..53bdfe4212512c 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1436,7 +1436,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if ((vport == phba->pport) && phba->nvmet_support) { CtReq->un.rff.fbits = (FC4_FEATURE_TARGET | FC4_FEATURE_NVME_DISC); - /* todo: update targetport attributes */ + lpfc_nvmet_update_targetport(phba); } else { lpfc_nvme_update_localport(vport); } diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 5ee3ae88a080b7..1c0ac509e6db1c 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2636,9 +2636,10 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(phba->pport); - /* todo: tgt: update targetport attributes */ } mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6bbb988dd8daab..542797bd3134ba 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -911,9 +911,10 @@ lpfc_linkdown(struct lpfc_hba *phba) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(vports[i]); - /* todo: tgt: update targetport attributes */ } } } @@ -3587,9 +3588,10 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(vport); - /* todo: update targetport attributes */ } goto out; } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e6cbc0bac029ef..5eafd0db758dd5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10397,7 +10397,13 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); + + /* Perform ndlp cleanup on the physical port. The nvme and nvmet + * localports are destroyed after to cleanup all transport memory. 
+ */ lpfc_cleanup(vport); + lpfc_nvmet_destroy_targetport(phba); + lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disable all interrupts, @@ -11203,13 +11209,13 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); - /* todo: tgt: remove targetport */ - /* Perform ndlp cleanup on the physical port. The nvme localport - * is destroyed after to ensure all rports are io-disabled. + /* Perform ndlp cleanup on the physical port. The nvme and nvmet + * localports are destroyed after to cleanup all transport memory. */ - lpfc_nvme_destroy_localport(vport); lpfc_cleanup(vport); + lpfc_nvmet_destroy_targetport(phba); + lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disables all interrupts, diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 32db255f521647..2437ec5f48631d 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -763,3 +763,48 @@ lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) } return; } + +/** + * lpfc_rq_buf_free - Free a RQ DMA buffer + * @phba: HBA buffer is associated with + * @mp: Buffer to free + * + * Description: Frees the given DMA buffer in the appropriate way given by + * reposting it to its associated RQ so it can be reused. + * + * Notes: Takes phba->hbalock. Can be called with or without other locks held. + * + * Returns: None + **/ +void +lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) +{ + struct lpfc_rqb *rqbp; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct rqb_dmabuf *rqb_entry; + unsigned long flags; + int rc; + + if (!mp) + return; + + rqb_entry = container_of(mp, struct rqb_dmabuf, hbuf); + rqbp = rqb_entry->hrq->rqbp; + + spin_lock_irqsave(&phba->hbalock, flags); + list_del(&rqb_entry->hbuf.list); + hrqe.address_lo = putPaddrLow(rqb_entry->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_entry->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_entry->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys); + rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); + if (rc < 0) { + (rqbp->rqb_free_buffer)(phba, rqb_entry); + } else { + list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); + rqbp->buffer_count++; + } + + spin_unlock_irqrestore(&phba->hbalock, flags); +} diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c new file mode 100644 index 00000000000000..d4efd86bb15679 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -0,0 +1,1671 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channsel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. 
See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + ********************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include <../drivers/nvme/host/nvme.h> +#include + +#include "lpfc_version.h" +#include "lpfc_hw4.h" +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_sli4.h" +#include "lpfc_nl.h" +#include "lpfc_disc.h" +#include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" +#include "lpfc_nvmet.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_vport.h" + +static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + dma_addr_t rspbuf, + uint16_t rspsize); +static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *); +static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); +static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); +static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); + +/** + * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME LS commands + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct nvmefc_tgt_ls_req *rsp; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t status, result; + + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + if (!phba->targetport) + goto out; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + + if (status) + atomic_inc(&tgtp->xmt_ls_rsp_error); + else + atomic_inc(&tgtp->xmt_ls_rsp_cmpl); + +out: + ctxp = cmdwqe->context2; + rsp = &ctxp->ctx.ls_req; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6038 %s: Entrypoint: ctx %p status %x/%x\n", __func__, + ctxp, status, result); + + lpfc_nlp_put(cmdwqe->context1); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + rsp->done(rsp); + kfree(ctxp); +} + +/** + * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context + * @phba: HBA buffer is associated with + * @ctxp: context to clean up + * @mp: Buffer to free + * + * Description: Frees the given DMA buffer in the appropriate way given by + * reposting it to its associated RQ so it can be reused. + * + * Notes: Takes phba->hbalock. Can be called with or without other locks held. 
+ * + * Returns: None + **/ +void +lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_dmabuf *mp) +{ + if (ctxp) { + if (ctxp->txrdy) { + pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, + ctxp->txrdy_phys); + ctxp->txrdy = NULL; + ctxp->txrdy_phys = 0; + } + ctxp->state = LPFC_NVMET_STE_FREE; + } + lpfc_rq_buf_free(phba, mp); +} + +/** + * lpfc_nvmet_xmt_fcp_op_cmp - Completion handler for FCP Response + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME FCP commands + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct nvmefc_tgt_fcp_req *rsp; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t status, result, op, start_clean; + + ctxp = cmdwqe->context2; + rsp = &ctxp->ctx.fcp_req; + op = rsp->op; + ctxp->flag &= ~LPFC_NVMET_IO_INP; + + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + if (!phba->targetport) + goto out; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (status) { + rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; + rsp->transferred_length = 0; + atomic_inc(&tgtp->xmt_fcp_rsp_error); + } else { + rsp->fcp_error = NVME_SC_SUCCESS; + if (op == NVMET_FCOP_RSP) + rsp->transferred_length = rsp->rsplen; + else + rsp->transferred_length = rsp->transfer_length; + atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); + } + +out: + if ((op == NVMET_FCOP_READDATA_RSP) || + (op == NVMET_FCOP_RSP)) { + /* Sanity check */ + ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->entry_cnt++; + rsp->done(rsp); + /* Let Abort cmpl repost the context */ + if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + } else { + ctxp->entry_cnt++; + start_clean = offsetof(struct lpfc_iocbq, wqe); + memset(((char *)cmdwqe) + start_clean, 0, + (sizeof(struct lpfc_iocbq) - start_clean)); + rsp->done(rsp); + } +} + +static int +lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_ls_req *rsp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req); + struct lpfc_hba *phba = ctxp->phba; + struct hbq_dmabuf *nvmebuf = + (struct hbq_dmabuf *)ctxp->rqb_buffer; + struct lpfc_iocbq *nvmewqeq; + struct lpfc_nvmet_tgtport *nvmep = tgtport->private; + struct lpfc_dmabuf dmabuf; + struct ulp_bde64 bpl; + int rc; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6023 %s: Entrypoint ctx %p %p\n", __func__, + ctxp, tgtport); + + nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, + rsp->rsplen); + if (nvmewqeq == NULL) { + atomic_inc(&nvmep->xmt_ls_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6150 LS Drop IO x%x: Prep\n", + ctxp->oxid); + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, + ctxp->sid, ctxp->oxid); + return -ENOMEM; + } + + /* Save numBdes for bpl2sgl */ + nvmewqeq->rsvd2 = 1; + nvmewqeq->hba_wqidx = 0; + nvmewqeq->context3 = &dmabuf; + dmabuf.virt = &bpl; + bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow; + bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh; + bpl.tus.f.bdeSize = rsp->rsplen; + bpl.tus.f.bdeFlags = 0; + bpl.tus.w = le32_to_cpu(bpl.tus.w); + + 
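/*
 * Side note on the 64-bit buffer descriptors being assembled here: the
 * driver's putPaddrLow()/putPaddrHigh() helpers simply split a 64-bit DMA
 * address into the addrLow/addrHigh words of a BDE/BPL entry.  The
 * stand-alone sketch below mirrors that split with illustrative names; it is
 * not the driver's definition.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t paddr_lo(uint64_t addr) { return (uint32_t)(addr & 0xffffffffULL); }
static uint32_t paddr_hi(uint64_t addr) { return (uint32_t)(addr >> 32); }

int main(void)
{
	uint64_t dma = 0x0000001234abcd00ULL;	/* pretend rsp->rspdma */

	printf("bde.addrLow  = 0x%08x\n", paddr_lo(dma));
	printf("bde.addrHigh = 0x%08x\n", paddr_hi(dma));
	return 0;
}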
nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp; + nvmewqeq->iocb_cmpl = NULL; + nvmewqeq->context2 = ctxp; + + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, nvmewqeq); + if (rc == WQE_SUCCESS) { + /* + * Okay to repost buffer here, but wait till cmpl + * before freeing ctxp and iocbq. + */ + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + ctxp->rqb_buffer = 0; + atomic_inc(&nvmep->xmt_ls_rsp); + return 0; + } + /* Give back resources */ + atomic_inc(&nvmep->xmt_ls_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6151 LS Drop IO x%x: Issue %d\n", + ctxp->oxid, rc); + + lpfc_nlp_put(nvmewqeq->context1); + + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); + return -ENXIO; +} + +static int +lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + struct lpfc_iocbq *nvmewqeq; + unsigned long iflags; + int rc; + + if (rsp->op == NVMET_FCOP_ABORT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6103 Abort op: oxri x%x %d cnt %d\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); + ctxp->entry_cnt++; + ctxp->flag |= LPFC_NVMET_ABORT_OP; + if (ctxp->flag & LPFC_NVMET_IO_INP) + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + return 0; + } + + /* Sanity check */ + if (ctxp->state == LPFC_NVMET_STE_ABORT) { + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6102 Bad state IO x%x aborted\n", + ctxp->oxid); + goto aerr; + } + + nvmewqeq = lpfc_nvmet_prep_fcp_wqe(phba, ctxp); + if (nvmewqeq == NULL) { + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6152 FCP Drop IO x%x: Prep\n", + ctxp->oxid); + goto aerr; + } + + nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_op_cmp; + nvmewqeq->iocb_cmpl = NULL; + nvmewqeq->context2 = ctxp; + nvmewqeq->iocb_flag |= LPFC_IO_NVMET; + ctxp->wqeq->hba_wqidx = rsp->hwqid; + + /* For now we take hbalock */ + spin_lock_irqsave(&phba->hbalock, iflags); + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); + spin_unlock_irqrestore(&phba->hbalock, iflags); + if (rc == WQE_SUCCESS) { + ctxp->flag |= LPFC_NVMET_IO_INP; + return 0; + } + + /* Give back resources */ + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6153 FCP Drop IO x%x: Issue: %d\n", + ctxp->oxid, rc); + + ctxp->wqeq->hba_wqidx = 0; + nvmewqeq->context2 = NULL; + nvmewqeq->context3 = NULL; +aerr: + return -ENXIO; +} + +static void +lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) +{ + struct lpfc_nvmet_tgtport *tport = targetport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&tport->tport_unreg_done); +} + +static struct nvmet_fc_target_template lpfc_tgttemplate = { + .targetport_delete = lpfc_nvmet_targetport_delete, + .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, + .fcp_op = lpfc_nvmet_xmt_fcp_op, + + .max_hw_queues = 1, + .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, + .max_dif_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, + .dma_boundary = 0xFFFFFFFF, + + /* optional features */ + .target_features = 0, + /* sizes of additional private data for data structures */ + .target_priv_sz = sizeof(struct 
lpfc_nvmet_tgtport), +}; + +int +lpfc_nvmet_create_targetport(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport = phba->pport; + struct lpfc_nvmet_tgtport *tgtp; + struct nvmet_fc_port_info pinfo; + int error = 0; + + if (phba->targetport) + return 0; + + memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info)); + pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); + pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); + pinfo.port_id = vport->fc_myDID; + + lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; + lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; + lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; + + error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, + &phba->pcidev->dev, + &phba->targetport); + if (error) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6025 Cannot register NVME targetport " + "x%x\n", error); + phba->targetport = NULL; + } else { + tgtp = (struct lpfc_nvmet_tgtport *) + phba->targetport->private; + tgtp->phba = phba; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6026 Registered NVME " + "targetport: %p, private %p " + "portnm %llx nodenm %llx\n", + phba->targetport, tgtp, + pinfo.port_name, pinfo.node_name); + + atomic_set(&tgtp->rcv_ls_req_in, 0); + atomic_set(&tgtp->rcv_ls_req_out, 0); + atomic_set(&tgtp->rcv_ls_req_drop, 0); + atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_rsp, 0); + atomic_set(&tgtp->xmt_ls_drop, 0); + atomic_set(&tgtp->xmt_ls_rsp_error, 0); + atomic_set(&tgtp->xmt_ls_rsp_cmpl, 0); + atomic_set(&tgtp->rcv_fcp_cmd_in, 0); + atomic_set(&tgtp->rcv_fcp_cmd_out, 0); + atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_drop, 0); + atomic_set(&tgtp->xmt_fcp_read_rsp, 0); + atomic_set(&tgtp->xmt_fcp_read, 0); + atomic_set(&tgtp->xmt_fcp_write, 0); + atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); + atomic_set(&tgtp->xmt_fcp_rsp_error, 0); + atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_abort_rsp, 0); + atomic_set(&tgtp->xmt_abort_rsp_error, 0); + atomic_set(&tgtp->xmt_abort_cmpl, 0); + } + return error; +} + +int +lpfc_nvmet_update_targetport(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport = phba->pport; + + if (!phba->targetport) + return 0; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6007 Update NVMET port %p did x%x\n", + phba->targetport, vport->fc_myDID); + + phba->targetport->port_id = vport->fc_myDID; + return 0; +} + +void +lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_tgtport *tgtp; + + if (phba->nvmet_support == 0) + return; + if (phba->targetport) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + init_completion(&tgtp->tport_unreg_done); + nvmet_fc_unregister_targetport(phba->targetport); + wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); + } + phba->targetport = NULL; +} + +/** + * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * + * This routine is used for processing the WQE associated with a unsolicited + * event. It first determines whether there is an existing ndlp that matches + * the DID from the unsolicited WQE. If not, it will create a new one with + * the DID from the unsolicited WQE. 
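/*
 * Note on the teardown above: wait_for_completion_timeout() takes its timeout
 * in jiffies, so the bare "5" in lpfc_nvmet_destroy_targetport() waits only a
 * handful of ticks (5 ms at HZ=1000).  The sketch below shows the same
 * unregister-and-wait pairing with the unit made explicit; the 5000 ms value
 * and the *_sketch names are assumptions, not taken from the patch.
 */
static void
lpfc_nvmet_targetport_delete_sketch(struct nvmet_fc_target_port *tgt_port)
{
	struct lpfc_nvmet_tgtport *tgtp = tgt_port->private;

	/* Transport is done with the port: release the waiter. */
	complete(&tgtp->tport_unreg_done);
}

static void
lpfc_nvmet_destroy_targetport_sketch(struct lpfc_hba *phba)
{
	struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private;

	init_completion(&tgtp->tport_unreg_done);
	nvmet_fc_unregister_targetport(phba->targetport);
	wait_for_completion_timeout(&tgtp->tport_unreg_done,
				    msecs_to_jiffies(5000));
	phba->targetport = NULL;
}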
The ELS command from the unsolicited + * WQE is then used to invoke the proper routine and to set up proper state + * of the discovery state machine. + **/ +static void +lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct hbq_dmabuf *nvmebuf) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t *payload; + uint32_t size, oxid, sid, rc; + + if (!nvmebuf || !phba->targetport) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6154 LS Drop IO\n"); + goto dropit; + } + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC); + if (ctxp == NULL) { + atomic_inc(&tgtp->rcv_ls_req_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6155 LS Drop IO x%x: Alloc\n", + oxid); +dropit: + if (nvmebuf) + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + return; + } + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->wqeq = NULL; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->rqb_buffer = (void *)nvmebuf; + /* + * The calling sequence should be: + * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done + * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. + */ + atomic_inc(&tgtp->rcv_ls_req_in); + rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req, + payload, size); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6037 %s: ctx %p sz %d rc %d: %08x %08x %08x " + "%08x %08x %08x\n", __func__, ctxp, size, rc, + *payload, *(payload+1), *(payload+2), + *(payload+3), *(payload+4), *(payload+5)); + if (rc == 0) { + atomic_inc(&tgtp->rcv_ls_req_out); + return; + } + atomic_inc(&tgtp->rcv_ls_req_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", + ctxp->oxid, rc); + + /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ + if (nvmebuf) + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + + atomic_inc(&tgtp->xmt_ls_abort); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); +} + +/** + * lpfc_nvmet_unsol_fcp_buffer - Process an unsolicited event data buffer + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * + * This routine is used for processing the WQE associated with a unsolicited + * event. It first determines whether there is an existing ndlp that matches + * the DID from the unsolicited WQE. If not, it will create a new one with + * the DID from the unsolicited WQE. The ELS command from the unsolicited + * WQE is then used to invoke the proper routine and to set up proper state + * of the discovery state machine. 
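/*
 * The LS receive counters above keep a simple ledger that the nvmestat
 * debugfs code in the follow-on patch checks: rcv_ls_req_in counts commands
 * handed toward the transport, rcv_ls_req_out those that nvmet_fc_rcv_ls_req()
 * accepted, and rcv_ls_req_drop the rest (allocation failures included).
 * A stand-alone user-space model of that bookkeeping, with C11 atomics in
 * place of the kernel's atomic_t:
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint rcv_ls_req_in, rcv_ls_req_out, rcv_ls_req_drop;

static void rcv_ls_req(int transport_accepts)
{
	atomic_fetch_add(&rcv_ls_req_in, 1);
	if (transport_accepts)
		atomic_fetch_add(&rcv_ls_req_out, 1);	/* rc == 0 path */
	else
		atomic_fetch_add(&rcv_ls_req_drop, 1);	/* buffer freed, ABTS sent */
}

int main(void)
{
	rcv_ls_req(1);
	rcv_ls_req(1);
	rcv_ls_req(0);
	printf("in=%u out=%u drop=%u\n",
	       atomic_load(&rcv_ls_req_in),
	       atomic_load(&rcv_ls_req_out),
	       atomic_load(&rcv_ls_req_drop));
	return 0;
}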
+ **/ +static void +lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, + uint64_t isr_timestamp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + uint32_t *payload; + uint32_t size, oxid, sid, rc; + + oxid = 0; + if (!nvmebuf || !phba->targetport) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6157 FCP Drop IO\n"); + goto dropit; + } + + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + size = nvmebuf->bytes_recv; + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context; + if (ctxp == NULL) { + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6158 FCP Drop IO x%x: Alloc\n", + oxid); + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + /* Cannot send ABTS without context */ + return; + } + memset(ctxp, 0, sizeof(ctxp->ctx)); + ctxp->wqeq = NULL; + ctxp->txrdy = NULL; + ctxp->offset = 0; + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->rqb_buffer = nvmebuf; + ctxp->entry_cnt = 1; + ctxp->flag = 0; + + atomic_inc(&tgtp->rcv_fcp_cmd_in); + /* + * The calling sequence should be: + * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done + * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + */ + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + payload, size); + + /* Process FCP command */ + if (rc == 0) { + atomic_inc(&tgtp->rcv_fcp_cmd_out); + return; + } + + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", + ctxp->oxid, rc); +dropit: + if (oxid) { + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + return; + } + + if (nvmebuf) { + nvmebuf->iocbq->hba_wqidx = 0; + /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + } +} + +/** + * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to received nvme data structure. + * + * This routine is used to process an unsolicited event received from a SLI + * (Service Level Interface) ring. The actual processing of the data buffer + * associated with the unsolicited event is done by invoking the routine + * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the + * SLI RQ on which the unsolicited event was received. + **/ +void +lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb) +{ + struct lpfc_dmabuf *d_buf; + struct hbq_dmabuf *nvmebuf; + + d_buf = piocb->context2; + nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + + if (phba->nvmet_support == 0) { + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + return; + } + lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf); +} + +/** + * lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to received nvme data structure. + * + * This routine is used to process an unsolicited event received from a SLI + * (Service Level Interface) ring. 
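/*
 * For orientation, the ctxp->state transitions used in this file trace a
 * small receive-context lifecycle.  The enum below is a stand-alone
 * restatement of the states as they appear in this patch; the real
 * LPFC_NVMET_STE_* definitions live in lpfc_nvmet.h, added elsewhere in
 * this series.
 */
enum nvmet_ctx_state_sketch {
	STE_FREE,	/* context idle, RQ buffer reposted (lpfc_nvmet_rq_post) */
	STE_RCV,	/* command received, handed to nvmet_fc_rcv_fcp_req() */
	STE_DATA,	/* FCP_TSEND/FCP_TRECEIVE data movement in flight */
	STE_RSP,	/* FCP_TRSP queued for the final response */
	STE_ABORT,	/* ABTS requested by the driver or the transport */
	STE_DONE,	/* final completion seen; buffer about to be reposted */
};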
The actual processing of the data buffer + * associated with the unsolicited event is done by invoking the routine + * lpfc_nvmet_unsol_fcp_buffer() after properly set up the buffer from the + * SLI RQ on which the unsolicited event was received. + **/ +void +lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, + uint64_t isr_timestamp) +{ + if (phba->nvmet_support == 0) { + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + return; + } + lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf, + isr_timestamp); +} + +/** + * lpfc_nvmet_prep_ls_wqe - Allocate and prepare a lpfc wqe data structure + * @phba: pointer to a host N_Port data structure. + * @ctxp: Context info for NVME LS Request + * @rspbuf: DMA buffer of NVME command. + * @rspsize: size of the NVME command. + * + * This routine is used for allocating a lpfc-WQE data structure from + * the driver lpfc-WQE free-list and prepare the WQE with the parameters + * passed into the routine for discovery state machine to issue an Extended + * Link Service (NVME) commands. It is a generic lpfc-WQE allocation + * and preparation routine that is used by all the discovery state machine + * routines and the NVME command-specific fields will be later set up by + * the individual discovery machine routines after calling this routine + * allocating and preparing a generic WQE data structure. It fills in the + * Buffer Descriptor Entries (BDEs), allocates buffers for both command + * payload and response payload (if expected). The reference count on the + * ndlp is incremented by 1 and the reference to the ndlp is put into + * context1 of the WQE data structure for this WQE to hold the ndlp + * reference for the command's callback function to access later. 
+ * + * Return code + * Pointer to the newly allocated/prepared nvme wqe data structure + * NULL - when nvme wqe data structure allocation/preparation failed + **/ +static struct lpfc_iocbq * +lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + dma_addr_t rspbuf, uint16_t rspsize) +{ + struct lpfc_nodelist *ndlp; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe *wqe; + + if (!lpfc_is_link_up(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6104 lpfc_nvmet_prep_ls_wqe: link err: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + /* Allocate buffer for command wqe */ + nvmewqe = lpfc_sli_get_iocbq(phba); + if (nvmewqe == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6105 lpfc_nvmet_prep_ls_wqe: No WQE: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6106 lpfc_nvmet_prep_ls_wqe: No ndlp: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + goto nvme_wqe_free_wqeq_exit; + } + ctxp->wqeq = nvmewqe; + + /* prevent preparing wqe with NULL ndlp reference */ + nvmewqe->context1 = lpfc_nlp_get(ndlp); + if (nvmewqe->context1 == NULL) + goto nvme_wqe_free_wqeq_exit; + nvmewqe->context2 = ctxp; + + wqe = &nvmewqe->wqe; + memset(wqe, 0, sizeof(union lpfc_wqe)); + + /* Words 0 - 2 */ + wqe->xmit_sequence.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->xmit_sequence.bde.tus.f.bdeSize = rspsize; + wqe->xmit_sequence.bde.addrLow = le32_to_cpu(putPaddrLow(rspbuf)); + wqe->xmit_sequence.bde.addrHigh = le32_to_cpu(putPaddrHigh(rspbuf)); + + /* Word 3 */ + + /* Word 4 */ + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0); + bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1); + bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0); + bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL); + bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->xmit_sequence.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->xmit_sequence.wqe_com, nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->xmit_sequence.wqe_com, + CMD_XMIT_SEQUENCE64_WQE); + bf_set(wqe_ct, &wqe->xmit_sequence.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->xmit_sequence.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->xmit_sequence.wqe_com, 0); + + /* Word 8 */ + wqe->xmit_sequence.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->xmit_sequence.wqe_com, nvmewqe->iotag); + /* Needs to be set by caller */ + bf_set(wqe_rcvoxid, &wqe->xmit_sequence.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe->xmit_sequence.wqe_com, 1); + bf_set(wqe_iod, &wqe->xmit_sequence.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->xmit_sequence.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe->xmit_sequence.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->xmit_sequence.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->xmit_sequence.wqe_com, + OTHER_COMMAND); + + /* Word 12 */ + wqe->xmit_sequence.xmit_len = rspsize; + + nvmewqe->retry = 1; + nvmewqe->vport = phba->pport; + nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; + nvmewqe->iocb_flag |= LPFC_IO_NVME_LS; + + /* Xmit NVME response to remote NPORT */ + lpfc_printf_log(phba, 
KERN_INFO, LOG_NVME_DISC, + "6039 Xmit NVME LS response to remote " + "NPORT x%x iotag:x%x oxid:x%x size:x%x\n", + ndlp->nlp_DID, nvmewqe->iotag, ctxp->oxid, + rspsize); + return nvmewqe; + +nvme_wqe_free_wqeq_exit: + nvmewqe->context2 = NULL; + nvmewqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, nvmewqe); + return NULL; +} + + +static struct lpfc_iocbq * +lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp) +{ + struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req; + struct lpfc_nvmet_tgtport *tgtp; + struct sli4_sge *sgl; + struct lpfc_nodelist *ndlp; + struct lpfc_iocbq *nvmewqe; + struct scatterlist *sgel; + union lpfc_wqe128 *wqe; + uint32_t *txrdy; + dma_addr_t physaddr; + int i, cnt; + int xc = 1; + + if (!lpfc_is_link_up(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6107 lpfc_nvmet_prep_fcp_wqe: link err:" + "NPORT x%x oxid:x%x\n", ctxp->sid, + ctxp->oxid); + return NULL; + } + + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6108 lpfc_nvmet_prep_fcp_wqe: no ndlp: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + nvmewqe = ctxp->wqeq; + if (nvmewqe == NULL) { + /* Allocate buffer for command wqe */ + nvmewqe = ctxp->rqb_buffer->iocbq; + if (nvmewqe == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6110 lpfc_nvmet_prep_fcp_wqe: No " + "WQE: NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + ctxp->wqeq = nvmewqe; + xc = 0; /* create new XRI */ + nvmewqe->sli4_lxritag = NO_XRI; + nvmewqe->sli4_xritag = NO_XRI; + } + + /* Sanity check */ + if (((ctxp->state == LPFC_NVMET_STE_RCV) && + (ctxp->entry_cnt == 1)) || + ((ctxp->state == LPFC_NVMET_STE_DATA) && + (ctxp->entry_cnt > 1))) { + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + } else { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6111 Wrong state %s: %d cnt %d\n", + __func__, ctxp->state, ctxp->entry_cnt); + return NULL; + } + + sgl = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl; + switch (rsp->op) { + case NVMET_FCOP_READDATA: + case NVMET_FCOP_READDATA_RSP: + /* Words 0 - 2 : The first sg segment */ + sgel = &rsp->sg[0]; + physaddr = sg_dma_address(sgel); + wqe->fcp_tsend.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_tsend.bde.tus.f.bdeSize = sg_dma_len(sgel); + wqe->fcp_tsend.bde.addrLow = cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_tsend.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_tsend.payload_offset_len = 0; + + /* Word 4 */ + wqe->fcp_tsend.relative_offset = ctxp->offset; + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_tsend.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_tsend.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->fcp_tsend.wqe_com, CMD_FCP_TSEND64_WQE); + + /* Word 8 */ + wqe->fcp_tsend.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_tsend.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_tsend.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, 
&wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_iod, &wqe->fcp_tsend.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_tsend.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_xc, &wqe->fcp_tsend.wqe_com, xc); + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_tsend.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_tsend.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_tsend.wqe_com, + FCP_COMMAND_TSEND); + + /* Word 12 */ + wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; + + /* Setup 2 SKIP SGEs */ + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + if (rsp->op == NVMET_FCOP_READDATA_RSP) { + atomic_inc(&tgtp->xmt_fcp_read_rsp); + bf_set(wqe_ar, &wqe->fcp_tsend.wqe_com, 1); + if ((ndlp->nlp_flag & NLP_SUPPRESS_RSP) && + (rsp->rsplen == 12)) { + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, 0); + } else { + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, + ((rsp->rsplen >> 2) - 1)); + memcpy(&wqe->words[16], rsp->rspaddr, + rsp->rsplen); + } + } else { + atomic_inc(&tgtp->xmt_fcp_read); + + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_ar, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, 0); + } + ctxp->state = LPFC_NVMET_STE_DATA; + break; + + case NVMET_FCOP_WRITEDATA: + /* Words 0 - 2 : The first sg segment */ + txrdy = pci_pool_alloc(phba->txrdy_payload_pool, + GFP_KERNEL, &physaddr); + if (!txrdy) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6041 Bad txrdy buffer: oxid x%x\n", + ctxp->oxid); + return NULL; + } + ctxp->txrdy = txrdy; + ctxp->txrdy_phys = physaddr; + wqe->fcp_treceive.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_treceive.bde.tus.f.bdeSize = TXRDY_PAYLOAD_LEN; + wqe->fcp_treceive.bde.addrLow = + cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_treceive.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_treceive.payload_offset_len = TXRDY_PAYLOAD_LEN; + + /* Word 4 */ + wqe->fcp_treceive.relative_offset = ctxp->offset; + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_treceive.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_treceive.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_ar, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_cmnd, &wqe->fcp_treceive.wqe_com, + CMD_FCP_TRECEIVE64_WQE); + + /* Word 8 */ + wqe->fcp_treceive.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_treceive.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_treceive.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_treceive.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_treceive.wqe_com, 1); + bf_set(wqe_iod, 
&wqe->fcp_treceive.wqe_com, LPFC_WQE_IOD_READ); + bf_set(wqe_lenloc, &wqe->fcp_treceive.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_xc, &wqe->fcp_treceive.wqe_com, xc); + bf_set(wqe_wqes, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_nvme, &wqe->fcp_treceive.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_treceive.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_treceive.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_treceive.wqe_com, + FCP_COMMAND_TRECEIVE); + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + + /* Word 12 */ + wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; + + /* Setup 1 TXRDY and 1 SKIP SGE */ + txrdy[0] = 0; + txrdy[1] = cpu_to_be32(rsp->transfer_length); + txrdy[2] = 0; + + sgl->addr_hi = putPaddrHigh(physaddr); + sgl->addr_lo = putPaddrLow(physaddr); + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(TXRDY_PAYLOAD_LEN); + sgl++; + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + ctxp->state = LPFC_NVMET_STE_DATA; + atomic_inc(&tgtp->xmt_fcp_write); + break; + + case NVMET_FCOP_RSP: + /* Words 0 - 2 */ + sgel = &rsp->sg[0]; + physaddr = rsp->rspdma; + wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen; + wqe->fcp_trsp.bde.addrLow = + cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_trsp.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_trsp.response_len = rsp->rsplen; + + /* Word 4 */ + wqe->fcp_trsp.rsvd_4_5[0] = 0; + + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_trsp.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_trsp.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_ag, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_cmnd, &wqe->fcp_trsp.wqe_com, CMD_FCP_TRSP64_WQE); + + /* Word 8 */ + wqe->fcp_trsp.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_trsp.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_trsp.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_trsp.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_trsp.wqe_com, + LPFC_WQE_LENLOC_WORD3); + bf_set(wqe_xc, &wqe->fcp_trsp.wqe_com, xc); + bf_set(wqe_nvme, &wqe->fcp_trsp.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_trsp.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_trsp.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_trsp.wqe_com, + FCP_COMMAND_TRSP); + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + ctxp->state = LPFC_NVMET_STE_RSP; + + if (rsp->rsplen == LPFC_NVMET_SUCCESS_LEN) { + /* Good response - all zero's on wire */ + bf_set(wqe_wqes, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_trsp.wqe_com, 0); + } else { + bf_set(wqe_wqes, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_irsp, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_irsplen, &wqe->fcp_trsp.wqe_com, + ((rsp->rsplen >> 2) - 1)); + memcpy(&wqe->words[16], rsp->rspaddr, rsp->rsplen); + } + + /* Use rspbuf, NOT sg list */ + rsp->sg_cnt = 0; + sgl->word2 = 0; + 
atomic_inc(&tgtp->xmt_fcp_rsp); + break; + + default: + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, + "6064 Unknown Rsp Op %d\n", + rsp->op); + return NULL; + } + + nvmewqe->retry = 1; + nvmewqe->vport = phba->pport; + nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; + nvmewqe->context1 = ndlp; + + for (i = 0; i < rsp->sg_cnt; i++) { + sgel = &rsp->sg[i]; + physaddr = sg_dma_address(sgel); + cnt = sg_dma_len(sgel); + sgl->addr_hi = putPaddrHigh(physaddr); + sgl->addr_lo = putPaddrLow(physaddr); + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + bf_set(lpfc_sli4_sge_offset, sgl, ctxp->offset); + if ((i+1) == rsp->sg_cnt) + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(cnt); + sgl++; + ctxp->offset += cnt; + } + return nvmewqe; +} + +/** + * lpfc_nvmet_sol_fcp_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for FCP cmds + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + ctxp->state = LPFC_NVMET_STE_DONE; + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +/** + * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for FCP cmds + * The function frees memory resources used for the NVME commands. 
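/*
 * Quick stand-alone check of the response-embedding arithmetic used in the
 * FCP_TSEND and FCP_TRSP cases above: a non-trivial ERSP is copied into WQE
 * words 16+ and wqe_irsplen carries its length in 32-bit words minus one,
 * while the 12-byte good-status response is either suppressed (TSEND with
 * NLP_SUPPRESS_RSP) or sent as the standard all-zero response without being
 * embedded (TRSP).  LPFC_NVMET_SUCCESS_LEN is defined in lpfc_nvmet.h; 12 is
 * assumed here.
 */
#include <assert.h>
#include <stdio.h>

#define SUCCESS_RSP_LEN 12	/* assumed value of LPFC_NVMET_SUCCESS_LEN */

static unsigned int irsplen_words(unsigned int rsplen)
{
	return (rsplen >> 2) - 1;	/* mirrors bf_set(wqe_irsplen, ...) */
}

int main(void)
{
	assert(irsplen_words(SUCCESS_RSP_LEN) == 2);	/* unused when suppressed */
	assert(irsplen_words(32) == 7);			/* 8 words embedded */
	printf("rsplen=32 -> irsplen=%u\n", irsplen_words(32));
	return 0;
}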
+ **/ +static void +lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", + ctxp, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + if (ctxp) { + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_ABORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6112 ABORT Wrong state:%d oxid x%x\n", + ctxp->state, ctxp->oxid); + } + ctxp->state = LPFC_NVMET_STE_DONE; + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + } +} + +/** + * lpfc_nvmet_xmt_ls_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for LS cmds + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", + ctxp, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + if (ctxp) { + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + kfree(ctxp); + } else + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +static int +lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *wqe_abts; + struct lpfc_nodelist *ndlp; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6067 %s: Entrypoint: sid %x xri %x\n", __func__, + sid, xri); + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6134 Drop ABTS - wrong NDLP state x%x.\n", + ndlp->nlp_state); + + /* No failure to an ABTS request. */ + return 0; + } + + abts_wqeq = ctxp->wqeq; + wqe_abts = &abts_wqeq->wqe; + ctxp->state = LPFC_NVMET_STE_ABORT; + + /* + * Since we zero the whole WQE, we need to ensure we set the WQE fields + * that were initialized in lpfc_sli4_nvmet_alloc. 
+ */ + memset(wqe_abts, 0, sizeof(union lpfc_wqe)); + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe_abts->xmit_sequence.wge_ctl, 0); + bf_set(wqe_ls, &wqe_abts->xmit_sequence.wge_ctl, 1); + bf_set(wqe_la, &wqe_abts->xmit_sequence.wge_ctl, 0); + bf_set(wqe_rctl, &wqe_abts->xmit_sequence.wge_ctl, FC_RCTL_BA_ABTS); + bf_set(wqe_type, &wqe_abts->xmit_sequence.wge_ctl, FC_TYPE_BLS); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe_abts->xmit_sequence.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe_abts->xmit_sequence.wqe_com, + abts_wqeq->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe_abts->xmit_sequence.wqe_com, + CMD_XMIT_SEQUENCE64_WQE); + bf_set(wqe_ct, &wqe_abts->xmit_sequence.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe_abts->xmit_sequence.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe_abts->xmit_sequence.wqe_com, 0); + + /* Word 8 */ + wqe_abts->xmit_sequence.wqe_com.abort_tag = abts_wqeq->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe_abts->xmit_sequence.wqe_com, abts_wqeq->iotag); + /* Needs to be set by caller */ + bf_set(wqe_rcvoxid, &wqe_abts->xmit_sequence.wqe_com, xri); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe_abts->xmit_sequence.wqe_com, 1); + bf_set(wqe_iod, &wqe_abts->xmit_sequence.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe_abts->xmit_sequence.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe_abts->xmit_sequence.wqe_com, 0); + bf_set(wqe_qosd, &wqe_abts->xmit_sequence.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe_abts->xmit_sequence.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe_abts->xmit_sequence.wqe_com, + OTHER_COMMAND); + + abts_wqeq->vport = phba->pport; + abts_wqeq->context1 = ndlp; + abts_wqeq->context2 = ctxp; + abts_wqeq->context3 = NULL; + abts_wqeq->rsvd2 = 0; + /* hba_wqidx should already be setup from command we are aborting */ + abts_wqeq->iocb.ulpCommand = CMD_XMIT_SEQUENCE64_CR; + abts_wqeq->iocb.ulpLe = 1; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6069 Issue ABTS to xri x%x reqtag x%x\n", + xri, abts_wqeq->iotag); + return 1; +} + +static int +lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *abts_wqe; + struct lpfc_nodelist *ndlp; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } + + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6160 Drop ABTS - wrong NDLP state x%x.\n", + ndlp->nlp_state); + + /* No failure to an ABTS request. */ + return 0; + } + + /* Issue ABTS for this WQE based on iotag */ + ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); + if (!ctxp->abort_wqeq) { + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6161 Abort failed: No wqeqs: " + "xri: x%x\n", ctxp->oxid); + /* No failure to an ABTS request. */ + return 0; + } + abts_wqeq = ctxp->abort_wqeq; + abts_wqe = &abts_wqeq->wqe; + ctxp->state = LPFC_NVMET_STE_ABORT; + + /* Announce entry to new IO submit field. 
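/*
 * The hbalock-protected checks that follow (HBA_NVME_IOQ_FLUSH, then
 * LPFC_DRIVER_ABORTED) ensure at most one ABTS is built for an exchange even
 * if an abort request races with a reset/flush.  A stand-alone model of that
 * test-and-mark-under-lock pattern, with a pthread mutex standing in for the
 * driver lock:
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct exchange {
	pthread_mutex_t lock;
	bool flushing;		/* stands in for HBA_NVME_IOQ_FLUSH */
	bool aborted;		/* stands in for LPFC_DRIVER_ABORTED */
};

static bool try_issue_abort(struct exchange *x)
{
	bool issue = false;

	pthread_mutex_lock(&x->lock);
	if (!x->flushing && !x->aborted) {
		x->aborted = true;	/* mark before dropping the lock */
		issue = true;
	}
	pthread_mutex_unlock(&x->lock);
	return issue;
}

int main(void)
{
	struct exchange x = { PTHREAD_MUTEX_INITIALIZER, false, false };

	printf("first try issues ABTS:  %d\n", try_issue_abort(&x));
	printf("second try issues ABTS: %d\n", try_issue_abort(&x));
	return 0;
}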
*/ + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6162 Abort Request to rport DID x%06x " + "for xri x%x x%x\n", + ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag); + + /* If the hba is getting reset, this flag is set. It is + * cleared when the reset is complete and rings reestablished. + */ + spin_lock_irqsave(&phba->hbalock, flags); + /* driver queued commands are in process of being flushed */ + if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6163 Driver in reset cleanup - flushing " + "NVME Req now. hba_flag x%x oxid x%x\n", + phba->hba_flag, ctxp->oxid); + lpfc_sli_release_iocbq(phba, abts_wqeq); + return 0; + } + + /* Outstanding abort is in progress */ + if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6164 Outstanding NVME I/O Abort Request " + "still pending on oxid x%x\n", + ctxp->oxid); + lpfc_sli_release_iocbq(phba, abts_wqeq); + return 0; + } + + /* Ready - mark outstanding as aborted by driver. */ + abts_wqeq->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); + + /* word 3 */ + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = ctxp->wqeq->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abts_wqeq->iotag); + + /* word 10 */ + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abts_wqeq->hba_wqidx = ctxp->wqeq->hba_wqidx; + abts_wqeq->wqe_cmpl = lpfc_nvmet_sol_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVME; + abts_wqeq->context2 = ctxp; + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) + return 0; + + lpfc_sli_release_iocbq(phba, abts_wqeq); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6166 Failed abts issue_wqe with status x%x " + "for oxid x%x.\n", + rc, ctxp->oxid); + return 1; +} + + +static int +lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } + + rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); + if (rc == 0) + goto aerr; + + spin_lock_irqsave(&phba->hbalock, flags); + abts_wqeq = ctxp->wqeq; + abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVMET; + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, 
abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_rsp); + return 0; + } + +aerr: + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", + ctxp->oxid, rc); + return 1; +} + +static int +lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *wqe_abts; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + /* Issue ABTS for this WQE based on iotag */ + ctxp->wqeq = lpfc_sli_get_iocbq(phba); + if (!ctxp->wqeq) { + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6068 Abort failed: No wqeqs: " + "xri: x%x\n", xri); + /* No failure to an ABTS request. */ + kfree(ctxp); + return 0; + } + } + abts_wqeq = ctxp->wqeq; + wqe_abts = &abts_wqeq->wqe; + lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); + + spin_lock_irqsave(&phba->hbalock, flags); + abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVME_LS; + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_rsp); + return 0; + } + + atomic_inc(&tgtp->xmt_abort_rsp_error); + abts_wqeq->context2 = NULL; + abts_wqeq->context3 = NULL; + lpfc_sli_release_iocbq(phba, abts_wqeq); + kfree(ctxp); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6056 Failed to Issue ABTS. Status x%x\n", rc); + return 0; +} diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 72164acb6656ae..84565f810392b7 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2518,7 +2518,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, switch (fch_type) { case FC_TYPE_NVME: - /* todo: tgt: forward NVME LS to transport */ + lpfc_nvmet_unsol_ls_event(phba, pring, saveq); return 1; default: break; @@ -6867,7 +6867,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) goto out_destroy_queue; } phba->sli4_hba.nvmet_xri_cnt = rc; - /* todo: tgt: create targetport */ + lpfc_nvmet_create_targetport(phba); } else { /* update host scsi xri-sgl sizes and mappings */ rc = lpfc_sli4_scsi_sgl_update(phba); @@ -13137,7 +13137,9 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, if (fc_hdr->fh_type == FC_TYPE_FCP) { dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); - /* todo: tgt: forward cmd iu to transport */ + lpfc_nvmet_unsol_fcp_event( + phba, phba->sli4_hba.els_wq->pring, dma_buf, + cq->assoc_qp->isr_timestamp); return false; } drop: From 2b65e18202fd0f109b739dd6717286edca7a2b0d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:38 -0800 Subject: [PATCH 0159/1911] scsi: lpfc: NVME Target: Add debugfs support NVME Target: Add debugfs support Adds debugfs snippets to cover the new NVME target functionality Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 303 ++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_nvmet.c | 317 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_nvmet.h | 13 ++ 3 files changed, 628 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index abc39f605960e9..f92796d8dc429d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -47,6 +47,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -543,11 +544,13 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) int len = 0; int cnt; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; unsigned char *statep; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; + struct lpfc_nvmet_tgtport *tgtp; struct nvme_fc_remote_port *nrport; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -626,6 +629,27 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock_irq(shost->host_lock); + if (phba->nvmet_support && phba->targetport && (vport == phba->pport)) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf + len, size - len, + "\nNVME Targetport Entry ...\n"); + + /* Port state is only one of two values for now. */ + if (phba->targetport->port_id) + statep = "REGISTERED"; + else + statep = "INIT"; + len += snprintf(buf + len, size - len, + "TGT WWNN x%llx WWPN x%llx State %s\n", + wwn_to_u64(vport->fc_nodename.u.wwn), + wwn_to_u64(vport->fc_portname.u.wwn), + statep); + len += snprintf(buf + len, size - len, + " Targetport DID x%06x\n", + phba->targetport->port_id); + goto out_exit; + } + len += snprintf(buf + len, size - len, "\nNVME Lport/Rport Entries ...\n"); @@ -718,9 +742,75 @@ static int lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; int len = 0; - if (phba->nvmet_support == 0) { + if (phba->nvmet_support) { + if (!phba->targetport) + return len; + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf+len, size-len, + "\nNVME Targetport Statistics\n"); + + len += snprintf(buf+len, size-len, + "LS: Rcv %08x Drop %08x Abort %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_drop), + atomic_read(&tgtp->xmt_ls_abort)); + if (atomic_read(&tgtp->rcv_ls_req_in) != + atomic_read(&tgtp->rcv_ls_req_out)) { + len += snprintf(buf+len, size-len, + "Rcv LS: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_out)); + } + + len += snprintf(buf+len, size-len, + "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", + atomic_read(&tgtp->xmt_ls_rsp), + atomic_read(&tgtp->xmt_ls_drop), + atomic_read(&tgtp->xmt_ls_rsp_cmpl), + atomic_read(&tgtp->xmt_ls_rsp_error)); + + len += snprintf(buf+len, size-len, + "FCP: Rcv %08x Drop %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_drop)); + + if (atomic_read(&tgtp->rcv_fcp_cmd_in) != + atomic_read(&tgtp->rcv_fcp_cmd_out)) { + len += snprintf(buf+len, size-len, + "Rcv FCP: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out)); + } + + len += snprintf(buf+len, size-len, + "FCP Rsp: read %08x readrsp %08x write %08x rsp 
%08x\n", + atomic_read(&tgtp->xmt_fcp_read), + atomic_read(&tgtp->xmt_fcp_read_rsp), + atomic_read(&tgtp->xmt_fcp_write), + atomic_read(&tgtp->xmt_fcp_rsp)); + + len += snprintf(buf+len, size-len, + "FCP Rsp: abort %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_drop)); + + len += snprintf(buf+len, size-len, + "FCP Rsp Cmpl: %08x err %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_rsp_cmpl), + atomic_read(&tgtp->xmt_fcp_rsp_error), + atomic_read(&tgtp->xmt_fcp_rsp_drop)); + + len += snprintf(buf+len, size-len, + "ABORT: Xmt %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_rsp), + atomic_read(&tgtp->xmt_abort_rsp_error), + atomic_read(&tgtp->xmt_abort_cmpl)); + + len += snprintf(buf+len, size-len, "\n"); + } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; @@ -828,6 +918,121 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_data_samples)); return len; } + + /* NVME Target */ + len += snprintf(buf + len, PAGE_SIZE-len, + "ktime %s: Total Samples: %lld %lld\n", + (phba->ktime_on ? "Enabled" : "Disabled"), + phba->ktime_data_samples, + phba->ktime_status_samples); + if (phba->ktime_data_samples == 0) + return len; + + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 1: MSI-X ISR Rcv cmd -to- " + "cmd pass to NVME Layer\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg1_total / + phba->ktime_data_samples, + phba->ktime_seg1_min, + phba->ktime_seg1_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 2: cmd pass to NVME Layer- " + "-to- Driver rcv cmd OP (action)\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg2_total / + phba->ktime_data_samples, + phba->ktime_seg2_min, + phba->ktime_seg2_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 3: Driver rcv cmd OP -to- " + "Firmware WQ doorbell: cmd\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg3_total / + phba->ktime_data_samples, + phba->ktime_seg3_min, + phba->ktime_seg3_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 4: Firmware WQ doorbell: cmd " + "-to- MSI-X ISR for cmd cmpl\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg4_total / + phba->ktime_data_samples, + phba->ktime_seg4_min, + phba->ktime_seg4_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 5: MSI-X ISR for cmd cmpl " + "-to- NVME layer passed cmd done\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg5_total / + phba->ktime_data_samples, + phba->ktime_seg5_min, + phba->ktime_seg5_max); + + if (phba->ktime_status_samples == 0) { + len += snprintf(buf + len, PAGE_SIZE-len, + "Total: cmd received by MSI-X ISR " + "-to- cmd completed on wire\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld " + "max %08lld\n", + phba->ktime_seg10_total / + phba->ktime_data_samples, + phba->ktime_seg10_min, + phba->ktime_seg10_max); + return len; + } + + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 6: NVME layer passed cmd done " + "-to- Driver rcv rsp status OP\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg6_total / + phba->ktime_status_samples, + phba->ktime_seg6_min, + phba->ktime_seg6_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 7: Driver rcv rsp status OP " 
+ "-to- Firmware WQ doorbell: status\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg7_total / + phba->ktime_status_samples, + phba->ktime_seg7_min, + phba->ktime_seg7_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 8: Firmware WQ doorbell: status" + " -to- MSI-X ISR for status cmpl\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg8_total / + phba->ktime_status_samples, + phba->ktime_seg8_min, + phba->ktime_seg8_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 9: MSI-X ISR for status cmpl " + "-to- NVME layer passed status done\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg9_total / + phba->ktime_status_samples, + phba->ktime_seg9_min, + phba->ktime_seg9_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Total: cmd received by MSI-X ISR -to- " + "cmd completed on wire\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg10_total / + phba->ktime_status_samples, + phba->ktime_seg10_min, + phba->ktime_seg10_max); return len; } @@ -953,7 +1158,9 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) int i; int len = 0; uint32_t tot_xmt = 0; + uint32_t tot_rcv = 0; uint32_t tot_cmpl = 0; + uint32_t tot_ccmpl = 0; if (phba->nvmet_support == 0) { /* NVME Initiator */ @@ -977,6 +1184,33 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) return len; } + /* NVME Target */ + len += snprintf(buf + len, PAGE_SIZE - len, + "CPUcheck %s ", + (phba->cpucheck_on & LPFC_CHECK_NVMET_IO ? + "IO Enabled - " : "IO Disabled - ")); + len += snprintf(buf + len, PAGE_SIZE - len, + "%s\n", + (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ? 
+ "Rcv Enabled\n" : "Rcv Disabled\n")); + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%02d: xmit x%08x ccmpl x%08x " + "cmpl x%08x rcv x%08x\n", + i, phba->cpucheck_xmt_io[i], + phba->cpucheck_ccmpl_io[i], + phba->cpucheck_cmpl_io[i], + phba->cpucheck_rcv_io[i]); + tot_xmt += phba->cpucheck_xmt_io[i]; + tot_rcv += phba->cpucheck_rcv_io[i]; + tot_cmpl += phba->cpucheck_cmpl_io[i]; + tot_ccmpl += phba->cpucheck_ccmpl_io[i]; + } + len += snprintf(buf + len, PAGE_SIZE - len, + "tot:xmit x%08x ccmpl x%08x cmpl x%08x rcv x%08x\n", + tot_xmt, tot_ccmpl, tot_cmpl, tot_rcv); return len; } @@ -1660,6 +1894,65 @@ lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file) return rc; } +static ssize_t +lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; + char mybuf[64]; + char *pbuf; + + if (!phba->targetport) + return -ENXIO; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if ((strncmp(pbuf, "reset", strlen("reset")) == 0) || + (strncmp(pbuf, "zero", strlen("zero")) == 0)) { + atomic_set(&tgtp->rcv_ls_req_in, 0); + atomic_set(&tgtp->rcv_ls_req_out, 0); + atomic_set(&tgtp->rcv_ls_req_drop, 0); + atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_rsp, 0); + atomic_set(&tgtp->xmt_ls_drop, 0); + atomic_set(&tgtp->xmt_ls_rsp_error, 0); + atomic_set(&tgtp->xmt_ls_rsp_cmpl, 0); + + atomic_set(&tgtp->rcv_fcp_cmd_in, 0); + atomic_set(&tgtp->rcv_fcp_cmd_out, 0); + atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_drop, 0); + atomic_set(&tgtp->xmt_fcp_read_rsp, 0); + atomic_set(&tgtp->xmt_fcp_read, 0); + atomic_set(&tgtp->xmt_fcp_write, 0); + atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); + atomic_set(&tgtp->xmt_fcp_rsp_error, 0); + atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + + atomic_set(&tgtp->xmt_abort_rsp, 0); + atomic_set(&tgtp->xmt_abort_rsp_error, 0); + atomic_set(&tgtp->xmt_abort_cmpl, 0); + } + return nbytes; +} + static int lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) { @@ -1956,7 +2249,10 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, pbuf = &mybuf[0]; if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { - phba->cpucheck_on |= LPFC_CHECK_NVME_IO; + if (phba->nvmet_support) + phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; + else + phba->cpucheck_on |= LPFC_CHECK_NVME_IO; return strlen(pbuf); } else if ((strncmp(pbuf, "rcv", sizeof("rcv") - 1) == 0)) { @@ -2880,7 +3176,7 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } - if (phba->cfg_nvmet_mrq > eqidx) { + if (eqidx < phba->cfg_nvmet_mrq) { /* NVMET CQset */ qp = phba->sli4_hba.nvmet_cqset[eqidx]; *len = __lpfc_idiag_print_cq(qp, "NVMET CQset", pbuffer, *len); @@ -4493,6 +4789,7 @@ static const struct file_operations lpfc_debugfs_op_nvmestat = { .open = lpfc_debugfs_nvmestat_open, .llseek = lpfc_debugfs_lseek, .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmestat_write, 
.release = lpfc_debugfs_release, }; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index d4efd86bb15679..a476d538360f97 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -51,6 +51,7 @@ #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" +#include "lpfc_debugfs.h" static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, struct lpfc_nvmet_rcv_ctx *, @@ -103,6 +104,9 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp = cmdwqe->context2; rsp = &ctxp->ctx.ls_req; + lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", + ctxp->oxid, status, result); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, "6038 %s: Entrypoint: ctx %p status %x/%x\n", __func__, ctxp, status, result); @@ -144,6 +148,170 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, lpfc_rq_buf_free(phba, mp); } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +static void +lpfc_nvmet_ktime(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp) +{ + uint64_t seg1, seg2, seg3, seg4, seg5; + uint64_t seg6, seg7, seg8, seg9, seg10; + + if (!phba->ktime_on) + return; + + if (!ctxp->ts_isr_cmd || !ctxp->ts_cmd_nvme || + !ctxp->ts_nvme_data || !ctxp->ts_data_wqput || + !ctxp->ts_isr_data || !ctxp->ts_data_nvme || + !ctxp->ts_nvme_status || !ctxp->ts_status_wqput || + !ctxp->ts_isr_status || !ctxp->ts_status_nvme) + return; + + if (ctxp->ts_isr_cmd > ctxp->ts_cmd_nvme) + return; + if (ctxp->ts_cmd_nvme > ctxp->ts_nvme_data) + return; + if (ctxp->ts_nvme_data > ctxp->ts_data_wqput) + return; + if (ctxp->ts_data_wqput > ctxp->ts_isr_data) + return; + if (ctxp->ts_isr_data > ctxp->ts_data_nvme) + return; + if (ctxp->ts_data_nvme > ctxp->ts_nvme_status) + return; + if (ctxp->ts_nvme_status > ctxp->ts_status_wqput) + return; + if (ctxp->ts_status_wqput > ctxp->ts_isr_status) + return; + if (ctxp->ts_isr_status > ctxp->ts_status_nvme) + return; + /* + * Segment 1 - Time from FCP command received by MSI-X ISR + * to FCP command is passed to NVME Layer. + * Segment 2 - Time from FCP command payload handed + * off to NVME Layer to Driver receives a Command op + * from NVME Layer. + * Segment 3 - Time from Driver receives a Command op + * from NVME Layer to Command is put on WQ. + * Segment 4 - Time from Driver WQ put is done + * to MSI-X ISR for Command cmpl. + * Segment 5 - Time from MSI-X ISR for Command cmpl to + * Command cmpl is passed to NVME Layer. + * Segment 6 - Time from Command cmpl is passed to NVME + * Layer to Driver receives a RSP op from NVME Layer. + * Segment 7 - Time from Driver receives a RSP op from + * NVME Layer to WQ put is done on TRSP FCP Status. + * Segment 8 - Time from Driver WQ put is done on TRSP + * FCP Status to MSI-X ISR for TRSP cmpl. + * Segment 9 - Time from MSI-X ISR for TRSP cmpl to + * TRSP cmpl is passed to NVME Layer. + * Segment 10 - Time from FCP command received by + * MSI-X ISR to command is completed on wire. 
+ * (Segments 1 thru 8) for READDATA / WRITEDATA + * (Segments 1 thru 4) for READDATA_RSP + */ + seg1 = ctxp->ts_cmd_nvme - ctxp->ts_isr_cmd; + seg2 = (ctxp->ts_nvme_data - ctxp->ts_isr_cmd) - seg1; + seg3 = (ctxp->ts_data_wqput - ctxp->ts_isr_cmd) - + seg1 - seg2; + seg4 = (ctxp->ts_isr_data - ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3; + seg5 = (ctxp->ts_data_nvme - ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4; + + /* For auto rsp commands seg6 thru seg10 will be 0 */ + if (ctxp->ts_nvme_status > ctxp->ts_data_nvme) { + seg6 = (ctxp->ts_nvme_status - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - seg5; + seg7 = (ctxp->ts_status_wqput - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - + seg4 - seg5 - seg6; + seg8 = (ctxp->ts_isr_status - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - + seg5 - seg6 - seg7; + seg9 = (ctxp->ts_status_nvme - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - + seg5 - seg6 - seg7 - seg8; + seg10 = (ctxp->ts_isr_status - + ctxp->ts_isr_cmd); + } else { + seg6 = 0; + seg7 = 0; + seg8 = 0; + seg9 = 0; + seg10 = (ctxp->ts_isr_data - ctxp->ts_isr_cmd); + } + + phba->ktime_seg1_total += seg1; + if (seg1 < phba->ktime_seg1_min) + phba->ktime_seg1_min = seg1; + else if (seg1 > phba->ktime_seg1_max) + phba->ktime_seg1_max = seg1; + + phba->ktime_seg2_total += seg2; + if (seg2 < phba->ktime_seg2_min) + phba->ktime_seg2_min = seg2; + else if (seg2 > phba->ktime_seg2_max) + phba->ktime_seg2_max = seg2; + + phba->ktime_seg3_total += seg3; + if (seg3 < phba->ktime_seg3_min) + phba->ktime_seg3_min = seg3; + else if (seg3 > phba->ktime_seg3_max) + phba->ktime_seg3_max = seg3; + + phba->ktime_seg4_total += seg4; + if (seg4 < phba->ktime_seg4_min) + phba->ktime_seg4_min = seg4; + else if (seg4 > phba->ktime_seg4_max) + phba->ktime_seg4_max = seg4; + + phba->ktime_seg5_total += seg5; + if (seg5 < phba->ktime_seg5_min) + phba->ktime_seg5_min = seg5; + else if (seg5 > phba->ktime_seg5_max) + phba->ktime_seg5_max = seg5; + + phba->ktime_data_samples++; + if (!seg6) + goto out; + + phba->ktime_seg6_total += seg6; + if (seg6 < phba->ktime_seg6_min) + phba->ktime_seg6_min = seg6; + else if (seg6 > phba->ktime_seg6_max) + phba->ktime_seg6_max = seg6; + + phba->ktime_seg7_total += seg7; + if (seg7 < phba->ktime_seg7_min) + phba->ktime_seg7_min = seg7; + else if (seg7 > phba->ktime_seg7_max) + phba->ktime_seg7_max = seg7; + + phba->ktime_seg8_total += seg8; + if (seg8 < phba->ktime_seg8_min) + phba->ktime_seg8_min = seg8; + else if (seg8 > phba->ktime_seg8_max) + phba->ktime_seg8_max = seg8; + + phba->ktime_seg9_total += seg9; + if (seg9 < phba->ktime_seg9_min) + phba->ktime_seg9_min = seg9; + else if (seg9 > phba->ktime_seg9_max) + phba->ktime_seg9_max = seg9; +out: + phba->ktime_seg10_total += seg10; + if (seg10 < phba->ktime_seg10_min) + phba->ktime_seg10_min = seg10; + else if (seg10 > phba->ktime_seg10_max) + phba->ktime_seg10_max = seg10; + phba->ktime_status_samples++; +} +#endif + /** * lpfc_nvmet_xmt_fcp_op_cmp - Completion handler for FCP Response * @phba: Pointer to HBA context object. 
@@ -162,6 +330,9 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct nvmefc_tgt_fcp_req *rsp; struct lpfc_nvmet_rcv_ctx *ctxp; uint32_t status, result, op, start_clean; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint32_t id; +#endif ctxp = cmdwqe->context2; rsp = &ctxp->ctx.fcp_req; @@ -174,6 +345,9 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if (!phba->targetport) goto out; + lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n", + ctxp->oxid, op, status); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (status) { rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; @@ -194,7 +368,44 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* Sanity check */ ctxp->state = LPFC_NVMET_STE_DONE; ctxp->entry_cnt++; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + if (rsp->op == NVMET_FCOP_READDATA_RSP) { + ctxp->ts_isr_data = + cmdwqe->isr_timestamp; + ctxp->ts_data_nvme = + ktime_get_ns(); + ctxp->ts_nvme_status = + ctxp->ts_data_nvme; + ctxp->ts_status_wqput = + ctxp->ts_data_nvme; + ctxp->ts_isr_status = + ctxp->ts_data_nvme; + ctxp->ts_status_nvme = + ctxp->ts_data_nvme; + } else { + ctxp->ts_isr_status = + cmdwqe->isr_timestamp; + ctxp->ts_status_nvme = + ktime_get_ns(); + } + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + if (ctxp->cpu != id) + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6703 CPU Check cmpl: " + "cpu %d expect %d\n", + id, ctxp->cpu); + if (ctxp->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_cmpl_io[id]++; + } +#endif rsp->done(rsp); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + lpfc_nvmet_ktime(phba, ctxp); +#endif /* Let Abort cmpl repost the context */ if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); @@ -203,6 +414,22 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, start_clean = offsetof(struct lpfc_iocbq, wqe); memset(((char *)cmdwqe) + start_clean, 0, (sizeof(struct lpfc_iocbq) - start_clean)); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_isr_data = cmdwqe->isr_timestamp; + ctxp->ts_data_nvme = ktime_get_ns(); + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + if (ctxp->cpu != id) + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6704 CPU Check cmdcmpl: " + "cpu %d expect %d\n", + id, ctxp->cpu); + if (ctxp->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_ccmpl_io[id]++; + } +#endif rsp->done(rsp); } } @@ -254,6 +481,9 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, nvmewqeq->iocb_cmpl = NULL; nvmewqeq->context2 = ctxp; + lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n", + ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen); + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, nvmewqeq); if (rc == WQE_SUCCESS) { /* @@ -288,12 +518,39 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; unsigned long iflags; - int rc; + int rc, id; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + if (rsp->op == NVMET_FCOP_RSP) + ctxp->ts_nvme_status = ktime_get_ns(); + else + ctxp->ts_nvme_data = ktime_get_ns(); + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + ctxp->cpu = id; + if (id < LPFC_CHECK_CPU_CNT) + phba->cpucheck_xmt_io[id]++; + if (rsp->hwqid != id) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6705 CPU Check OP: " + "cpu %d 
expect %d\n", + id, rsp->hwqid); + ctxp->cpu = rsp->hwqid; + } + } +#endif if (rsp->op == NVMET_FCOP_ABORT) { lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6103 Abort op: oxri x%x %d cnt %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); + + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " + "xri x%x state x%x cnt x%x\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); ctxp->entry_cnt++; ctxp->flag |= LPFC_NVMET_ABORT_OP; @@ -330,12 +587,23 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, nvmewqeq->iocb_flag |= LPFC_IO_NVMET; ctxp->wqeq->hba_wqidx = rsp->hwqid; + lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", + ctxp->oxid, rsp->op, rsp->rsplen); + /* For now we take hbalock */ spin_lock_irqsave(&phba->hbalock, iflags); rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); spin_unlock_irqrestore(&phba->hbalock, iflags); if (rc == WQE_SUCCESS) { ctxp->flag |= LPFC_NVMET_IO_INP; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (!phba->ktime_on) + return 0; + if (rsp->op == NVMET_FCOP_RSP) + ctxp->ts_status_wqput = ktime_get_ns(); + else + ctxp->ts_data_wqput = ktime_get_ns(); +#endif return 0; } @@ -503,6 +771,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6154 LS Drop IO\n"); + oxid = 0; + size = 0; + sid = 0; goto dropit; } @@ -520,6 +791,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, "6155 LS Drop IO x%x: Alloc\n", oxid); dropit: + lpfc_nvmeio_data(phba, "NVMET LS DROP: " + "xri x%x sz %d from %06x\n", + oxid, size, sid); if (nvmebuf) lpfc_in_buf_free(phba, &nvmebuf->dbuf); return; @@ -531,6 +805,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ctxp->wqeq = NULL; ctxp->state = LPFC_NVMET_STE_RCV; ctxp->rqb_buffer = (void *)nvmebuf; + + lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); /* * The calling sequence should be: * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done @@ -545,10 +822,15 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, "%08x %08x %08x\n", __func__, ctxp, size, rc, *payload, *(payload+1), *(payload+2), *(payload+3), *(payload+4), *(payload+5)); + if (rc == 0) { atomic_inc(&tgtp->rcv_ls_req_out); return; } + + lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n", + oxid, size, sid); + atomic_inc(&tgtp->rcv_ls_req_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", @@ -586,11 +868,16 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr; uint32_t *payload; uint32_t size, oxid, sid, rc; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint32_t id; +#endif - oxid = 0; if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6157 FCP Drop IO\n"); + oxid = 0; + size = 0; + sid = 0; goto dropit; } @@ -625,6 +912,30 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->entry_cnt = 1; ctxp->flag = 0; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_isr_cmd = isr_timestamp; + ctxp->ts_cmd_nvme = ktime_get_ns(); + ctxp->ts_nvme_data = 0; + ctxp->ts_data_wqput = 0; + ctxp->ts_isr_data = 0; + ctxp->ts_data_nvme = 0; + ctxp->ts_nvme_status = 0; + ctxp->ts_status_wqput = 0; + ctxp->ts_isr_status = 0; + ctxp->ts_status_nvme = 0; + } + + if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { + id = smp_processor_id(); + if (id < 
LPFC_CHECK_CPU_CNT) + phba->cpucheck_rcv_io[id]++; + } +#endif + + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); + atomic_inc(&tgtp->rcv_fcp_cmd_in); /* * The calling sequence should be: @@ -645,6 +956,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", ctxp->oxid, rc); dropit: + lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", + oxid, size, sid); if (oxid) { lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); return; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index f7b6a3f374a451..4aa18d31a5f7b9 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -98,4 +98,17 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_IO_INP 1 #define LPFC_NVMET_ABORT_OP 2 struct rqb_dmabuf *rqb_buffer; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_isr_cmd; + uint64_t ts_cmd_nvme; + uint64_t ts_nvme_data; + uint64_t ts_data_wqput; + uint64_t ts_isr_data; + uint64_t ts_data_nvme; + uint64_t ts_nvme_status; + uint64_t ts_status_wqput; + uint64_t ts_isr_status; + uint64_t ts_status_nvme; +#endif }; From d080abe0a8693c58a4b35b74bab3526d8b554ca3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:39 -0800 Subject: [PATCH 0160/1911] scsi: lpfc: Update copyrights Update copyrights to 2017 for all files touched in this patch set Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 4 +++- drivers/scsi/lpfc/lpfc.h | 4 +++- drivers/scsi/lpfc/lpfc_attr.c | 4 +++- drivers/scsi/lpfc/lpfc_attr.h | 4 +++- drivers/scsi/lpfc/lpfc_bsg.c | 4 +++- drivers/scsi/lpfc/lpfc_bsg.h | 6 ++++-- drivers/scsi/lpfc/lpfc_compat.h | 4 +++- drivers/scsi/lpfc/lpfc_crtn.h | 4 +++- drivers/scsi/lpfc/lpfc_ct.c | 4 +++- drivers/scsi/lpfc/lpfc_debugfs.c | 4 +++- drivers/scsi/lpfc/lpfc_debugfs.h | 4 +++- drivers/scsi/lpfc/lpfc_disc.h | 4 +++- drivers/scsi/lpfc/lpfc_els.c | 4 +++- drivers/scsi/lpfc/lpfc_hbadisc.c | 4 +++- drivers/scsi/lpfc/lpfc_hw.h | 4 +++- drivers/scsi/lpfc/lpfc_hw4.h | 6 ++++-- drivers/scsi/lpfc/lpfc_ids.h | 4 +++- drivers/scsi/lpfc/lpfc_init.c | 6 ++++-- drivers/scsi/lpfc/lpfc_logmsg.h | 4 +++- drivers/scsi/lpfc/lpfc_mbox.c | 4 +++- drivers/scsi/lpfc/lpfc_mem.c | 4 +++- drivers/scsi/lpfc/lpfc_nl.h | 4 +++- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 +++- drivers/scsi/lpfc/lpfc_nvme.c | 4 +++- drivers/scsi/lpfc/lpfc_nvme.h | 4 +++- drivers/scsi/lpfc/lpfc_nvmet.c | 6 ++++-- drivers/scsi/lpfc/lpfc_nvmet.h | 4 +++- drivers/scsi/lpfc/lpfc_scsi.c | 4 +++- drivers/scsi/lpfc/lpfc_scsi.h | 4 +++- drivers/scsi/lpfc/lpfc_sli.c | 4 +++- drivers/scsi/lpfc/lpfc_sli.h | 4 +++- drivers/scsi/lpfc/lpfc_sli4.h | 4 +++- drivers/scsi/lpfc/lpfc_version.h | 8 ++++++-- drivers/scsi/lpfc/lpfc_vport.c | 4 +++- drivers/scsi/lpfc/lpfc_vport.h | 4 +++- 35 files changed, 112 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index 30a6a35abafdc9..cb6aa802c48e48 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -1,9 +1,11 @@ #/******************************************************************* # * This file is part of the Emulex Linux Device Driver for * # * Fibre Channel Host Bus Adapters. * +# * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * +# * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * # * Copyright (C) 2004-2012 Emulex. 
All rights reserved. * # * EMULEX and SLI are trademarks of Emulex. * -# * www.emulex.com * +# * www.broadcom.com * # * * # * This program is free software; you can redistribute it and/or * # * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8a4090c6771c8f..0bba2e30b4f09f 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 700a68f303f377..4114cf4308d0fe 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_attr.h b/drivers/scsi/lpfc/lpfc_attr.h index b2bd28e965faf2..d56dafcdd563b6 100644 --- a/drivers/scsi/lpfc/lpfc_attr.h +++ b/drivers/scsi/lpfc/lpfc_attr.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 55a2270cadfcfb..18157d2840a3b0 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2009-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h index f2247aa4fa1736..e7d95a4e8042fd 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.h +++ b/drivers/scsi/lpfc/lpfc_bsg.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2010-2015 Emulex. All rights reserved. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * + * Copyright (C) 2010-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_compat.h b/drivers/scsi/lpfc/lpfc_compat.h index c88e556ea62e54..6b32b0ae750666 100644 --- a/drivers/scsi/lpfc/lpfc_compat.h +++ b/drivers/scsi/lpfc/lpfc_compat.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2011 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index f56015b7852598..843dd73004da02 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 53bdfe4212512c..c22bb3f887e15b 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index f92796d8dc429d..599fde4ea8b103 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2007-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 5312e0f9deb64a..c05f56c3023f1e 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2007-2011 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index e305e97e05a94a..f4ff99d95db343 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2013 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1c0ac509e6db1c..2d26440e6f2fe6 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 542797bd3134ba..194a14d5f8a982 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index c2221377e246cf..15ca2148415055 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 0fddb23178752f..cfdb068a3bfccb 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2016 Emulex. All rights reserved. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * + * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_ids.h b/drivers/scsi/lpfc/lpfc_ids.h index 5733feafe25ffb..0ba3733eb36d07 100644 --- a/drivers/scsi/lpfc/lpfc_ids.h +++ b/drivers/scsi/lpfc/lpfc_ids.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 5eafd0db758dd5..0ee429d773f394 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * @@ -12117,5 +12119,5 @@ module_init(lpfc_init); module_exit(lpfc_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION(LPFC_MODULE_DESC); -MODULE_AUTHOR("Emulex Corporation - tech.support@emulex.com"); +MODULE_AUTHOR("Broadcom"); MODULE_VERSION("0:" LPFC_DRIVER_VERSION); diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index 3faf7a07bfd416..3b654ad08d1f99 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2009 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index 8f4bfdfd9910eb..a928f5187fa46b 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 2437ec5f48631d..c61d8d692edeee 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nl.h b/drivers/scsi/lpfc/lpfc_nl.h index f2b1bbcb196ff4..b93e78f671fbc2 100644 --- a/drivers/scsi/lpfc/lpfc_nl.h +++ b/drivers/scsi/lpfc/lpfc_nl.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2010 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 0716818f269f13..061626bdf70106 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 56b4b94a372e52..625b6589a34deb 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 85961c1ddecfd0..b2fae5e813f8a8 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a476d538360f97..c421e1738ee989 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * - * Fibre Channsel Host Bus Adapters. * + * Fibre Channsel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 4aa18d31a5f7b9..ca96f05c1604f5 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 6ba2b341233737..9d6384af9fce7e 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index def0c0a5b17e10..5da7e15400cbc1 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 84565f810392b7..d8d8693040acd0 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index f6cea02adc7c2e..9085306ddd785d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 3bfdff1a4c53cd..91153c9f6d1825 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 0ee0623a354c03..0d935351407132 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * @@ -30,4 +32,6 @@ #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \ LPFC_DRIVER_VERSION -#define LPFC_COPYRIGHT "Copyright(c) 2004-2016 Emulex. All rights reserved." 
+#define LPFC_COPYRIGHT "Copyright (C) 2017 Broadcom. All Rights Reserved. " \ + "The term \"Broadcom\" refers to Broadcom Limited " \ + "and/or its subsidiaries." diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index b0d94f2e5b44a7..c0eeea0694cbbb 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_vport.h b/drivers/scsi/lpfc/lpfc_vport.h index 6b2c94eb813430..62295971f66cf7 100644 --- a/drivers/scsi/lpfc/lpfc_vport.h +++ b/drivers/scsi/lpfc/lpfc_vport.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2006 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * From 693dcc316525d15fcf5a0a8a8e1663985ebdb02c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:40 -0800 Subject: [PATCH 0161/1911] scsi: lpfc: Update lpfc version to 11.2.0.7 Update lpfc version to 11.2.0.7 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 0d935351407132..86c6c9b26b823a 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.4" +#define LPFC_DRIVER_VERSION "11.2.0.7" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 75d943f3e4222fdd4f907b71bd9d1730412e7ce9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 22 Feb 2017 08:20:47 -0800 Subject: [PATCH 0162/1911] scsi: lpfc: add missing Kconfig NVME dependencies Add missing Kconfig NVME dependencies. Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index d4023bf1e739d2..255843446e99b3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1240,6 +1240,7 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI depends on SCSI_FC_ATTRS + depends on NVME_FC && NVME_TARGET_FC select CRC_T10DIF help This lpfc driver supports the Emulex LightPulse From f54f2cb540b53d55a81d620e816810d59be5cb1b Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:19 -0800 Subject: [PATCH 0163/1911] scsi: qla2xxx: Cleaned up queue configuration code. This patch cleans up the queue configuration code so that, once initialized, the msix_count value is not touched again. This prevents requesting an incorrect number of MSI-X vectors while performing target mode configuration. [mkp: fixed Fixes: hash] Cc: Fixes: d74595278f4a ("scsi: qla2xxx: Add multiple queue pair functionality.") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d01c90c7dd04f0..8174cee8eb53db 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1815,6 +1815,7 @@ qla2x00_iospace_config(struct qla_hw_data *ha) /* Determine queue resources */ ha->max_req_queues = ha->max_rsp_queues = 1; + ha->msix_count = QLA_BASE_VECTORS; if (!ql2xmqsupport || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))) goto mqiobase_exit; @@ -1842,9 +1843,8 @@ qla2x00_iospace_config(struct qla_hw_data *ha) "BAR 3 not enabled.\n"); mqiobase_exit: - ha->msix_count = ha->max_rsp_queues + 1; ql_dbg_pci(ql_dbg_init, ha->pdev, 0x001c, - "MSIX Count:%d.\n", ha->msix_count); + "MSIX Count: %d.\n", ha->msix_count); return (0); iospace_error_exit: @@ -1892,6 +1892,7 @@ qla83xx_iospace_config(struct qla_hw_data *ha) /* 83XX 26XX always use MQ type access for queues * - mbar 2, a.k.a region 4 */ ha->max_req_queues = ha->max_rsp_queues = 1; + ha->msix_count = QLA_BASE_VECTORS; ha->mqiobase = ioremap(pci_resource_start(ha->pdev, 4), pci_resource_len(ha->pdev, 4)); @@ -1934,14 +1935,8 @@ qla83xx_iospace_config(struct qla_hw_data *ha) "BAR 1 not enabled.\n"); mqiobase_exit: - ha->msix_count = ha->max_rsp_queues + 1; - if (QLA_TGT_MODE_ENABLED()) - ha->msix_count++; - - qlt_83xx_iospace_config(ha); - ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011f, - "MSIX Count:%d.\n", ha->msix_count); + "MSIX Count: %d.\n", ha->msix_count); return 0; iospace_error_exit: From d0d2c68b759bbf678e078fd0c71b5fde65a9392c Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:20 -0800 Subject: [PATCH 0164/1911] scsi: qla2xxx: Fix response queue count for Target mode. Target mode initialization was not calculating the response queue values correctly, resulting in one less MSI-X vector. [mkp: fixed Fixes: hash] Cc: Fixes: 093df73771ba ("scsi: qla2xxx: Fix Target mode handling with Multiqueue changes.") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K.
Petersen --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8174cee8eb53db..71b6b20ae82b94 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1916,12 +1916,13 @@ qla83xx_iospace_config(struct qla_hw_data *ha) if (ql2xmqsupport) { /* MB interrupt uses 1 vector */ ha->max_req_queues = ha->msix_count - 1; - ha->max_rsp_queues = ha->max_req_queues; /* ATIOQ needs 1 vector. That's 1 less QPair */ if (QLA_TGT_MODE_ENABLED()) ha->max_req_queues--; + ha->max_rsp_queues = ha->max_req_queues; + /* Queue pairs is the max value minus * the base queue pair */ ha->max_qpairs = ha->max_req_queues - 1; From 67f2db8792f96d8f7521461635d25f9c80245d80 Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:21 -0800 Subject: [PATCH 0165/1911] scsi: qla2xxx: Fix Regression introduced by pci_alloc_irq_vectors_affinity call. For target mode, we need to increase the minimum vectors value by one to account for the ATIO queue. Otherwise, the following stack trace will be seen: Call Trace: qla24xx_config_rings+0x15a/0x230 [qla2xxx] qla2x00_init_rings+0x1a1/0x3a0 [qla2xxx] qla2x00_restart_isp+0x5c/0x120 [qla2xxx] qla2x00_abort_isp+0x138/0x430 [qla2xxx] ? __schedule+0x260/0x580 qla2x00_do_dpc+0x3bc/0x920 [qla2xxx] ? qla2x00_relogin+0x290/0x290 [qla2xxx] ? schedule+0x3a/0xa0 ? qla2x00_relogin+0x290/0x290 [qla2xxx] kthread+0x103/0x140 ? __kthread_init_worker+0x40/0x40 ret_from_fork+0x29/0x40 RIP: qlt_24xx_config_rings+0x6c/0x90 [mkp: fixed Fixes: hash] Cc: Fixes: 17e5fc58588b ("scsi: qla2xxx: fix MSI-X vector affinity") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index edc2264db45bec..87fc921ce3915d 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3015,14 +3015,17 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) int i, ret; struct qla_msix_entry *qentry; scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); + int min_vecs = QLA_BASE_VECTORS; struct irq_affinity desc = { .pre_vectors = QLA_BASE_VECTORS, }; - if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) + if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { desc.pre_vectors++; + min_vecs++; + } - ret = pci_alloc_irq_vectors_affinity(ha->pdev, QLA_BASE_VECTORS, + ret = pci_alloc_irq_vectors_affinity(ha->pdev, min_vecs, ha->msix_count, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc); From 61d8658b4a435eac729966cc94cdda077a8df5cd Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Wed, 15 Feb 2017 06:28:23 -0800 Subject: [PATCH 0166/1911] scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework. The QLogic FastLinQ Driver for FCoE (qedf) is the FCoE-specific module for 41000 Series Converged Network Adapters by QLogic.
This patch consists of following changes: - MAINTAINERS Makefile and Kconfig changes for qedf - PCI driver registration - libfc/fcoe host level initialization - SCSI host template initialization and callbacks - Debugfs and log level infrastructure - Link handling - Firmware interface structures - QED core module initialization - Light L2 interface callbacks - I/O request initialization - Firmware I/O completion handling - Firmware ELS request/response handling - FIP request/response handled by the driver itself Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Saurav Kashyap Signed-off-by: Arun Easi Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- MAINTAINERS | 6 + drivers/scsi/Kconfig | 1 + drivers/scsi/Makefile | 1 + drivers/scsi/qedf/Kconfig | 11 + drivers/scsi/qedf/Makefile | 5 + drivers/scsi/qedf/qedf.h | 545 +++++ drivers/scsi/qedf/qedf_attr.c | 165 ++ drivers/scsi/qedf/qedf_dbg.c | 195 ++ drivers/scsi/qedf/qedf_dbg.h | 154 ++ drivers/scsi/qedf/qedf_debugfs.c | 460 ++++ drivers/scsi/qedf/qedf_els.c | 949 +++++++++ drivers/scsi/qedf/qedf_fip.c | 269 +++ drivers/scsi/qedf/qedf_hsi.h | 422 ++++ drivers/scsi/qedf/qedf_io.c | 2282 ++++++++++++++++++++ drivers/scsi/qedf/qedf_main.c | 3336 ++++++++++++++++++++++++++++++ drivers/scsi/qedf/qedf_version.h | 15 + 16 files changed, 8816 insertions(+) create mode 100644 drivers/scsi/qedf/Kconfig create mode 100644 drivers/scsi/qedf/Makefile create mode 100644 drivers/scsi/qedf/qedf.h create mode 100644 drivers/scsi/qedf/qedf_attr.c create mode 100644 drivers/scsi/qedf/qedf_dbg.c create mode 100644 drivers/scsi/qedf/qedf_dbg.h create mode 100644 drivers/scsi/qedf/qedf_debugfs.c create mode 100644 drivers/scsi/qedf/qedf_els.c create mode 100644 drivers/scsi/qedf/qedf_fip.c create mode 100644 drivers/scsi/qedf/qedf_hsi.h create mode 100644 drivers/scsi/qedf/qedf_io.c create mode 100644 drivers/scsi/qedf/qedf_main.c create mode 100644 drivers/scsi/qedf/qedf_version.h diff --git a/MAINTAINERS b/MAINTAINERS index 545633d6663d67..8a0e01477043c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10242,6 +10242,12 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/qedi/ +QLOGIC QL41xxx FCOE DRIVER +M: QLogic-Storage-Upstream@cavium.com +L: linux-scsi@vger.kernel.org +S: Supported +F: drivers/scsi/qedf/ + QNX4 FILESYSTEM M: Anders Larsen W: http://www.alarsen.net/linux/qnx4fs/ diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 255843446e99b3..8aa9bd34123e64 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1235,6 +1235,7 @@ config SCSI_QLOGICPTI source "drivers/scsi/qla2xxx/Kconfig" source "drivers/scsi/qla4xxx/Kconfig" source "drivers/scsi/qedi/Kconfig" +source "drivers/scsi/qedf/Kconfig" config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 736b77414a4baa..fc2855565a51fd 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_FCOE) += fcoe/ obj-$(CONFIG_FCOE_FNIC) += fnic/ obj-$(CONFIG_SCSI_SNIC) += snic/ obj-$(CONFIG_SCSI_BNX2X_FCOE) += libfc/ fcoe/ bnx2fc/ +obj-$(CONFIG_QEDF) += qedf/ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o libiscsi_tcp.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o obj-$(CONFIG_ISCSI_BOOT_SYSFS) += iscsi_boot_sysfs.o diff --git a/drivers/scsi/qedf/Kconfig b/drivers/scsi/qedf/Kconfig new file mode 100644 index 00000000000000..943f5ee45807b1 --- /dev/null +++ b/drivers/scsi/qedf/Kconfig @@ -0,0 +1,11 @@ +config QEDF + tristate "QLogic 
QEDF 25/40/100Gb FCoE Initiator Driver Support" + depends on PCI && SCSI + depends on QED + depends on LIBFC + depends on LIBFCOE + select QED_LL2 + select QED_FCOE + ---help--- + This driver supports FCoE offload for the QLogic FastLinQ + 41000 Series Converged Network Adapters. diff --git a/drivers/scsi/qedf/Makefile b/drivers/scsi/qedf/Makefile new file mode 100644 index 00000000000000..64e9f507ce3286 --- /dev/null +++ b/drivers/scsi/qedf/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_QEDF) := qedf.o +qedf-y = qedf_dbg.o qedf_main.o qedf_io.o qedf_fip.o \ + qedf_attr.o qedf_els.o + +qedf-$(CONFIG_DEBUG_FS) += qedf_debugfs.o diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h new file mode 100644 index 00000000000000..96346a1b1515e8 --- /dev/null +++ b/drivers/scsi/qedf/qedf.h @@ -0,0 +1,545 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#ifndef _QEDFC_H_ +#define _QEDFC_H_ + +#include +#include +#include +#include +#include +#include +#include + + +/* qedf_hsi.h needs to before included any qed includes */ +#include "qedf_hsi.h" + +#include +#include +#include +#include "qedf_version.h" +#include "qedf_dbg.h" + +/* Helpers to extract upper and lower 32-bits of pointer */ +#define U64_HI(val) ((u32)(((u64)(val)) >> 32)) +#define U64_LO(val) ((u32)(((u64)(val)) & 0xffffffff)) + +#define QEDF_DESCR "QLogic FCoE Offload Driver" +#define QEDF_MODULE_NAME "qedf" + +#define QEDF_MIN_XID 0 +#define QEDF_MAX_SCSI_XID (NUM_TASKS_PER_CONNECTION - 1) +#define QEDF_MAX_ELS_XID 4095 +#define QEDF_FLOGI_RETRY_CNT 3 +#define QEDF_RPORT_RETRY_CNT 255 +#define QEDF_MAX_SESSIONS 1024 +#define QEDF_MAX_PAYLOAD 2048 +#define QEDF_MAX_BDS_PER_CMD 256 +#define QEDF_MAX_BD_LEN 0xffff +#define QEDF_BD_SPLIT_SZ 0x1000 +#define QEDF_PAGE_SIZE 4096 +#define QED_HW_DMA_BOUNDARY 0xfff +#define QEDF_MAX_SGLEN_FOR_CACHESGL ((1U << 16) - 1) +#define QEDF_MFS (QEDF_MAX_PAYLOAD + \ + sizeof(struct fc_frame_header)) +#define QEDF_MAX_NPIV 64 +#define QEDF_TM_TIMEOUT 10 +#define QEDF_ABORT_TIMEOUT 10 +#define QEDF_CLEANUP_TIMEOUT 10 +#define QEDF_MAX_CDB_LEN 16 + +#define UPSTREAM_REMOVE 1 +#define UPSTREAM_KEEP 1 + +struct qedf_mp_req { + uint8_t tm_flags; + + uint32_t req_len; + void *req_buf; + dma_addr_t req_buf_dma; + struct fcoe_sge *mp_req_bd; + dma_addr_t mp_req_bd_dma; + struct fc_frame_header req_fc_hdr; + + uint32_t resp_len; + void *resp_buf; + dma_addr_t resp_buf_dma; + struct fcoe_sge *mp_resp_bd; + dma_addr_t mp_resp_bd_dma; + struct fc_frame_header resp_fc_hdr; +}; + +struct qedf_els_cb_arg { + struct qedf_ioreq *aborted_io_req; + struct qedf_ioreq *io_req; + u8 op; /* Used to keep track of ELS op */ + uint16_t l2_oxid; + u32 offset; /* Used for sequence cleanup */ + u8 r_ctl; /* Used for sequence cleanup */ +}; + +enum qedf_ioreq_event { + QEDF_IOREQ_EV_ABORT_SUCCESS, + QEDF_IOREQ_EV_ABORT_FAILED, + QEDF_IOREQ_EV_SEND_RRQ, + QEDF_IOREQ_EV_ELS_TMO, + QEDF_IOREQ_EV_ELS_ERR_DETECT, + QEDF_IOREQ_EV_ELS_FLUSH, + QEDF_IOREQ_EV_CLEANUP_SUCCESS, + QEDF_IOREQ_EV_CLEANUP_FAILED, +}; + +#define FC_GOOD 0 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER (0x1<<2) +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER (0x1<<3) +#define CMD_SCSI_STATUS(Cmnd) ((Cmnd)->SCp.Status) +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID (0x1<<0) +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID (0x1<<1) +struct qedf_ioreq { + struct 
list_head link; + uint16_t xid; + struct scsi_cmnd *sc_cmd; + bool use_slowpath; /* Use slow SGL for this I/O */ +#define QEDF_SCSI_CMD 1 +#define QEDF_TASK_MGMT_CMD 2 +#define QEDF_ABTS 3 +#define QEDF_ELS 4 +#define QEDF_CLEANUP 5 +#define QEDF_SEQ_CLEANUP 6 + u8 cmd_type; +#define QEDF_CMD_OUTSTANDING 0x0 +#define QEDF_CMD_IN_ABORT 0x1 +#define QEDF_CMD_IN_CLEANUP 0x2 +#define QEDF_CMD_SRR_SENT 0x3 + u8 io_req_flags; + struct qedf_rport *fcport; + unsigned long flags; + enum qedf_ioreq_event event; + size_t data_xfer_len; + struct kref refcount; + struct qedf_cmd_mgr *cmd_mgr; + struct io_bdt *bd_tbl; + struct delayed_work timeout_work; + struct completion tm_done; + struct completion abts_done; + struct fcoe_task_context *task; + int idx; +/* + * Need to allocate enough room for both sense data and FCP response data + * which has a max length of 8 bytes according to spec. + */ +#define QEDF_SCSI_SENSE_BUFFERSIZE (SCSI_SENSE_BUFFERSIZE + 8) + uint8_t *sense_buffer; + dma_addr_t sense_buffer_dma; + u32 fcp_resid; + u32 fcp_rsp_len; + u32 fcp_sns_len; + u8 cdb_status; + u8 fcp_status; + u8 fcp_rsp_code; + u8 scsi_comp_flags; +#define QEDF_MAX_REUSE 0xfff + u16 reuse_count; + struct qedf_mp_req mp_req; + void (*cb_func)(struct qedf_els_cb_arg *cb_arg); + struct qedf_els_cb_arg *cb_arg; + int fp_idx; + unsigned int cpu; + unsigned int int_cpu; +#define QEDF_IOREQ_SLOW_SGE 0 +#define QEDF_IOREQ_SINGLE_SGE 1 +#define QEDF_IOREQ_FAST_SGE 2 + u8 sge_type; + struct delayed_work rrq_work; + + /* Used for sequence level recovery; i.e. REC/SRR */ + uint32_t rx_buf_off; + uint32_t tx_buf_off; + uint32_t rx_id; + uint32_t task_retry_identifier; + + /* + * Used to tell if we need to return a SCSI command + * during some form of error processing. + */ + bool return_scsi_cmd_on_abts; +}; + +extern struct workqueue_struct *qedf_io_wq; + +struct qedf_rport { + spinlock_t rport_lock; +#define QEDF_RPORT_SESSION_READY 1 +#define QEDF_RPORT_UPLOADING_CONNECTION 2 + unsigned long flags; + unsigned long retry_delay_timestamp; + struct fc_rport *rport; + struct fc_rport_priv *rdata; + struct qedf_ctx *qedf; + u32 handle; /* Handle from qed */ + u32 fw_cid; /* fw_cid from qed */ + void __iomem *p_doorbell; + /* Send queue management */ + atomic_t free_sqes; + atomic_t num_active_ios; + struct fcoe_wqe *sq; + dma_addr_t sq_dma; + u16 sq_prod_idx; + u16 fw_sq_prod_idx; + u16 sq_con_idx; + u32 sq_mem_size; + void *sq_pbl; + dma_addr_t sq_pbl_dma; + u32 sq_pbl_size; + u32 sid; +#define QEDF_RPORT_TYPE_DISK 1 +#define QEDF_RPORT_TYPE_TAPE 2 + uint dev_type; /* Disk or tape */ + struct list_head peers; +}; + +/* Used to contain LL2 skb's in ll2_skb_list */ +struct qedf_skb_work { + struct work_struct work; + struct sk_buff *skb; + struct qedf_ctx *qedf; +}; + +struct qedf_fastpath { +#define QEDF_SB_ID_NULL 0xffff + u16 sb_id; + struct qed_sb_info *sb_info; + struct qedf_ctx *qedf; + /* Keep track of number of completions on this fastpath */ + unsigned long completions; + uint32_t cq_num_entries; +}; + +/* Used to pass fastpath information needed to process CQEs */ +struct qedf_io_work { + struct work_struct work; + struct fcoe_cqe cqe; + struct qedf_ctx *qedf; + struct fc_frame *fp; +}; + +struct qedf_glbl_q_params { + u64 hw_p_cq; /* Completion queue PBL */ + u64 hw_p_rq; /* Request queue PBL */ + u64 hw_p_cmdq; /* Command queue PBL */ +}; + +struct global_queue { + struct fcoe_cqe *cq; + dma_addr_t cq_dma; + u32 cq_mem_size; + u32 cq_cons_idx; /* Completion queue consumer index */ + u32 cq_prod_idx; + + void 
*cq_pbl; + dma_addr_t cq_pbl_dma; + u32 cq_pbl_size; +}; + +/* I/O tracing entry */ +#define QEDF_IO_TRACE_SIZE 2048 +struct qedf_io_log { +#define QEDF_IO_TRACE_REQ 0 +#define QEDF_IO_TRACE_RSP 1 + uint8_t direction; + uint16_t task_id; + uint32_t port_id; /* Remote port fabric ID */ + int lun; + char op; /* SCSI CDB */ + uint8_t lba[4]; + unsigned int bufflen; /* SCSI buffer length */ + unsigned int sg_count; /* Number of SG elements */ + int result; /* Result passed back to mid-layer */ + unsigned long jiffies; /* Time stamp when I/O logged */ + int refcount; /* Reference count for task id */ + unsigned int req_cpu; /* CPU that the task is queued on */ + unsigned int int_cpu; /* Interrupt CPU that the task is received on */ + unsigned int rsp_cpu; /* CPU that task is returned on */ + u8 sge_type; /* Did we take the slow, single or fast SGE path */ +}; + +/* Number of entries in BDQ */ +#define QEDF_BDQ_SIZE 256 +#define QEDF_BDQ_BUF_SIZE 2072 + +/* DMA coherent buffers for BDQ */ +struct qedf_bdq_buf { + void *buf_addr; + dma_addr_t buf_dma; +}; + +/* Main adapter struct */ +struct qedf_ctx { + struct qedf_dbg_ctx dbg_ctx; + struct fcoe_ctlr ctlr; + struct fc_lport *lport; + u8 data_src_addr[ETH_ALEN]; +#define QEDF_LINK_DOWN 0 +#define QEDF_LINK_UP 1 + atomic_t link_state; +#define QEDF_DCBX_PENDING 0 +#define QEDF_DCBX_DONE 1 + atomic_t dcbx; + uint16_t max_scsi_xid; + uint16_t max_els_xid; +#define QEDF_NULL_VLAN_ID -1 +#define QEDF_FALLBACK_VLAN 1002 +#define QEDF_DEFAULT_PRIO 3 + int vlan_id; + uint vlan_hw_insert:1; + struct qed_dev *cdev; + struct qed_dev_fcoe_info dev_info; + struct qed_int_info int_info; + uint16_t last_command; + spinlock_t hba_lock; + struct pci_dev *pdev; + u64 wwnn; + u64 wwpn; + u8 __aligned(16) mac[ETH_ALEN]; + struct list_head fcports; + atomic_t num_offloads; + unsigned int curr_conn_id; + struct workqueue_struct *ll2_recv_wq; + struct workqueue_struct *link_update_wq; + struct delayed_work link_update; + struct delayed_work link_recovery; + struct completion flogi_compl; + struct completion fipvlan_compl; + + /* + * Used to tell if we're in the window where we are waiting for + * the link to come back up before informting fcoe that the link is + * done. 
+ */ + atomic_t link_down_tmo_valid; +#define QEDF_TIMER_INTERVAL (1 * HZ) + struct timer_list timer; /* One second book keeping timer */ +#define QEDF_DRAIN_ACTIVE 1 +#define QEDF_LL2_STARTED 2 +#define QEDF_UNLOADING 3 +#define QEDF_GRCDUMP_CAPTURE 4 +#define QEDF_IN_RECOVERY 5 +#define QEDF_DBG_STOP_IO 6 + unsigned long flags; /* Miscellaneous state flags */ + int fipvlan_retries; + u8 num_queues; + struct global_queue **global_queues; + /* Pointer to array of queue structures */ + struct qedf_glbl_q_params *p_cpuq; + /* Physical address of array of queue structures */ + dma_addr_t hw_p_cpuq; + + struct qedf_bdq_buf bdq[QEDF_BDQ_SIZE]; + void *bdq_pbl; + dma_addr_t bdq_pbl_dma; + size_t bdq_pbl_mem_size; + void *bdq_pbl_list; + dma_addr_t bdq_pbl_list_dma; + u8 bdq_pbl_list_num_entries; + void __iomem *bdq_primary_prod; + void __iomem *bdq_secondary_prod; + uint16_t bdq_prod_idx; + + /* Structure for holding all the fastpath for this qedf_ctx */ + struct qedf_fastpath *fp_array; + struct qed_fcoe_tid tasks; + struct qedf_cmd_mgr *cmd_mgr; + /* Holds the PF parameters we pass to qed to start he FCoE function */ + struct qed_pf_params pf_params; + /* Used to time middle path ELS and TM commands */ + struct workqueue_struct *timer_work_queue; + +#define QEDF_IO_WORK_MIN 64 + mempool_t *io_mempool; + struct workqueue_struct *dpc_wq; + + u32 slow_sge_ios; + u32 fast_sge_ios; + u32 single_sge_ios; + + uint8_t *grcdump; + uint32_t grcdump_size; + + struct qedf_io_log io_trace_buf[QEDF_IO_TRACE_SIZE]; + spinlock_t io_trace_lock; + uint16_t io_trace_idx; + + bool stop_io_on_error; + + u32 flogi_cnt; + u32 flogi_failed; + + /* Used for fc statistics */ + u64 input_requests; + u64 output_requests; + u64 control_requests; + u64 packet_aborts; + u64 alloc_failures; +}; + +struct io_bdt { + struct qedf_ioreq *io_req; + struct fcoe_sge *bd_tbl; + dma_addr_t bd_tbl_dma; + u16 bd_valid; +}; + +struct qedf_cmd_mgr { + struct qedf_ctx *qedf; + u16 idx; + struct io_bdt **io_bdt_pool; +#define FCOE_PARAMS_NUM_TASKS 4096 + struct qedf_ioreq cmds[FCOE_PARAMS_NUM_TASKS]; + spinlock_t lock; + atomic_t free_list_cnt; +}; + +/* Stolen from qed_cxt_api.h and adapted for qed_fcoe_info + * Usage: + * + * void *ptr; + * ptr = qedf_get_task_mem(&qedf->tasks, 128); + */ +static inline void *qedf_get_task_mem(struct qed_fcoe_tid *info, u32 tid) +{ + return (void *)(info->blocks[tid / info->num_tids_per_block] + + (tid % info->num_tids_per_block) * info->size); +} + +static inline void qedf_stop_all_io(struct qedf_ctx *qedf) +{ + set_bit(QEDF_DBG_STOP_IO, &qedf->flags); +} + +/* + * Externs + */ +#define QEDF_DEFAULT_LOG_MASK 0x3CFB6 +extern const struct qed_fcoe_ops *qed_ops; +extern uint qedf_dump_frames; +extern uint qedf_io_tracing; +extern uint qedf_stop_io_on_error; +extern uint qedf_link_down_tmo; +#define QEDF_RETRY_DELAY_MAX 20 /* 2 seconds */ +extern bool qedf_retry_delay; +extern uint qedf_debug; + +extern struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf); +extern void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr); +extern int qedf_queuecommand(struct Scsi_Host *host, + struct scsi_cmnd *sc_cmd); +extern void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb); +extern void qedf_update_src_mac(struct fc_lport *lport, u8 *addr); +extern u8 *qedf_get_src_mac(struct fc_lport *lport); +extern void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb); +extern void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf); +extern void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + 
struct qedf_ioreq *io_req); +extern void qedf_process_warning_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern void qedf_process_error_detect(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern void qedf_flush_active_ios(struct qedf_rport *fcport, int lun); +extern void qedf_release_cmd(struct kref *ref); +extern int qedf_initiate_abts(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts); +extern void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req); +extern struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, + u8 cmd_type); + +extern struct device_attribute *qedf_host_attrs[]; +extern void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + unsigned int timer_msec); +extern int qedf_init_mp_req(struct qedf_ioreq *io_req); +extern void qedf_init_mp_task(struct qedf_ioreq *io_req, + struct fcoe_task_context *task_ctx); +extern void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid, + u32 ptu_invalidate, enum fcoe_task_type req_type, u32 offset); +extern void qedf_ring_doorbell(struct qedf_rport *fcport); +extern void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *els_req); +extern int qedf_send_rrq(struct qedf_ioreq *aborted_io_req); +extern int qedf_send_adisc(struct qedf_rport *fcport, struct fc_frame *fp); +extern int qedf_initiate_cleanup(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts); +extern void qedf_process_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern int qedf_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags); +extern void qedf_process_tmf_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req); +extern void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe); +extern void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + int result); +extern void qedf_set_vlan_id(struct qedf_ctx *qedf, int vlan_id); +extern void qedf_create_sysfs_ctx_attr(struct qedf_ctx *qedf); +extern void qedf_remove_sysfs_ctx_attr(struct qedf_ctx *qedf); +extern void qedf_capture_grc_dump(struct qedf_ctx *qedf); +extern void qedf_wait_for_upload(struct qedf_ctx *qedf); +extern void qedf_process_unsol_compl(struct qedf_ctx *qedf, uint16_t que_idx, + struct fcoe_cqe *cqe); +extern void qedf_restart_rport(struct qedf_rport *fcport); +extern int qedf_send_rec(struct qedf_ioreq *orig_io_req); +extern int qedf_post_io_req(struct qedf_rport *fcport, + struct qedf_ioreq *io_req); +extern void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern int qedf_send_flogi(struct qedf_ctx *qedf); +extern void qedf_fp_io_handler(struct work_struct *work); + +#define FCOE_WORD_TO_BYTE 4 +#define QEDF_MAX_TASK_NUM 0xFFFF + +struct fip_vlan { + struct ethhdr eth; + struct fip_header fip; + struct { + struct fip_mac_desc mac; + struct fip_wwn_desc wwnn; + } desc; +}; + +/* SQ/CQ Sizes */ +#define GBL_RSVD_TASKS 16 +#define NUM_TASKS_PER_CONNECTION 1024 +#define NUM_RW_TASKS_PER_CONNECTION 512 +#define FCOE_PARAMS_CQ_NUM_ENTRIES FCOE_PARAMS_NUM_TASKS + +#define FCOE_PARAMS_CMDQ_NUM_ENTRIES FCOE_PARAMS_NUM_TASKS +#define SQ_NUM_ENTRIES NUM_TASKS_PER_CONNECTION + +#define QEDF_FCOE_PARAMS_GL_RQ_PI 0 +#define QEDF_FCOE_PARAMS_GL_CMD_PI 1 + +#define QEDF_READ (1 << 1) +#define QEDF_WRITE (1 << 0) +#define MAX_FIBRE_LUNS 0xffffffff + +#define QEDF_MAX_NUM_CQS 8 + +/* + * 
PCI function probe defines + */ +/* Probe/remove called during normal PCI probe */ +#define QEDF_MODE_NORMAL 0 +/* Probe/remove called from qed error recovery */ +#define QEDF_MODE_RECOVERY 1 + +#define SUPPORTED_25000baseKR_Full (1<<27) +#define SUPPORTED_50000baseKR2_Full (1<<28) +#define SUPPORTED_100000baseKR4_Full (1<<29) +#define SUPPORTED_100000baseCR4_Full (1<<30) + +#endif diff --git a/drivers/scsi/qedf/qedf_attr.c b/drivers/scsi/qedf/qedf_attr.c new file mode 100644 index 00000000000000..47720611ad2c5e --- /dev/null +++ b/drivers/scsi/qedf/qedf_attr.c @@ -0,0 +1,165 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf.h" + +static ssize_t +qedf_fcoe_mac_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lport = shost_priv(class_to_shost(dev)); + u32 port_id; + u8 lport_src_id[3]; + u8 fcoe_mac[6]; + + port_id = fc_host_port_id(lport->host); + lport_src_id[2] = (port_id & 0x000000FF); + lport_src_id[1] = (port_id & 0x0000FF00) >> 8; + lport_src_id[0] = (port_id & 0x00FF0000) >> 16; + fc_fcoe_set_mac(fcoe_mac, lport_src_id); + + return scnprintf(buf, PAGE_SIZE, "%pM\n", fcoe_mac); +} + +static DEVICE_ATTR(fcoe_mac, S_IRUGO, qedf_fcoe_mac_show, NULL); + +struct device_attribute *qedf_host_attrs[] = { + &dev_attr_fcoe_mac, + NULL, +}; + +extern const struct qed_fcoe_ops *qed_ops; + +inline bool qedf_is_vport(struct qedf_ctx *qedf) +{ + return (!(qedf->lport->vport == NULL)); +} + +/* Get base qedf for physical port from vport */ +static struct qedf_ctx *qedf_get_base_qedf(struct qedf_ctx *qedf) +{ + struct fc_lport *lport; + struct fc_lport *base_lport; + + if (!(qedf_is_vport(qedf))) + return NULL; + + lport = qedf->lport; + base_lport = shost_priv(vport_to_shost(lport->vport)); + return (struct qedf_ctx *)(lport_priv(base_lport)); +} + +void qedf_capture_grc_dump(struct qedf_ctx *qedf) +{ + struct qedf_ctx *base_qedf; + + /* Make sure we use the base qedf to take the GRC dump */ + if (qedf_is_vport(qedf)) + base_qedf = qedf_get_base_qedf(qedf); + else + base_qedf = qedf; + + if (test_bit(QEDF_GRCDUMP_CAPTURE, &base_qedf->flags)) { + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_INFO, + "GRC Dump already captured.\n"); + return; + } + + + qedf_get_grc_dump(base_qedf->cdev, qed_ops->common, + &base_qedf->grcdump, &base_qedf->grcdump_size); + QEDF_ERR(&(base_qedf->dbg_ctx), "GRC Dump captured.\n"); + set_bit(QEDF_GRCDUMP_CAPTURE, &base_qedf->flags); + qedf_uevent_emit(base_qedf->lport->host, QEDF_UEVENT_CODE_GRCDUMP, + NULL); +} + +static ssize_t +qedf_sysfs_read_grcdump(struct file *filep, struct kobject *kobj, + struct bin_attribute *ba, char *buf, loff_t off, + size_t count) +{ + ssize_t ret = 0; + struct fc_lport *lport = shost_priv(dev_to_shost(container_of(kobj, + struct device, kobj))); + struct qedf_ctx *qedf = lport_priv(lport); + + if (test_bit(QEDF_GRCDUMP_CAPTURE, &qedf->flags)) { + ret = memory_read_from_buffer(buf, count, &off, + qedf->grcdump, qedf->grcdump_size); + } else { + QEDF_ERR(&(qedf->dbg_ctx), "GRC Dump not captured!\n"); + } + + return ret; +} + +static ssize_t +qedf_sysfs_write_grcdump(struct file *filep, struct kobject *kobj, + struct bin_attribute *ba, char *buf, loff_t off, + size_t count) +{ + struct fc_lport *lport = NULL; + struct qedf_ctx *qedf = NULL; + long reading; + int ret = 0; + char 
msg[40]; + + if (off != 0) + return ret; + + + lport = shost_priv(dev_to_shost(container_of(kobj, + struct device, kobj))); + qedf = lport_priv(lport); + + buf[1] = 0; + ret = kstrtol(buf, 10, &reading); + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "Invalid input, err(%d)\n", ret); + return ret; + } + + memset(msg, 0, sizeof(msg)); + switch (reading) { + case 0: + memset(qedf->grcdump, 0, qedf->grcdump_size); + clear_bit(QEDF_GRCDUMP_CAPTURE, &qedf->flags); + break; + case 1: + qedf_capture_grc_dump(qedf); + break; + } + + return count; +} + +static struct bin_attribute sysfs_grcdump_attr = { + .attr = { + .name = "grcdump", + .mode = S_IRUSR | S_IWUSR, + }, + .size = 0, + .read = qedf_sysfs_read_grcdump, + .write = qedf_sysfs_write_grcdump, +}; + +static struct sysfs_bin_attrs bin_file_entries[] = { + {"grcdump", &sysfs_grcdump_attr}, + {NULL}, +}; + +void qedf_create_sysfs_ctx_attr(struct qedf_ctx *qedf) +{ + qedf_create_sysfs_attr(qedf->lport->host, bin_file_entries); +} + +void qedf_remove_sysfs_ctx_attr(struct qedf_ctx *qedf) +{ + qedf_remove_sysfs_attr(qedf->lport->host, bin_file_entries); +} diff --git a/drivers/scsi/qedf/qedf_dbg.c b/drivers/scsi/qedf/qedf_dbg.c new file mode 100644 index 00000000000000..e023f5d0dc12cf --- /dev/null +++ b/drivers/scsi/qedf/qedf_dbg.c @@ -0,0 +1,195 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf_dbg.h" +#include + +void +qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (likely(qedf) && likely(qedf->pdev)) + pr_err("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_err("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + + va_end(va); +} + +void +qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & QEDF_LOG_WARN)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_warn("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_warn("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +void +qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & QEDF_LOG_NOTICE)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_notice("[%s]:[%s:%d]:%d: %pV", + dev_name(&(qedf->pdev->dev)), nfunc, line, + qedf->host_no, &vaf); + else + pr_notice("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +void +qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + u32 level, const char *fmt, ...) 
+{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & level)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_info("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_info("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +int +qedf_alloc_grc_dump_buf(u8 **buf, uint32_t len) +{ + *buf = vmalloc(len); + if (!(*buf)) + return -ENOMEM; + + memset(*buf, 0, len); + return 0; +} + +void +qedf_free_grc_dump_buf(uint8_t **buf) +{ + vfree(*buf); + *buf = NULL; +} + +int +qedf_get_grc_dump(struct qed_dev *cdev, const struct qed_common_ops *common, + u8 **buf, uint32_t *grcsize) +{ + if (!*buf) + return -EINVAL; + + return common->dbg_grc(cdev, *buf, grcsize); +} + +void +qedf_uevent_emit(struct Scsi_Host *shost, u32 code, char *msg) +{ + char event_string[40]; + char *envp[] = {event_string, NULL}; + + memset(event_string, 0, sizeof(event_string)); + switch (code) { + case QEDF_UEVENT_CODE_GRCDUMP: + if (msg) + strncpy(event_string, msg, strlen(msg)); + else + sprintf(event_string, "GRCDUMP=%u", shost->host_no); + break; + default: + /* do nothing */ + break; + } + + kobject_uevent_env(&shost->shost_gendev.kobj, KOBJ_CHANGE, envp); +} + +int +qedf_create_sysfs_attr(struct Scsi_Host *shost, struct sysfs_bin_attrs *iter) +{ + int ret = 0; + + for (; iter->name; iter++) { + ret = sysfs_create_bin_file(&shost->shost_gendev.kobj, + iter->attr); + if (ret) + pr_err("Unable to create sysfs %s attr, err(%d).\n", + iter->name, ret); + } + return ret; +} + +void +qedf_remove_sysfs_attr(struct Scsi_Host *shost, struct sysfs_bin_attrs *iter) +{ + for (; iter->name; iter++) + sysfs_remove_bin_file(&shost->shost_gendev.kobj, iter->attr); +} diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h new file mode 100644 index 00000000000000..23bd70628a2f05 --- /dev/null +++ b/drivers/scsi/qedf/qedf_dbg.h @@ -0,0 +1,154 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ +#ifndef _QEDF_DBG_H_ +#define _QEDF_DBG_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern uint qedf_debug; + +/* Debug print level definitions */ +#define QEDF_LOG_DEFAULT 0x1 /* Set default logging mask */ +#define QEDF_LOG_INFO 0x2 /* + * Informational logs, + * MAC address, WWPN, WWNN + */ +#define QEDF_LOG_DISC 0x4 /* Init, discovery, rport */ +#define QEDF_LOG_LL2 0x8 /* LL2, VLAN logs */ +#define QEDF_LOG_CONN 0x10 /* Connection setup, cleanup */ +#define QEDF_LOG_EVT 0x20 /* Events, link, mtu */ +#define QEDF_LOG_TIMER 0x40 /* Timer events */ +#define QEDF_LOG_MP_REQ 0x80 /* Middle Path (MP) logs */ +#define QEDF_LOG_SCSI_TM 0x100 /* SCSI Aborts, Task Mgmt */ +#define QEDF_LOG_UNSOL 0x200 /* unsolicited event logs */ +#define QEDF_LOG_IO 0x400 /* scsi cmd, completion */ +#define QEDF_LOG_MQ 0x800 /* Multi Queue logs */ +#define QEDF_LOG_BSG 0x1000 /* BSG logs */ +#define QEDF_LOG_DEBUGFS 0x2000 /* debugFS logs */ +#define QEDF_LOG_LPORT 0x4000 /* lport logs */ +#define QEDF_LOG_ELS 0x8000 /* ELS logs */ +#define QEDF_LOG_NPIV 0x10000 /* NPIV logs */ +#define QEDF_LOG_SESS 0x20000 /* Conection setup, cleanup */ +#define QEDF_LOG_TID 0x80000 /* + * FW TID context acquire + * free + */ +#define QEDF_TRACK_TID 0x100000 /* + * Track TID state. To be + * enabled only at module load + * and not run-time. + */ +#define QEDF_TRACK_CMD_LIST 0x300000 /* + * Track active cmd list nodes, + * done with reference to TID, + * hence TRACK_TID also enabled. + */ +#define QEDF_LOG_NOTICE 0x40000000 /* Notice logs */ +#define QEDF_LOG_WARN 0x80000000 /* Warning logs */ + +/* Debug context structure */ +struct qedf_dbg_ctx { + unsigned int host_no; + struct pci_dev *pdev; +#ifdef CONFIG_DEBUG_FS + struct dentry *bdf_dentry; +#endif +}; + +#define QEDF_ERR(pdev, fmt, ...) \ + qedf_dbg_err(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_WARN(pdev, fmt, ...) \ + qedf_dbg_warn(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_NOTICE(pdev, fmt, ...) \ + qedf_dbg_notice(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_INFO(pdev, level, fmt, ...) 
\ + qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \ + ## __VA_ARGS__) + +extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...); +extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *, ...); +extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, + u32 line, const char *, ...); +extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + u32 info, const char *fmt, ...); + +/* GRC Dump related defines */ + +struct Scsi_Host; + +#define QEDF_UEVENT_CODE_GRCDUMP 0 + +struct sysfs_bin_attrs { + char *name; + struct bin_attribute *attr; +}; + +extern int qedf_alloc_grc_dump_buf(uint8_t **buf, uint32_t len); +extern void qedf_free_grc_dump_buf(uint8_t **buf); +extern int qedf_get_grc_dump(struct qed_dev *cdev, + const struct qed_common_ops *common, uint8_t **buf, + uint32_t *grcsize); +extern void qedf_uevent_emit(struct Scsi_Host *shost, u32 code, char *msg); +extern int qedf_create_sysfs_attr(struct Scsi_Host *shost, + struct sysfs_bin_attrs *iter); +extern void qedf_remove_sysfs_attr(struct Scsi_Host *shost, + struct sysfs_bin_attrs *iter); + +#ifdef CONFIG_DEBUG_FS +/* DebugFS related code */ +struct qedf_list_of_funcs { + char *oper_str; + ssize_t (*oper_func)(struct qedf_dbg_ctx *qedf); +}; + +struct qedf_debugfs_ops { + char *name; + struct qedf_list_of_funcs *qedf_funcs; +}; + +#define qedf_dbg_fileops(drv, ops) \ +{ \ + .owner = THIS_MODULE, \ + .open = simple_open, \ + .read = drv##_dbg_##ops##_cmd_read, \ + .write = drv##_dbg_##ops##_cmd_write \ +} + +/* Used for debugfs sequential files */ +#define qedf_dbg_fileops_seq(drv, ops) \ +{ \ + .owner = THIS_MODULE, \ + .open = drv##_dbg_##ops##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +extern void qedf_dbg_host_init(struct qedf_dbg_ctx *qedf, + struct qedf_debugfs_ops *dops, + struct file_operations *fops); +extern void qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf); +extern void qedf_dbg_init(char *drv_name); +extern void qedf_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _QEDF_DBG_H_ */ diff --git a/drivers/scsi/qedf/qedf_debugfs.c b/drivers/scsi/qedf/qedf_debugfs.c new file mode 100644 index 00000000000000..cb08b625c59479 --- /dev/null +++ b/drivers/scsi/qedf/qedf_debugfs.c @@ -0,0 +1,460 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ +#ifdef CONFIG_DEBUG_FS + +#include +#include +#include + +#include "qedf.h" +#include "qedf_dbg.h" + +static struct dentry *qedf_dbg_root; + +/** + * qedf_dbg_host_init - setup the debugfs file for the pf + * @pf: the pf that is starting up + **/ +void +qedf_dbg_host_init(struct qedf_dbg_ctx *qedf, + struct qedf_debugfs_ops *dops, + struct file_operations *fops) +{ + char host_dirname[32]; + struct dentry *file_dentry = NULL; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Creating debugfs host node\n"); + /* create pf dir */ + sprintf(host_dirname, "host%u", qedf->host_no); + qedf->bdf_dentry = debugfs_create_dir(host_dirname, qedf_dbg_root); + if (!qedf->bdf_dentry) + return; + + /* create debugfs files */ + while (dops) { + if (!(dops->name)) + break; + + file_dentry = debugfs_create_file(dops->name, 0600, + qedf->bdf_dentry, qedf, + fops); + if (!file_dentry) { + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, + "Debugfs entry %s creation failed\n", + dops->name); + debugfs_remove_recursive(qedf->bdf_dentry); + return; + } + dops++; + fops++; + } +} + +/** + * qedf_dbg_host_exit - clear out the pf's debugfs entries + * @pf: the pf that is stopping + **/ +void +qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf) +{ + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Destroying debugfs host " + "entry\n"); + /* remove debugfs entries of this PF */ + debugfs_remove_recursive(qedf->bdf_dentry); + qedf->bdf_dentry = NULL; +} + +/** + * qedf_dbg_init - start up debugfs for the driver + **/ +void +qedf_dbg_init(char *drv_name) +{ + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Creating debugfs root node\n"); + + /* create qed dir in root of debugfs. NULL means debugfs root */ + qedf_dbg_root = debugfs_create_dir(drv_name, NULL); + if (!qedf_dbg_root) + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Init of debugfs " + "failed\n"); +} + +/** + * qedf_dbg_exit - clean out the driver's debugfs entries + **/ +void +qedf_dbg_exit(void) +{ + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Destroying debugfs root " + "entry\n"); + + /* remove qed dir in root of debugfs */ + debugfs_remove_recursive(qedf_dbg_root); + qedf_dbg_root = NULL; +} + +struct qedf_debugfs_ops qedf_debugfs_ops[] = { + { "fp_int", NULL }, + { "io_trace", NULL }, + { "debug", NULL }, + { "stop_io_on_error", NULL}, + { "driver_stats", NULL}, + { "clear_stats", NULL}, + { "offload_stats", NULL}, + /* This must be last */ + { NULL, NULL } +}; + +DECLARE_PER_CPU(struct qedf_percpu_iothread_s, qedf_percpu_iothreads); + +static ssize_t +qedf_dbg_fp_int_cmd_read(struct file *filp, char __user *buffer, size_t count, + loff_t *ppos) +{ + size_t cnt = 0; + int id; + struct qedf_fastpath *fp = NULL; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + cnt = sprintf(buffer, "\nFastpath I/O completions\n\n"); + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + if (fp->sb_id == QEDF_SB_ID_NULL) + continue; + cnt += sprintf((buffer + cnt), "#%d: %lu\n", id, + fp->completions); + } + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_fp_int_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + if (!count || *ppos) + return 0; + + return count; +} + +static ssize_t +qedf_dbg_debug_cmd_read(struct file *filp, char __user *buffer, size_t count, + loff_t *ppos) +{ + int cnt; + struct qedf_dbg_ctx *qedf = + (struct qedf_dbg_ctx 
*)filp->private_data; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "entered\n"); + cnt = sprintf(buffer, "debug mask = 0x%x\n", qedf_debug); + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_debug_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + uint32_t val; + void *kern_buf; + int rval; + struct qedf_dbg_ctx *qedf = + (struct qedf_dbg_ctx *)filp->private_data; + + if (!count || *ppos) + return 0; + + kern_buf = memdup_user(buffer, count); + if (IS_ERR(kern_buf)) + return PTR_ERR(kern_buf); + + rval = kstrtouint(kern_buf, 10, &val); + kfree(kern_buf); + if (rval) + return rval; + + if (val == 1) + qedf_debug = QEDF_DEFAULT_LOG_MASK; + else + qedf_debug = val; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Setting debug=0x%x.\n", val); + return count; +} + +static ssize_t +qedf_dbg_stop_io_on_error_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + int cnt; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + cnt = sprintf(buffer, "%s\n", + qedf->stop_io_on_error ? "true" : "false"); + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_stop_io_on_error_cmd_write(struct file *filp, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + void *kern_buf; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, struct qedf_ctx, + dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + if (!count || *ppos) + return 0; + + kern_buf = memdup_user(buffer, 6); + if (IS_ERR(kern_buf)) + return PTR_ERR(kern_buf); + + if (strncmp(kern_buf, "false", 5) == 0) + qedf->stop_io_on_error = false; + else if (strncmp(kern_buf, "true", 4) == 0) + qedf->stop_io_on_error = true; + else if (strncmp(kern_buf, "now", 3) == 0) + /* Trigger from user to stop all I/O on this host */ + set_bit(QEDF_DBG_STOP_IO, &qedf->flags); + + kfree(kern_buf); + return count; +} + +static int +qedf_io_trace_show(struct seq_file *s, void *unused) +{ + int i, idx = 0; + struct qedf_ctx *qedf = s->private; + struct qedf_dbg_ctx *qedf_dbg = &qedf->dbg_ctx; + struct qedf_io_log *io_log; + unsigned long flags; + + if (!qedf_io_tracing) { + seq_puts(s, "I/O tracing not enabled.\n"); + goto out; + } + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + spin_lock_irqsave(&qedf->io_trace_lock, flags); + idx = qedf->io_trace_idx; + for (i = 0; i < QEDF_IO_TRACE_SIZE; i++) { + io_log = &qedf->io_trace_buf[idx]; + seq_printf(s, "%d:", io_log->direction); + seq_printf(s, "0x%x:", io_log->task_id); + seq_printf(s, "0x%06x:", io_log->port_id); + seq_printf(s, "%d:", io_log->lun); + seq_printf(s, "0x%02x:", io_log->op); + seq_printf(s, "0x%02x%02x%02x%02x:", io_log->lba[0], + io_log->lba[1], io_log->lba[2], io_log->lba[3]); + seq_printf(s, "%d:", io_log->bufflen); + seq_printf(s, "%d:", io_log->sg_count); + seq_printf(s, "0x%08x:", io_log->result); + seq_printf(s, "%lu:", io_log->jiffies); + seq_printf(s, "%d:", io_log->refcount); + seq_printf(s, "%d:", io_log->req_cpu); + seq_printf(s, "%d:", io_log->int_cpu); + seq_printf(s, "%d:", io_log->rsp_cpu); + seq_printf(s, "%d\n", io_log->sge_type); + + idx++; + if (idx == QEDF_IO_TRACE_SIZE) + idx = 0; + } + spin_unlock_irqrestore(&qedf->io_trace_lock, flags); + +out: + return 0; +} + 
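+/*
+ * debugfs open callback for the io_trace node. single_open() stashes the
+ * qedf_ctx pointer as the seq_file private data, so qedf_io_trace_show()
+ * above can walk io_trace_buf[] starting at io_trace_idx, wrapping around
+ * QEDF_IO_TRACE_SIZE entries while holding io_trace_lock.
+ */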
+static int +qedf_dbg_io_trace_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_io_trace_show, qedf); +} + +static int +qedf_driver_stats_show(struct seq_file *s, void *unused) +{ + struct qedf_ctx *qedf = s->private; + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + + seq_printf(s, "cmg_mgr free io_reqs: %d\n", + atomic_read(&qedf->cmd_mgr->free_list_cnt)); + seq_printf(s, "slow SGEs: %d\n", qedf->slow_sge_ios); + seq_printf(s, "single SGEs: %d\n", qedf->single_sge_ios); + seq_printf(s, "fast SGEs: %d\n\n", qedf->fast_sge_ios); + + seq_puts(s, "Offloaded ports:\n\n"); + + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + seq_printf(s, "%06x: free_sqes: %d, num_active_ios: %d\n", + rdata->ids.port_id, atomic_read(&fcport->free_sqes), + atomic_read(&fcport->num_active_ios)); + } + rcu_read_unlock(); + + return 0; +} + +static int +qedf_dbg_driver_stats_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_driver_stats_show, qedf); +} + +static ssize_t +qedf_dbg_clear_stats_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + int cnt = 0; + + /* Essentially a read stub */ + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_clear_stats_cmd_write(struct file *filp, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, struct qedf_ctx, + dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "Clearing stat counters.\n"); + + if (!count || *ppos) + return 0; + + /* Clear stat counters exposed by 'stats' node */ + qedf->slow_sge_ios = 0; + qedf->single_sge_ios = 0; + qedf->fast_sge_ios = 0; + + return count; +} + +static int +qedf_offload_stats_show(struct seq_file *s, void *unused) +{ + struct qedf_ctx *qedf = s->private; + struct qed_fcoe_stats *fw_fcoe_stats; + + fw_fcoe_stats = kmalloc(sizeof(struct qed_fcoe_stats), GFP_KERNEL); + if (!fw_fcoe_stats) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate memory for " + "fw_fcoe_stats.\n"); + goto out; + } + + /* Query firmware for offload stats */ + qed_ops->get_stats(qedf->cdev, fw_fcoe_stats); + + seq_printf(s, "fcoe_rx_byte_cnt=%llu\n" + "fcoe_rx_data_pkt_cnt=%llu\n" + "fcoe_rx_xfer_pkt_cnt=%llu\n" + "fcoe_rx_other_pkt_cnt=%llu\n" + "fcoe_silent_drop_pkt_cmdq_full_cnt=%u\n" + "fcoe_silent_drop_pkt_crc_error_cnt=%u\n" + "fcoe_silent_drop_pkt_task_invalid_cnt=%u\n" + "fcoe_silent_drop_total_pkt_cnt=%u\n" + "fcoe_silent_drop_pkt_rq_full_cnt=%u\n" + "fcoe_tx_byte_cnt=%llu\n" + "fcoe_tx_data_pkt_cnt=%llu\n" + "fcoe_tx_xfer_pkt_cnt=%llu\n" + "fcoe_tx_other_pkt_cnt=%llu\n", + fw_fcoe_stats->fcoe_rx_byte_cnt, + fw_fcoe_stats->fcoe_rx_data_pkt_cnt, + fw_fcoe_stats->fcoe_rx_xfer_pkt_cnt, + fw_fcoe_stats->fcoe_rx_other_pkt_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_cmdq_full_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_crc_error_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_task_invalid_cnt, + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_rq_full_cnt, + fw_fcoe_stats->fcoe_tx_byte_cnt, + 
fw_fcoe_stats->fcoe_tx_data_pkt_cnt, + fw_fcoe_stats->fcoe_tx_xfer_pkt_cnt, + fw_fcoe_stats->fcoe_tx_other_pkt_cnt); + + kfree(fw_fcoe_stats); +out: + return 0; +} + +static int +qedf_dbg_offload_stats_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_offload_stats_show, qedf); +} + + +const struct file_operations qedf_dbg_fops[] = { + qedf_dbg_fileops(qedf, fp_int), + qedf_dbg_fileops_seq(qedf, io_trace), + qedf_dbg_fileops(qedf, debug), + qedf_dbg_fileops(qedf, stop_io_on_error), + qedf_dbg_fileops_seq(qedf, driver_stats), + qedf_dbg_fileops(qedf, clear_stats), + qedf_dbg_fileops_seq(qedf, offload_stats), + /* This must be last */ + { NULL, NULL }, +}; + +#else /* CONFIG_DEBUG_FS */ +void qedf_dbg_host_init(struct qedf_dbg_ctx *); +void qedf_dbg_host_exit(struct qedf_dbg_ctx *); +void qedf_dbg_init(char *); +void qedf_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c new file mode 100644 index 00000000000000..78f1c252b649d9 --- /dev/null +++ b/drivers/scsi/qedf/qedf_els.c @@ -0,0 +1,949 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf.h" + +/* It's assumed that the lock is held when calling this function. */ +static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, + void *data, uint32_t data_len, + void (*cb_func)(struct qedf_els_cb_arg *cb_arg), + struct qedf_els_cb_arg *cb_arg, uint32_t timer_msec) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct fc_lport *lport = qedf->lport; + struct qedf_ioreq *els_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *fc_hdr; + struct fcoe_task_context *task; + int rc = 0; + uint32_t did, sid; + uint16_t xid; + uint32_t start_time = jiffies / HZ; + uint32_t current_time; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending ELS\n"); + + rc = fc_remote_port_chkready(fcport->rport); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: rport not ready\n", op); + rc = -EAGAIN; + goto els_err; + } + if (lport->state != LPORT_ST_READY || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: link is not ready\n", + op); + rc = -EAGAIN; + goto els_err; + } + + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: fcport not ready\n", op); + rc = -EINVAL; + goto els_err; + } + +retry_els: + els_req = qedf_alloc_cmd(fcport, QEDF_ELS); + if (!els_req) { + current_time = jiffies / HZ; + if ((current_time - start_time) > 10) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "els: Failed els 0x%x\n", op); + rc = -ENOMEM; + goto els_err; + } + mdelay(20 * USEC_PER_MSEC); + goto retry_els; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = " + "0x%p cb_arg = %p xid = %x\n", els_req, cb_arg, + els_req->xid); + els_req->sc_cmd = NULL; + els_req->cmd_type = QEDF_ELS; + els_req->fcport = fcport; + els_req->cb_func = cb_func; + cb_arg->io_req = els_req; + cb_arg->op = op; + els_req->cb_arg = cb_arg; + els_req->data_xfer_len = data_len; + + /* Record which cpu this request is associated with */ + els_req->cpu = smp_processor_id(); + + mp_req = (struct qedf_mp_req *)&(els_req->mp_req); + rc = qedf_init_mp_req(els_req); 
+ if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "ELS MP request init failed\n"); + kref_put(&els_req->refcount, qedf_release_cmd); + goto els_err; + } else { + rc = 0; + } + + /* Fill ELS Payload */ + if ((op >= ELS_LS_RJT) && (op <= ELS_AUTH_ELS)) { + memcpy(mp_req->req_buf, data, data_len); + } else { + QEDF_ERR(&(qedf->dbg_ctx), "Invalid ELS op 0x%x\n", op); + els_req->cb_func = NULL; + els_req->cb_arg = NULL; + kref_put(&els_req->refcount, qedf_release_cmd); + rc = -EINVAL; + } + + if (rc) + goto els_err; + + /* Fill FC header */ + fc_hdr = &(mp_req->req_fc_hdr); + + did = fcport->rdata->ids.port_id; + sid = fcport->sid; + + __fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, sid, did, + FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | + FC_FC_SEQ_INIT, 0); + + /* Obtain exchange id */ + xid = els_req->xid; + + /* Initialize task context for this IO request */ + task = qedf_get_task_mem(&qedf->tasks, xid); + qedf_init_mp_task(els_req, task); + + /* Put timer on original I/O request */ + if (timer_msec) + qedf_cmd_timer_set(qedf, els_req, timer_msec); + + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0); + + /* Ring doorbell */ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Ringing doorbell for ELS " + "req\n"); + qedf_ring_doorbell(fcport); +els_err: + return rc; +} + +void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *els_req) +{ + struct fcoe_task_context *task_ctx; + struct scsi_cmnd *sc_cmd; + uint16_t xid; + struct fcoe_cqe_midpath_info *mp_info; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered with xid = 0x%x" + " cmd_type = %d.\n", els_req->xid, els_req->cmd_type); + + /* Kill the ELS timer */ + cancel_delayed_work(&els_req->timeout_work); + + xid = els_req->xid; + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + sc_cmd = els_req->sc_cmd; + + /* Get ELS response length from CQE */ + mp_info = &cqe->cqe_info.midpath_info; + els_req->mp_req.resp_len = mp_info->data_placement_size; + + /* Parse ELS response */ + if ((els_req->cb_func) && (els_req->cb_arg)) { + els_req->cb_func(els_req->cb_arg); + els_req->cb_arg = NULL; + } + + kref_put(&els_req->refcount, qedf_release_cmd); +} + +static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *rrq_req; + struct qedf_ctx *qedf; + int refcount; + + rrq_req = cb_arg->io_req; + qedf = rrq_req->fcport->qedf; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered.\n"); + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + if (rrq_req->event != QEDF_IOREQ_EV_ELS_TMO && + rrq_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "rrq_compl: orig io = %p," + " orig xid = 0x%x, rrq_xid = 0x%x, refcount=%d\n", + orig_io_req, orig_io_req->xid, rrq_req->xid, refcount); + + /* This should return the aborted io_req to the command pool */ + if (orig_io_req) + kref_put(&orig_io_req->refcount, qedf_release_cmd); + +out_free: + kfree(cb_arg); +} + +/* Assumes kref is already held by caller */ +int qedf_send_rrq(struct qedf_ioreq *aborted_io_req) +{ + + struct fc_els_rrq rrq; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t sid; + uint32_t r_a_tov; + int rc; + + if (!aborted_io_req) { + QEDF_ERR(NULL, "abort_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = aborted_io_req->fcport; + + /* Check 
that fcport is still offloaded */ + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending RRQ orig " + "io = %p, orig_xid = 0x%x\n", aborted_io_req, + aborted_io_req->xid); + memset(&rrq, 0, sizeof(rrq)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "RRQ\n"); + rc = -ENOMEM; + goto rrq_err; + } + + cb_arg->aborted_io_req = aborted_io_req; + + rrq.rrq_cmd = ELS_RRQ; + hton24(rrq.rrq_s_id, sid); + rrq.rrq_ox_id = htons(aborted_io_req->xid); + rrq.rrq_rx_id = + htons(aborted_io_req->task->tstorm_st_context.read_write.rx_id); + + rc = qedf_initiate_els(fcport, ELS_RRQ, &rrq, sizeof(rrq), + qedf_rrq_compl, cb_arg, r_a_tov); + +rrq_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "RRQ failed - release orig io " + "req 0x%x\n", aborted_io_req->xid); + kfree(cb_arg); + kref_put(&aborted_io_req->refcount, qedf_release_cmd); + } + return rc; +} + +static void qedf_process_l2_frame_compl(struct qedf_rport *fcport, + struct fc_frame *fp, + u16 l2_oxid) +{ + struct fc_lport *lport = fcport->qedf->lport; + struct fc_frame_header *fh; + u32 crc; + + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + + /* Set the OXID we return to what libfc used */ + if (l2_oxid != FC_XID_UNKNOWN) + fh->fh_ox_id = htons(l2_oxid); + + /* Setup header fields */ + fh->fh_r_ctl = FC_RCTL_ELS_REP; + fh->fh_type = FC_TYPE_ELS; + /* Last sequence, end sequence */ + fh->fh_f_ctl[0] = 0x98; + hton24(fh->fh_d_id, lport->port_id); + hton24(fh->fh_s_id, fcport->rdata->ids.port_id); + fh->fh_rx_id = 0xffff; + + /* Set frame attributes */ + crc = fcoe_fc_crc(fp); + fc_frame_init(fp); + fr_dev(fp) = lport; + fr_sof(fp) = FC_SOF_I3; + fr_eof(fp) = FC_EOF_T; + fr_crc(fp) = cpu_to_le32(~crc); + + /* Send completed request to libfc */ + fc_exch_recv(lport, fp); +} + +/* + * In instances where an ELS command times out we may need to restart the + * rport by logging out and then logging back in. + */ +void qedf_restart_rport(struct qedf_rport *fcport) +{ + struct fc_lport *lport; + struct fc_rport_priv *rdata; + u32 port_id; + + if (!fcport) + return; + + rdata = fcport->rdata; + if (rdata) { + lport = fcport->qedf->lport; + port_id = rdata->ids.port_id; + QEDF_ERR(&(fcport->qedf->dbg_ctx), + "LOGO port_id=%x.\n", port_id); + fc_rport_logoff(rdata); + /* Recreate the rport and log back in */ + rdata = fc_rport_create(lport, port_id); + if (rdata) + fc_rport_login(rdata); + } +} + +static void qedf_l2_els_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *els_req; + struct qedf_rport *fcport; + struct qedf_mp_req *mp_req; + struct fc_frame *fp; + struct fc_frame_header *fh, *mp_fc_hdr; + void *resp_buf, *fc_payload; + u32 resp_len; + u16 l2_oxid; + + l2_oxid = cb_arg->l2_oxid; + els_req = cb_arg->io_req; + + if (!els_req) { + QEDF_ERR(NULL, "els_req is NULL.\n"); + goto free_arg; + } + + /* + * If we are flushing the command just free the cb_arg as none of the + * response data will be valid. 
+ */ + if (els_req->event == QEDF_IOREQ_EV_ELS_FLUSH) + goto free_arg; + + fcport = els_req->fcport; + mp_req = &(els_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + resp_buf = mp_req->resp_buf; + + /* + * If a middle path ELS command times out, don't try to return + * the command but rather do any internal cleanup and then libfc + * timeout the command and clean up its internal resources. + */ + if (els_req->event == QEDF_IOREQ_EV_ELS_TMO) { + /* + * If ADISC times out, libfc will timeout the exchange and then + * try to send a PLOGI which will timeout since the session is + * still offloaded. Force libfc to logout the session which + * will offload the connection and allow the PLOGI response to + * flow over the LL2 path. + */ + if (cb_arg->op == ELS_ADISC) + qedf_restart_rport(fcport); + return; + } + + if (sizeof(struct fc_frame_header) + resp_len > QEDF_PAGE_SIZE) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "resp_len is " + "beyond page size.\n"); + goto free_arg; + } + + fp = fc_frame_alloc(fcport->qedf->lport, resp_len); + if (!fp) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + return; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Completing OX_ID 0x%x back to libfc.\n", l2_oxid); + qedf_process_l2_frame_compl(fcport, fp, l2_oxid); + +free_arg: + kfree(cb_arg); +} + +int qedf_send_adisc(struct qedf_rport *fcport, struct fc_frame *fp) +{ + struct fc_els_adisc *adisc; + struct fc_frame_header *fh; + struct fc_lport *lport = fcport->qedf->lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t r_a_tov = lport->r_a_tov; + int rc; + + qedf = fcport->qedf; + fh = fc_frame_header_get(fp); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "ADISC\n"); + rc = -ENOMEM; + goto adisc_err; + } + cb_arg->l2_oxid = ntohs(fh->fh_ox_id); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Sending ADISC ox_id=0x%x.\n", cb_arg->l2_oxid); + + adisc = fc_frame_payload_get(fp, sizeof(*adisc)); + + rc = qedf_initiate_els(fcport, ELS_ADISC, adisc, sizeof(*adisc), + qedf_l2_els_compl, cb_arg, r_a_tov); + +adisc_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "ADISC failed.\n"); + kfree(cb_arg); + } + return rc; +} + +static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *srr_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *mp_fc_hdr, *fh; + struct fc_frame *fp; + void *resp_buf, *fc_payload; + u32 resp_len; + struct fc_lport *lport; + struct qedf_ctx *qedf; + int refcount; + u8 opcode; + + srr_req = cb_arg->io_req; + qedf = srr_req->fcport->qedf; + lport = qedf->lport; + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + clear_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags); + + if (srr_req->event != QEDF_IOREQ_EV_ELS_TMO && + srr_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," + " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", + orig_io_req, 
orig_io_req->xid, srr_req->xid, refcount); + + /* If a SRR times out, simply free resources */ + if (srr_req->event == QEDF_IOREQ_EV_ELS_TMO) + goto out_free; + + /* Normalize response data into struct fc_frame */ + mp_req = &(srr_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + resp_buf = mp_req->resp_buf; + + fp = fc_frame_alloc(lport, resp_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + goto out_free; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + opcode = fc_frame_payload_op(fp); + switch (opcode) { + case ELS_LS_ACC: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "SRR success.\n"); + break; + case ELS_LS_RJT: + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, + "SRR rejected.\n"); + qedf_initiate_abts(orig_io_req, true); + break; + } + + fc_frame_free(fp); +out_free: + /* Put reference for original command since SRR completed */ + kref_put(&orig_io_req->refcount, qedf_release_cmd); + kfree(cb_arg); +} + +static int qedf_send_srr(struct qedf_ioreq *orig_io_req, u32 offset, u8 r_ctl) +{ + struct fcp_srr srr; + struct qedf_ctx *qedf; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + u32 sid, r_a_tov; + int rc; + + if (!orig_io_req) { + QEDF_ERR(NULL, "orig_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = orig_io_req->fcport; + + /* Check that fcport is still offloaded */ + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + /* Take reference until SRR command completion */ + kref_get(&orig_io_req->refcount); + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending SRR orig_io=%p, " + "orig_xid=0x%x\n", orig_io_req, orig_io_req->xid); + memset(&srr, 0, sizeof(srr)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "SRR\n"); + rc = -ENOMEM; + goto srr_err; + } + + cb_arg->aborted_io_req = orig_io_req; + + srr.srr_op = ELS_SRR; + srr.srr_ox_id = htons(orig_io_req->xid); + srr.srr_rx_id = htons(orig_io_req->rx_id); + srr.srr_rel_off = htonl(offset); + srr.srr_r_ctl = r_ctl; + + rc = qedf_initiate_els(fcport, ELS_SRR, &srr, sizeof(srr), + qedf_srr_compl, cb_arg, r_a_tov); + +srr_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "SRR failed - release orig_io_req" + "=0x%x\n", orig_io_req->xid); + kfree(cb_arg); + /* If we fail to queue SRR, send ABTS to orig_io */ + qedf_initiate_abts(orig_io_req, true); + kref_put(&orig_io_req->refcount, qedf_release_cmd); + } else + /* Tell other threads that SRR is in progress */ + set_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags); + + return rc; +} + +static void qedf_initiate_seq_cleanup(struct qedf_ioreq *orig_io_req, + u32 offset, u8 r_ctl) +{ + struct qedf_rport *fcport; + unsigned long flags; + struct qedf_els_cb_arg *cb_arg; + + fcport = orig_io_req->fcport; + + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Doing sequence cleanup for xid=0x%x offset=%u.\n", + orig_io_req->xid, offset); + + cb_arg = 
kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Unable to allocate cb_arg " + "for sequence cleanup\n"); + return; + } + + /* Get reference for cleanup request */ + kref_get(&orig_io_req->refcount); + + orig_io_req->cmd_type = QEDF_SEQ_CLEANUP; + cb_arg->offset = offset; + cb_arg->r_ctl = r_ctl; + orig_io_req->cb_arg = cb_arg; + + qedf_cmd_timer_set(fcport->qedf, orig_io_req, + QEDF_CLEANUP_TIMEOUT * HZ); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + qedf_add_to_sq(fcport, orig_io_req->xid, 0, + FCOE_TASK_TYPE_SEQUENCE_CLEANUP, offset); + qedf_ring_doorbell(fcport); + + spin_unlock_irqrestore(&fcport->rport_lock, flags); +} + +void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req) +{ + int rc; + struct qedf_els_cb_arg *cb_arg; + + cb_arg = io_req->cb_arg; + + /* If we timed out just free resources */ + if (io_req->event == QEDF_IOREQ_EV_ELS_TMO || !cqe) + goto free; + + /* Kill the timer we put on the request */ + cancel_delayed_work_sync(&io_req->timeout_work); + + rc = qedf_send_srr(io_req, cb_arg->offset, cb_arg->r_ctl); + if (rc) + QEDF_ERR(&(qedf->dbg_ctx), "Unable to send SRR, I/O will " + "abort, xid=0x%x.\n", io_req->xid); +free: + kfree(cb_arg); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +static bool qedf_requeue_io_req(struct qedf_ioreq *orig_io_req) +{ + struct qedf_rport *fcport; + struct qedf_ioreq *new_io_req; + unsigned long flags; + bool rc = false; + + fcport = orig_io_req->fcport; + if (!fcport) { + QEDF_ERR(NULL, "fcport is NULL.\n"); + goto out; + } + + if (!orig_io_req->sc_cmd) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "sc_cmd is NULL for " + "xid=0x%x.\n", orig_io_req->xid); + goto out; + } + + new_io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); + if (!new_io_req) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Could not allocate new " + "io_req.\n"); + goto out; + } + + new_io_req->sc_cmd = orig_io_req->sc_cmd; + + /* + * This keeps the sc_cmd struct from being returned to the tape + * driver and being requeued twice. We do need to put a reference + * for the original I/O request since we will not do a SCSI completion + * for it. + */ + orig_io_req->sc_cmd = NULL; + kref_put(&orig_io_req->refcount, qedf_release_cmd); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + /* kref for new command released in qedf_post_io_req on error */ + if (qedf_post_io_req(fcport, new_io_req)) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Unable to post io_req\n"); + /* Return SQE to pool */ + atomic_inc(&fcport->free_sqes); + } else { + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Reissued SCSI command from orig_xid=0x%x on " + "new_xid=0x%x.\n", orig_io_req->xid, new_io_req->xid); + /* + * Abort the original I/O but do not return SCSI command as + * it has been reissued on another OX_ID. 
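+ * Passing false to qedf_initiate_abts() below requests exactly
+ * that: abort the exchange without completing the SCSI command.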
+ */ + spin_unlock_irqrestore(&fcport->rport_lock, flags); + qedf_initiate_abts(orig_io_req, false); + goto out; + } + + spin_unlock_irqrestore(&fcport->rport_lock, flags); +out: + return rc; +} + + +static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *rec_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *mp_fc_hdr, *fh; + struct fc_frame *fp; + void *resp_buf, *fc_payload; + u32 resp_len; + struct fc_lport *lport; + struct qedf_ctx *qedf; + int refcount; + enum fc_rctl r_ctl; + struct fc_els_ls_rjt *rjt; + struct fc_els_rec_acc *acc; + u8 opcode; + u32 offset, e_stat; + struct scsi_cmnd *sc_cmd; + bool srr_needed = false; + + rec_req = cb_arg->io_req; + qedf = rec_req->fcport->qedf; + lport = qedf->lport; + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + if (rec_req->event != QEDF_IOREQ_EV_ELS_TMO && + rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," + " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", + orig_io_req, orig_io_req->xid, rec_req->xid, refcount); + + /* If a REC times out, free resources */ + if (rec_req->event == QEDF_IOREQ_EV_ELS_TMO) + goto out_free; + + /* Normalize response data into struct fc_frame */ + mp_req = &(rec_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + acc = resp_buf = mp_req->resp_buf; + + fp = fc_frame_alloc(lport, resp_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + goto out_free; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + opcode = fc_frame_payload_op(fp); + if (opcode == ELS_LS_RJT) { + rjt = fc_frame_payload_get(fp, sizeof(*rjt)); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Received LS_RJT for REC: er_reason=0x%x, " + "er_explan=0x%x.\n", rjt->er_reason, rjt->er_explan); + /* + * The following response(s) mean that we need to reissue the + * request on another exchange. We need to do this without + * informing the upper layers lest it cause an application + * error. 
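+ * qedf_requeue_io_req() moves the SCSI command to a newly
+ * allocated io_req and aborts the original exchange.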
+ */ + if ((rjt->er_reason == ELS_RJT_LOGIC || + rjt->er_reason == ELS_RJT_UNAB) && + rjt->er_explan == ELS_EXPL_OXID_RXID) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Handle CMD LOST case.\n"); + qedf_requeue_io_req(orig_io_req); + } + } else if (opcode == ELS_LS_ACC) { + offset = ntohl(acc->reca_fc4value); + e_stat = ntohl(acc->reca_e_stat); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Received LS_ACC for REC: offset=0x%x, e_stat=0x%x.\n", + offset, e_stat); + if (e_stat & ESB_ST_SEQ_INIT) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Target has the seq init\n"); + goto out_free_frame; + } + sc_cmd = orig_io_req->sc_cmd; + if (!sc_cmd) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "sc_cmd is NULL for xid=0x%x.\n", + orig_io_req->xid); + goto out_free_frame; + } + /* SCSI write case */ + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + if (offset == orig_io_req->data_xfer_len) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "WRITE - response lost.\n"); + r_ctl = FC_RCTL_DD_CMD_STATUS; + srr_needed = true; + offset = 0; + } else { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "WRITE - XFER_RDY/DATA lost.\n"); + r_ctl = FC_RCTL_DD_DATA_DESC; + /* Use data from warning CQE instead of REC */ + offset = orig_io_req->tx_buf_off; + } + /* SCSI read case */ + } else { + if (orig_io_req->rx_buf_off == + orig_io_req->data_xfer_len) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "READ - response lost.\n"); + srr_needed = true; + r_ctl = FC_RCTL_DD_CMD_STATUS; + offset = 0; + } else { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "READ - DATA lost.\n"); + /* + * For read case we always set the offset to 0 + * for sequence recovery task. + */ + offset = 0; + r_ctl = FC_RCTL_DD_SOL_DATA; + } + } + + if (srr_needed) + qedf_send_srr(orig_io_req, offset, r_ctl); + else + qedf_initiate_seq_cleanup(orig_io_req, offset, r_ctl); + } + +out_free_frame: + fc_frame_free(fp); +out_free: + /* Put reference for original command since REC completed */ + kref_put(&orig_io_req->refcount, qedf_release_cmd); + kfree(cb_arg); +} + +/* Assumes kref is already held by caller */ +int qedf_send_rec(struct qedf_ioreq *orig_io_req) +{ + + struct fc_els_rec rec; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t sid; + uint32_t r_a_tov; + int rc; + + if (!orig_io_req) { + QEDF_ERR(NULL, "orig_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = orig_io_req->fcport; + + /* Check that fcport is still offloaded */ + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + /* Take reference until REC command completion */ + kref_get(&orig_io_req->refcount); + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + memset(&rec, 0, sizeof(rec)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "REC\n"); + rc = -ENOMEM; + goto rec_err; + } + + cb_arg->aborted_io_req = orig_io_req; + + rec.rec_cmd = ELS_REC; + hton24(rec.rec_s_id, sid); + rec.rec_ox_id = htons(orig_io_req->xid); + rec.rec_rx_id = + htons(orig_io_req->task->tstorm_st_context.read_write.rx_id); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending REC orig_io=%p, " + "orig_xid=0x%x rx_id=0x%x\n", orig_io_req, + orig_io_req->xid, rec.rec_rx_id); + rc = 
qedf_initiate_els(fcport, ELS_REC, &rec, sizeof(rec), + qedf_rec_compl, cb_arg, r_a_tov); + +rec_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "REC failed - release orig_io_req" + "=0x%x\n", orig_io_req->xid); + kfree(cb_arg); + kref_put(&orig_io_req->refcount, qedf_release_cmd); + } + return rc; +} diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c new file mode 100644 index 00000000000000..868d423380d120 --- /dev/null +++ b/drivers/scsi/qedf/qedf_fip.c @@ -0,0 +1,269 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include "qedf.h" + +extern const struct qed_fcoe_ops *qed_ops; +/* + * FIP VLAN functions that will eventually move to libfcoe. + */ + +void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf) +{ + struct sk_buff *skb; + char *eth_fr; + int fr_len; + struct fip_vlan *vlan; +#define MY_FIP_ALL_FCF_MACS ((__u8[6]) { 1, 0x10, 0x18, 1, 0, 2 }) + static u8 my_fcoe_all_fcfs[ETH_ALEN] = MY_FIP_ALL_FCF_MACS; + + skb = dev_alloc_skb(sizeof(struct fip_vlan)); + if (!skb) + return; + + fr_len = sizeof(*vlan); + eth_fr = (char *)skb->data; + vlan = (struct fip_vlan *)eth_fr; + + memset(vlan, 0, sizeof(*vlan)); + ether_addr_copy(vlan->eth.h_source, qedf->mac); + ether_addr_copy(vlan->eth.h_dest, my_fcoe_all_fcfs); + vlan->eth.h_proto = htons(ETH_P_FIP); + + vlan->fip.fip_ver = FIP_VER_ENCAPS(FIP_VER); + vlan->fip.fip_op = htons(FIP_OP_VLAN); + vlan->fip.fip_subcode = FIP_SC_VL_REQ; + vlan->fip.fip_dl_len = htons(sizeof(vlan->desc) / FIP_BPW); + + vlan->desc.mac.fd_desc.fip_dtype = FIP_DT_MAC; + vlan->desc.mac.fd_desc.fip_dlen = sizeof(vlan->desc.mac) / FIP_BPW; + ether_addr_copy(vlan->desc.mac.fd_mac, qedf->mac); + + vlan->desc.wwnn.fd_desc.fip_dtype = FIP_DT_NAME; + vlan->desc.wwnn.fd_desc.fip_dlen = sizeof(vlan->desc.wwnn) / FIP_BPW; + put_unaligned_be64(qedf->lport->wwnn, &vlan->desc.wwnn.fd_wwn); + + skb_put(skb, sizeof(*vlan)); + skb->protocol = htons(ETH_P_FIP); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Sending FIP VLAN " + "request."); + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(qedf->dbg_ctx), "Cannot send vlan request " + "because link is not up.\n"); + + kfree_skb(skb); + return; + } + qed_ops->ll2->start_xmit(qedf->cdev, skb); +} + +static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf, + struct sk_buff *skb) +{ + struct fip_header *fiph; + struct fip_desc *desc; + u16 vid = 0; + ssize_t rlen; + size_t dlen; + + fiph = (struct fip_header *)(((void *)skb->data) + 2 * ETH_ALEN + 2); + + rlen = ntohs(fiph->fip_dl_len) * 4; + desc = (struct fip_desc *)(fiph + 1); + while (rlen > 0) { + dlen = desc->fip_dlen * FIP_BPW; + switch (desc->fip_dtype) { + case FIP_DT_VLAN: + vid = ntohs(((struct fip_vlan_desc *)desc)->fd_vlan); + break; + } + desc = (struct fip_desc *)((char *)desc + dlen); + rlen -= dlen; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "VLAN response, " + "vid=0x%x.\n", vid); + + if (vid > 0 && qedf->vlan_id != vid) { + qedf_set_vlan_id(qedf, vid); + + /* Inform waiter that it's ok to call fcoe_ctlr_link up() */ + complete(&qedf->fipvlan_compl); + } +} + +void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) +{ + struct qedf_ctx *qedf = container_of(fip, struct qedf_ctx, ctlr); + struct ethhdr *eth_hdr; + 
struct vlan_ethhdr *vlan_hdr; + struct fip_header *fiph; + u16 op, vlan_tci = 0; + u8 sub; + + if (!test_bit(QEDF_LL2_STARTED, &qedf->flags)) { + QEDF_WARN(&(qedf->dbg_ctx), "LL2 not started\n"); + kfree_skb(skb); + return; + } + + fiph = (struct fip_header *) ((void *)skb->data + 2 * ETH_ALEN + 2); + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + op = ntohs(fiph->fip_op); + sub = fiph->fip_subcode; + + if (!qedf->vlan_hw_insert) { + vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, sizeof(*vlan_hdr) + - sizeof(*eth_hdr)); + memcpy(vlan_hdr, eth_hdr, 2 * ETH_ALEN); + vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_hdr->h_vlan_encapsulated_proto = eth_hdr->h_proto; + vlan_hdr->h_vlan_TCI = vlan_tci = htons(qedf->vlan_id); + } + + /* Update eth_hdr since we added a VLAN tag */ + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FIP frame send: " + "dest=%pM op=%x sub=%x vlan=%04x.", eth_hdr->h_dest, op, sub, + ntohs(vlan_tci)); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fip ", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, false); + + qed_ops->ll2->start_xmit(qedf->cdev, skb); +} + +/* Process incoming FIP frames. */ +void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb) +{ + struct ethhdr *eth_hdr; + struct fip_header *fiph; + struct fip_desc *desc; + struct fip_mac_desc *mp; + struct fip_wwn_desc *wp; + struct fip_vn_desc *vp; + size_t rlen, dlen; + uint32_t cvl_port_id; + __u8 cvl_mac[ETH_ALEN]; + u16 op; + u8 sub; + + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + fiph = (struct fip_header *) ((void *)skb->data + 2 * ETH_ALEN + 2); + op = ntohs(fiph->fip_op); + sub = fiph->fip_subcode; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FIP frame received: " + "skb=%p fiph=%p source=%pM op=%x sub=%x", skb, fiph, + eth_hdr->h_source, op, sub); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fip ", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, false); + + /* Handle FIP VLAN resp in the driver */ + if (op == FIP_OP_VLAN && sub == FIP_SC_VL_NOTE) { + qedf_fcoe_process_vlan_resp(qedf, skb); + qedf->vlan_hw_insert = 0; + kfree_skb(skb); + } else if (op == FIP_OP_CTRL && sub == FIP_SC_CLR_VLINK) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Clear virtual " + "link received.\n"); + + /* Check that an FCF has been selected by fcoe */ + if (qedf->ctlr.sel_fcf == NULL) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Dropping CVL since FCF has not been selected " + "yet."); + return; + } + + cvl_port_id = 0; + memset(cvl_mac, 0, ETH_ALEN); + /* + * We need to loop through the CVL descriptors to determine + * if we want to reset the fcoe link + */ + rlen = ntohs(fiph->fip_dl_len) * FIP_BPW; + desc = (struct fip_desc *)(fiph + 1); + while (rlen >= sizeof(*desc)) { + dlen = desc->fip_dlen * FIP_BPW; + switch (desc->fip_dtype) { + case FIP_DT_MAC: + mp = (struct fip_mac_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fd_mac=%pM.\n", __func__, mp->fd_mac); + ether_addr_copy(cvl_mac, mp->fd_mac); + break; + case FIP_DT_NAME: + wp = (struct fip_wwn_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fc_wwpn=%016llx.\n", + get_unaligned_be64(&wp->fd_wwn)); + break; + case FIP_DT_VN_ID: + vp = (struct fip_vn_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fd_fc_id=%x.\n", ntoh24(vp->fd_fc_id)); + cvl_port_id = ntoh24(vp->fd_fc_id); + break; + default: + /* Ignore anything else */ + break; + } + desc = (struct fip_desc *)((char *)desc + dlen); + rlen -= dlen; + } + + 
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "cvl_port_id=%06x cvl_mac=%pM.\n", cvl_port_id, + cvl_mac); + if (cvl_port_id == qedf->lport->port_id && + ether_addr_equal(cvl_mac, + qedf->ctlr.sel_fcf->fcf_mac)) { + fcoe_ctlr_link_down(&qedf->ctlr); + qedf_wait_for_upload(qedf); + fcoe_ctlr_link_up(&qedf->ctlr); + } + kfree_skb(skb); + } else { + /* Everything else is handled by libfcoe */ + __skb_pull(skb, ETH_HLEN); + fcoe_ctlr_recv(&qedf->ctlr, skb); + } +} + +void qedf_update_src_mac(struct fc_lport *lport, u8 *addr) +{ + struct qedf_ctx *qedf = lport_priv(lport); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Setting data_src_addr=%pM.\n", addr); + ether_addr_copy(qedf->data_src_addr, addr); +} + +u8 *qedf_get_src_mac(struct fc_lport *lport) +{ + u8 mac[ETH_ALEN]; + u8 port_id[3]; + struct qedf_ctx *qedf = lport_priv(lport); + + /* We need to use the lport port_id to create the data_src_addr */ + if (is_zero_ether_addr(qedf->data_src_addr)) { + hton24(port_id, lport->port_id); + fc_fcoe_set_mac(mac, port_id); + qedf->ctlr.update_mac(lport, mac); + } + return qedf->data_src_addr; +} diff --git a/drivers/scsi/qedf/qedf_hsi.h b/drivers/scsi/qedf/qedf_hsi.h new file mode 100644 index 00000000000000..dfd65dec287498 --- /dev/null +++ b/drivers/scsi/qedf/qedf_hsi.h @@ -0,0 +1,422 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#ifndef __QEDF_HSI__ +#define __QEDF_HSI__ +/* + * Add include to common target + */ +#include + +/* + * Add include to common storage target + */ +#include + +/* + * Add include to common fcoe target for both eCore and protocol driver + */ +#include + + +/* + * FCoE CQ element ABTS information + */ +struct fcoe_abts_info { + u8 r_ctl /* R_CTL in the ABTS response frame */; + u8 reserved0; + __le16 rx_id; + __le32 reserved2[2]; + __le32 fc_payload[3] /* ABTS FC payload response frame */; +}; + + +/* + * FCoE class type + */ +enum fcoe_class_type { + FCOE_TASK_CLASS_TYPE_3, + FCOE_TASK_CLASS_TYPE_2, + MAX_FCOE_CLASS_TYPE +}; + + +/* + * FCoE CMDQ element control information + */ +struct fcoe_cmdqe_control { + __le16 conn_id; + u8 num_additional_cmdqes; + u8 cmdType; + /* true for ABTS request cmdqe. 
used in Target mode */ +#define FCOE_CMDQE_CONTROL_ABTSREQCMD_MASK 0x1 +#define FCOE_CMDQE_CONTROL_ABTSREQCMD_SHIFT 0 +#define FCOE_CMDQE_CONTROL_RESERVED1_MASK 0x7F +#define FCOE_CMDQE_CONTROL_RESERVED1_SHIFT 1 + u8 reserved2[4]; +}; + +/* + * FCoE control + payload CMDQ element + */ +struct fcoe_cmdqe { + struct fcoe_cmdqe_control hdr; + u8 fc_header[24]; + __le32 fcp_cmd_payload[8]; +}; + + + +/* + * FCP RSP flags + */ +struct fcoe_fcp_rsp_flags { + u8 flags; +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID_SHIFT 0 +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID_SHIFT 1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER_SHIFT 2 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER_SHIFT 3 +#define FCOE_FCP_RSP_FLAGS_FCP_CONF_REQ_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_CONF_REQ_SHIFT 4 +#define FCOE_FCP_RSP_FLAGS_FCP_BIDI_FLAGS_MASK 0x7 +#define FCOE_FCP_RSP_FLAGS_FCP_BIDI_FLAGS_SHIFT 5 +}; + +/* + * FCoE CQ element response information + */ +struct fcoe_cqe_rsp_info { + struct fcoe_fcp_rsp_flags rsp_flags; + u8 scsi_status_code; + __le16 retry_delay_timer; + __le32 fcp_resid; + __le32 fcp_sns_len; + __le32 fcp_rsp_len; + __le16 rx_id; + u8 fw_error_flags; +#define FCOE_CQE_RSP_INFO_FW_UNDERRUN_MASK 0x1 /* FW detected underrun */ +#define FCOE_CQE_RSP_INFO_FW_UNDERRUN_SHIFT 0 +#define FCOE_CQE_RSP_INFO_RESREVED_MASK 0x7F +#define FCOE_CQE_RSP_INFO_RESREVED_SHIFT 1 + u8 reserved; + __le32 fw_residual /* Residual bytes calculated by FW */; +}; + +/* + * FCoE CQ element Target completion information + */ +struct fcoe_cqe_target_info { + __le16 rx_id; + __le16 reserved0; + __le32 reserved1[5]; +}; + +/* + * FCoE error/warning reporting entry + */ +struct fcoe_err_report_entry { + __le32 err_warn_bitmap_lo /* Error bitmap lower 32 bits */; + __le32 err_warn_bitmap_hi /* Error bitmap higher 32 bits */; + /* Buffer offset the beginning of the Sequence last transmitted */ + __le32 tx_buf_off; + /* Buffer offset from the beginning of the Sequence last received */ + __le32 rx_buf_off; + __le16 rx_id /* RX_ID of the associated task */; + __le16 reserved1; + __le32 reserved2; +}; + +/* + * FCoE CQ element middle path information + */ +struct fcoe_cqe_midpath_info { + __le32 data_placement_size; + __le16 rx_id; + __le16 reserved0; + __le32 reserved1[4]; +}; + +/* + * FCoE CQ element unsolicited information + */ +struct fcoe_unsolic_info { + /* BD information: Physical address and opaque data */ + struct scsi_bd bd_info; + __le16 conn_id /* Connection ID the frame is associated to */; + __le16 pkt_len /* Packet length */; + u8 reserved1[4]; +}; + +/* + * FCoE warning reporting entry + */ +struct fcoe_warning_report_entry { + /* BD information: Physical address and opaque data */ + struct scsi_bd bd_info; + /* Buffer offset the beginning of the Sequence last transmitted */ + __le32 buf_off; + __le16 rx_id /* RX_ID of the associated task */; + __le16 reserved1; +}; + +/* + * FCoE CQ element information + */ +union fcoe_cqe_info { + struct fcoe_cqe_rsp_info rsp_info /* Response completion information */; + /* Target completion information */ + struct fcoe_cqe_target_info target_info; + /* Error completion information */ + struct fcoe_err_report_entry err_info; + struct fcoe_abts_info abts_info /* ABTS completion information */; + /* Middle path completion information */ + struct fcoe_cqe_midpath_info midpath_info; 
+ /* Unsolicited packet completion information */ + struct fcoe_unsolic_info unsolic_info; + /* Warning completion information (Rec Tov expiration) */ + struct fcoe_warning_report_entry warn_info; +}; + +/* + * FCoE CQ element + */ +struct fcoe_cqe { + __le32 cqe_data; + /* The task identifier (OX_ID) to be completed */ +#define FCOE_CQE_TASK_ID_MASK 0xFFFF +#define FCOE_CQE_TASK_ID_SHIFT 0 + /* + * The CQE type: 0x0 Indicating on a pending work request completion. + * 0x1 - Indicating on an unsolicited event notification. use enum + * fcoe_cqe_type (use enum fcoe_cqe_type) + */ +#define FCOE_CQE_CQE_TYPE_MASK 0xF +#define FCOE_CQE_CQE_TYPE_SHIFT 16 +#define FCOE_CQE_RESERVED0_MASK 0xFFF +#define FCOE_CQE_RESERVED0_SHIFT 20 + __le16 reserved1; + __le16 fw_cq_prod; + union fcoe_cqe_info cqe_info; +}; + +/* + * FCoE CQE type + */ +enum fcoe_cqe_type { + /* solicited response on a R/W or middle-path SQE */ + FCOE_GOOD_COMPLETION_CQE_TYPE, + FCOE_UNSOLIC_CQE_TYPE /* unsolicited packet, RQ consumed */, + FCOE_ERROR_DETECTION_CQE_TYPE /* timer expiration, validation error */, + FCOE_WARNING_CQE_TYPE /* rec_tov or rr_tov timer expiration */, + FCOE_EXCH_CLEANUP_CQE_TYPE /* task cleanup completed */, + FCOE_ABTS_CQE_TYPE /* ABTS received and task cleaned */, + FCOE_DUMMY_CQE_TYPE /* just increment SQ CONS */, + /* Task was completed wight after sending a pkt to the target */ + FCOE_LOCAL_COMP_CQE_TYPE, + MAX_FCOE_CQE_TYPE +}; + + +/* + * FCoE device type + */ +enum fcoe_device_type { + FCOE_TASK_DEV_TYPE_DISK, + FCOE_TASK_DEV_TYPE_TAPE, + MAX_FCOE_DEVICE_TYPE +}; + + + + +/* + * FCoE fast path error codes + */ +enum fcoe_fp_error_warning_code { + FCOE_ERROR_CODE_XFER_OOO_RO /* XFER error codes */, + FCOE_ERROR_CODE_XFER_RO_NOT_ALIGNED, + FCOE_ERROR_CODE_XFER_NULL_BURST_LEN, + FCOE_ERROR_CODE_XFER_RO_GREATER_THAN_DATA2TRNS, + FCOE_ERROR_CODE_XFER_INVALID_PAYLOAD_SIZE, + FCOE_ERROR_CODE_XFER_TASK_TYPE_NOT_WRITE, + FCOE_ERROR_CODE_XFER_PEND_XFER_SET, + FCOE_ERROR_CODE_XFER_OPENED_SEQ, + FCOE_ERROR_CODE_XFER_FCTL, + FCOE_ERROR_CODE_FCP_RSP_BIDI_FLAGS_SET /* FCP RSP error codes */, + FCOE_ERROR_CODE_FCP_RSP_INVALID_LENGTH_FIELD, + FCOE_ERROR_CODE_FCP_RSP_INVALID_SNS_FIELD, + FCOE_ERROR_CODE_FCP_RSP_INVALID_PAYLOAD_SIZE, + FCOE_ERROR_CODE_FCP_RSP_PEND_XFER_SET, + FCOE_ERROR_CODE_FCP_RSP_OPENED_SEQ, + FCOE_ERROR_CODE_FCP_RSP_FCTL, + FCOE_ERROR_CODE_FCP_RSP_LAST_SEQ_RESET, + FCOE_ERROR_CODE_FCP_RSP_CONF_REQ_NOT_SUPPORTED_YET, + FCOE_ERROR_CODE_DATA_OOO_RO /* FCP DATA error codes */, + FCOE_ERROR_CODE_DATA_EXCEEDS_DEFINED_MAX_FRAME_SIZE, + FCOE_ERROR_CODE_DATA_EXCEEDS_DATA2TRNS, + FCOE_ERROR_CODE_DATA_SOFI3_SEQ_ACTIVE_SET, + FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET, + FCOE_ERROR_CODE_DATA_EOFN_END_SEQ_SET, + FCOE_ERROR_CODE_DATA_EOFT_END_SEQ_RESET, + FCOE_ERROR_CODE_DATA_TASK_TYPE_NOT_READ, + FCOE_ERROR_CODE_DATA_FCTL_INITIATIR, + FCOE_ERROR_CODE_MIDPATH_INVALID_TYPE /* Middle path error codes */, + FCOE_ERROR_CODE_MIDPATH_SOFI3_SEQ_ACTIVE_SET, + FCOE_ERROR_CODE_MIDPATH_SOFN_SEQ_ACTIVE_RESET, + FCOE_ERROR_CODE_MIDPATH_EOFN_END_SEQ_SET, + FCOE_ERROR_CODE_MIDPATH_EOFT_END_SEQ_RESET, + FCOE_ERROR_CODE_MIDPATH_REPLY_FCTL, + FCOE_ERROR_CODE_MIDPATH_INVALID_REPLY, + FCOE_ERROR_CODE_MIDPATH_ELS_REPLY_RCTL, + FCOE_ERROR_CODE_COMMON_MIDDLE_FRAME_WITH_PAD /* Common error codes */, + FCOE_ERROR_CODE_COMMON_SEQ_INIT_IN_TCE, + FCOE_ERROR_CODE_COMMON_FC_HDR_RX_ID_MISMATCH, + FCOE_ERROR_CODE_COMMON_INCORRECT_SEQ_CNT, + FCOE_ERROR_CODE_COMMON_DATA_FC_HDR_FCP_TYPE_MISMATCH, + FCOE_ERROR_CODE_COMMON_DATA_NO_MORE_SGES, + 
FCOE_ERROR_CODE_COMMON_OPTIONAL_FC_HDR, + FCOE_ERROR_CODE_COMMON_READ_TCE_OX_ID_TOO_BIG, + FCOE_ERROR_CODE_COMMON_DATA_WAS_NOT_TRANSMITTED, + FCOE_ERROR_CODE_COMMON_TASK_DDF_RCTL_INFO_FIELD, + FCOE_ERROR_CODE_COMMON_TASK_INVALID_RCTL, + FCOE_ERROR_CODE_COMMON_TASK_RCTL_GENERAL_MISMATCH, + FCOE_ERROR_CODE_E_D_TOV_TIMER_EXPIRATION /* Timer error codes */, + FCOE_WARNING_CODE_REC_TOV_TIMER_EXPIRATION /* Timer error codes */, + FCOE_ERROR_CODE_RR_TOV_TIMER_EXPIRATION /* Timer error codes */, + /* ABTSrsp pckt arrived unexpected */ + FCOE_ERROR_CODE_ABTS_REPLY_UNEXPECTED, + FCOE_ERROR_CODE_TARGET_MODE_FCP_RSP, + FCOE_ERROR_CODE_TARGET_MODE_FCP_XFER, + FCOE_ERROR_CODE_TARGET_MODE_DATA_TASK_TYPE_NOT_WRITE, + FCOE_ERROR_CODE_DATA_FCTL_TARGET, + FCOE_ERROR_CODE_TARGET_DATA_SIZE_NO_MATCH_XFER, + FCOE_ERROR_CODE_TARGET_DIF_CRC_CHECKSUM_ERROR, + FCOE_ERROR_CODE_TARGET_DIF_REF_TAG_ERROR, + FCOE_ERROR_CODE_TARGET_DIF_APP_TAG_ERROR, + MAX_FCOE_FP_ERROR_WARNING_CODE +}; + + +/* + * FCoE RESPQ element + */ +struct fcoe_respqe { + __le16 ox_id /* OX_ID that is located in the FCP_RSP FC header */; + __le16 rx_id /* RX_ID that is located in the FCP_RSP FC header */; + __le32 additional_info; +/* PARAM that is located in the FCP_RSP FC header */ +#define FCOE_RESPQE_PARAM_MASK 0xFFFFFF +#define FCOE_RESPQE_PARAM_SHIFT 0 +/* Indication whther its Target-auto-rsp mode or not */ +#define FCOE_RESPQE_TARGET_AUTO_RSP_MASK 0xFF +#define FCOE_RESPQE_TARGET_AUTO_RSP_SHIFT 24 +}; + + +/* + * FCoE slow path error codes + */ +enum fcoe_sp_error_code { + /* Error codes for Error Reporting in slow path flows */ + FCOE_ERROR_CODE_SLOW_PATH_TOO_MANY_FUNCS, + FCOE_ERROR_SLOW_PATH_CODE_NO_LICENSE, + MAX_FCOE_SP_ERROR_CODE +}; + + +/* + * FCoE SQE request type + */ +enum fcoe_sqe_request_type { + SEND_FCOE_CMD, + SEND_FCOE_MIDPATH, + SEND_FCOE_ABTS_REQUEST, + FCOE_EXCHANGE_CLEANUP, + FCOE_SEQUENCE_RECOVERY, + SEND_FCOE_XFER_RDY, + SEND_FCOE_RSP, + SEND_FCOE_RSP_WITH_SENSE_DATA, + SEND_FCOE_TARGET_DATA, + SEND_FCOE_INITIATOR_DATA, + /* + * Xfer Continuation (==1) ready to be sent. Previous XFERs data + * received successfully. 
+ */ + SEND_FCOE_XFER_CONTINUATION_RDY, + SEND_FCOE_TARGET_ABTS_RSP, + MAX_FCOE_SQE_REQUEST_TYPE +}; + + +/* + * FCoE task TX state + */ +enum fcoe_task_tx_state { + /* Initiate state after driver has initialized the task */ + FCOE_TASK_TX_STATE_NORMAL, + /* Updated by TX path after complete transmitting unsolicited packet */ + FCOE_TASK_TX_STATE_UNSOLICITED_COMPLETED, + /* + * Updated by TX path after start processing the task requesting the + * cleanup/abort operation + */ + FCOE_TASK_TX_STATE_CLEAN_REQ, + FCOE_TASK_TX_STATE_ABTS /* Updated by TX path during abort procedure */, + /* Updated by TX path during exchange cleanup procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP, + /* + * Updated by TX path during exchange cleanup continuation task + * procedure + */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE_CONT, + /* Updated by TX path during exchange cleanup first xfer procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE, + /* Updated by TX path during exchange cleanup read task in Target */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_READ_OR_RSP, + /* Updated by TX path during target exchange cleanup procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE_LAST_CYCLE, + /* Updated by TX path during sequence recovery procedure */ + FCOE_TASK_TX_STATE_SEQRECOVERY, + MAX_FCOE_TASK_TX_STATE +}; + + +/* + * FCoE task type + */ +enum fcoe_task_type { + FCOE_TASK_TYPE_WRITE_INITIATOR, + FCOE_TASK_TYPE_READ_INITIATOR, + FCOE_TASK_TYPE_MIDPATH, + FCOE_TASK_TYPE_UNSOLICITED, + FCOE_TASK_TYPE_ABTS, + FCOE_TASK_TYPE_EXCHANGE_CLEANUP, + FCOE_TASK_TYPE_SEQUENCE_CLEANUP, + FCOE_TASK_TYPE_WRITE_TARGET, + FCOE_TASK_TYPE_READ_TARGET, + FCOE_TASK_TYPE_RSP, + FCOE_TASK_TYPE_RSP_SENSE_DATA, + FCOE_TASK_TYPE_ABTS_TARGET, + FCOE_TASK_TYPE_ENUM_SIZE, + MAX_FCOE_TASK_TYPE +}; + +struct scsi_glbl_queue_entry { + /* Start physical address for the RQ (receive queue) PBL. */ + struct regpair rq_pbl_addr; + /* Start physical address for the CQ (completion queue) PBL. */ + struct regpair cq_pbl_addr; + /* Start physical address for the CMDQ (command queue) PBL. */ + struct regpair cmdq_pbl_addr; +}; + +#endif /* __QEDF_HSI__ */ diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c new file mode 100644 index 00000000000000..486c045ac8bb25 --- /dev/null +++ b/drivers/scsi/qedf/qedf_io.c @@ -0,0 +1,2282 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include "qedf.h" +#include + +void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + unsigned int timer_msec) +{ + queue_delayed_work(qedf->timer_work_queue, &io_req->timeout_work, + msecs_to_jiffies(timer_msec)); +} + +static void qedf_cmd_timeout(struct work_struct *work) +{ + + struct qedf_ioreq *io_req = + container_of(work, struct qedf_ioreq, timeout_work.work); + struct qedf_ctx *qedf = io_req->fcport->qedf; + struct qedf_rport *fcport = io_req->fcport; + u8 op = 0; + + switch (io_req->cmd_type) { + case QEDF_ABTS: + QEDF_ERR((&qedf->dbg_ctx), "ABTS timeout, xid=0x%x.\n", + io_req->xid); + /* Cleanup timed out ABTS */ + qedf_initiate_cleanup(io_req, true); + complete(&io_req->abts_done); + + /* + * Need to call kref_put for reference taken when initiate_abts + * was called since abts_compl won't be called now that we've + * cleaned up the task. 
+ */ + kref_put(&io_req->refcount, qedf_release_cmd); + + /* + * Now that the original I/O and the ABTS are complete see + * if we need to reconnect to the target. + */ + qedf_restart_rport(fcport); + break; + case QEDF_ELS: + kref_get(&io_req->refcount); + /* + * Don't attempt to clean an ELS timeout as any subseqeunt + * ABTS or cleanup requests just hang. For now just free + * the resources of the original I/O and the RRQ + */ + QEDF_ERR(&(qedf->dbg_ctx), "ELS timeout, xid=0x%x.\n", + io_req->xid); + io_req->event = QEDF_IOREQ_EV_ELS_TMO; + /* Call callback function to complete command */ + if (io_req->cb_func && io_req->cb_arg) { + op = io_req->cb_arg->op; + io_req->cb_func(io_req->cb_arg); + io_req->cb_arg = NULL; + } + qedf_initiate_cleanup(io_req, true); + kref_put(&io_req->refcount, qedf_release_cmd); + break; + case QEDF_SEQ_CLEANUP: + QEDF_ERR(&(qedf->dbg_ctx), "Sequence cleanup timeout, " + "xid=0x%x.\n", io_req->xid); + qedf_initiate_cleanup(io_req, true); + io_req->event = QEDF_IOREQ_EV_ELS_TMO; + qedf_process_seq_cleanup_compl(qedf, NULL, io_req); + break; + default: + break; + } +} + +void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr) +{ + struct io_bdt *bdt_info; + struct qedf_ctx *qedf = cmgr->qedf; + size_t bd_tbl_sz; + u16 min_xid = QEDF_MIN_XID; + u16 max_xid = (FCOE_PARAMS_NUM_TASKS - 1); + int num_ios; + int i; + struct qedf_ioreq *io_req; + + num_ios = max_xid - min_xid + 1; + + /* Free fcoe_bdt_ctx structures */ + if (!cmgr->io_bdt_pool) + goto free_cmd_pool; + + bd_tbl_sz = QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge); + for (i = 0; i < num_ios; i++) { + bdt_info = cmgr->io_bdt_pool[i]; + if (bdt_info->bd_tbl) { + dma_free_coherent(&qedf->pdev->dev, bd_tbl_sz, + bdt_info->bd_tbl, bdt_info->bd_tbl_dma); + bdt_info->bd_tbl = NULL; + } + } + + /* Destroy io_bdt pool */ + for (i = 0; i < num_ios; i++) { + kfree(cmgr->io_bdt_pool[i]); + cmgr->io_bdt_pool[i] = NULL; + } + + kfree(cmgr->io_bdt_pool); + cmgr->io_bdt_pool = NULL; + +free_cmd_pool: + + for (i = 0; i < num_ios; i++) { + io_req = &cmgr->cmds[i]; + /* Make sure we free per command sense buffer */ + if (io_req->sense_buffer) + dma_free_coherent(&qedf->pdev->dev, + QEDF_SCSI_SENSE_BUFFERSIZE, io_req->sense_buffer, + io_req->sense_buffer_dma); + cancel_delayed_work_sync(&io_req->rrq_work); + } + + /* Free command manager itself */ + vfree(cmgr); +} + +static void qedf_handle_rrq(struct work_struct *work) +{ + struct qedf_ioreq *io_req = + container_of(work, struct qedf_ioreq, rrq_work.work); + + qedf_send_rrq(io_req); + +} + +struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf) +{ + struct qedf_cmd_mgr *cmgr; + struct io_bdt *bdt_info; + struct qedf_ioreq *io_req; + u16 xid; + int i; + int num_ios; + u16 min_xid = QEDF_MIN_XID; + u16 max_xid = (FCOE_PARAMS_NUM_TASKS - 1); + + /* Make sure num_queues is already set before calling this function */ + if (!qedf->num_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "num_queues is not set.\n"); + return NULL; + } + + if (max_xid <= min_xid || max_xid == FC_XID_UNKNOWN) { + QEDF_WARN(&(qedf->dbg_ctx), "Invalid min_xid 0x%x and " + "max_xid 0x%x.\n", min_xid, max_xid); + return NULL; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "min xid 0x%x, max xid " + "0x%x.\n", min_xid, max_xid); + + num_ios = max_xid - min_xid + 1; + + cmgr = vzalloc(sizeof(struct qedf_cmd_mgr)); + if (!cmgr) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc cmd mgr.\n"); + return NULL; + } + + cmgr->qedf = qedf; + spin_lock_init(&cmgr->lock); + + /* + * Initialize list of qedf_ioreq. 
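+ * Each entry gets an xid, delayed-work handlers for command timeout
+ * and RRQ, and a DMA-coherent sense buffer.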
+ */ + xid = QEDF_MIN_XID; + + for (i = 0; i < num_ios; i++) { + io_req = &cmgr->cmds[i]; + INIT_DELAYED_WORK(&io_req->timeout_work, qedf_cmd_timeout); + + io_req->xid = xid++; + + INIT_DELAYED_WORK(&io_req->rrq_work, qedf_handle_rrq); + + /* Allocate DMA memory to hold sense buffer */ + io_req->sense_buffer = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_SCSI_SENSE_BUFFERSIZE, &io_req->sense_buffer_dma, + GFP_KERNEL); + if (!io_req->sense_buffer) + goto mem_err; + } + + /* Allocate pool of io_bdts - one for each qedf_ioreq */ + cmgr->io_bdt_pool = kmalloc_array(num_ios, sizeof(struct io_bdt *), + GFP_KERNEL); + + if (!cmgr->io_bdt_pool) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc io_bdt_pool.\n"); + goto mem_err; + } + + for (i = 0; i < num_ios; i++) { + cmgr->io_bdt_pool[i] = kmalloc(sizeof(struct io_bdt), + GFP_KERNEL); + if (!cmgr->io_bdt_pool[i]) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc " + "io_bdt_pool[%d].\n", i); + goto mem_err; + } + } + + for (i = 0; i < num_ios; i++) { + bdt_info = cmgr->io_bdt_pool[i]; + bdt_info->bd_tbl = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge), + &bdt_info->bd_tbl_dma, GFP_KERNEL); + if (!bdt_info->bd_tbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc " + "bdt_tbl[%d].\n", i); + goto mem_err; + } + } + atomic_set(&cmgr->free_list_cnt, num_ios); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "cmgr->free_list_cnt=%d.\n", + atomic_read(&cmgr->free_list_cnt)); + + return cmgr; + +mem_err: + qedf_cmd_mgr_free(cmgr); + return NULL; +} + +struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, u8 cmd_type) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct qedf_cmd_mgr *cmd_mgr = qedf->cmd_mgr; + struct qedf_ioreq *io_req = NULL; + struct io_bdt *bd_tbl; + u16 xid; + uint32_t free_sqes; + int i; + unsigned long flags; + + free_sqes = atomic_read(&fcport->free_sqes); + + if (!free_sqes) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, free_sqes=%d.\n ", + free_sqes); + goto out_failed; + } + + /* Limit the number of outstanding R/W tasks */ + if ((atomic_read(&fcport->num_active_ios) >= + NUM_RW_TASKS_PER_CONNECTION)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, num_active_ios=%d.\n", + atomic_read(&fcport->num_active_ios)); + goto out_failed; + } + + /* Limit global TIDs certain tasks */ + if (atomic_read(&cmd_mgr->free_list_cnt) <= GBL_RSVD_TASKS) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, free_list_cnt=%d.\n", + atomic_read(&cmd_mgr->free_list_cnt)); + goto out_failed; + } + + spin_lock_irqsave(&cmd_mgr->lock, flags); + for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { + io_req = &cmd_mgr->cmds[cmd_mgr->idx]; + cmd_mgr->idx++; + if (cmd_mgr->idx == FCOE_PARAMS_NUM_TASKS) + cmd_mgr->idx = 0; + + /* Check to make sure command was previously freed */ + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags)) + break; + } + + if (i == FCOE_PARAMS_NUM_TASKS) { + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + goto out_failed; + } + + set_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + + atomic_inc(&fcport->num_active_ios); + atomic_dec(&fcport->free_sqes); + xid = io_req->xid; + atomic_dec(&cmd_mgr->free_list_cnt); + + io_req->cmd_mgr = cmd_mgr; + io_req->fcport = fcport; + + /* Hold the io_req against deletion */ + kref_init(&io_req->refcount); + + /* Bind io_bdt for this io_req */ + /* Have a static link between io_req and io_bdt_pool */ + bd_tbl = io_req->bd_tbl = cmd_mgr->io_bdt_pool[xid]; + if (bd_tbl == 
NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "bd_tbl is NULL, xid=%x.\n", xid); + kref_put(&io_req->refcount, qedf_release_cmd); + goto out_failed; + } + bd_tbl->io_req = io_req; + io_req->cmd_type = cmd_type; + + /* Reset sequence offset data */ + io_req->rx_buf_off = 0; + io_req->tx_buf_off = 0; + io_req->rx_id = 0xffff; /* No OX_ID */ + + return io_req; + +out_failed: + /* Record failure for stats and return NULL to caller */ + qedf->alloc_failures++; + return NULL; +} + +static void qedf_free_mp_resc(struct qedf_ioreq *io_req) +{ + struct qedf_mp_req *mp_req = &(io_req->mp_req); + struct qedf_ctx *qedf = io_req->fcport->qedf; + uint64_t sz = sizeof(struct fcoe_sge); + + /* clear tm flags */ + mp_req->tm_flags = 0; + if (mp_req->mp_req_bd) { + dma_free_coherent(&qedf->pdev->dev, sz, + mp_req->mp_req_bd, mp_req->mp_req_bd_dma); + mp_req->mp_req_bd = NULL; + } + if (mp_req->mp_resp_bd) { + dma_free_coherent(&qedf->pdev->dev, sz, + mp_req->mp_resp_bd, mp_req->mp_resp_bd_dma); + mp_req->mp_resp_bd = NULL; + } + if (mp_req->req_buf) { + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + mp_req->req_buf, mp_req->req_buf_dma); + mp_req->req_buf = NULL; + } + if (mp_req->resp_buf) { + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + mp_req->resp_buf, mp_req->resp_buf_dma); + mp_req->resp_buf = NULL; + } +} + +void qedf_release_cmd(struct kref *ref) +{ + struct qedf_ioreq *io_req = + container_of(ref, struct qedf_ioreq, refcount); + struct qedf_cmd_mgr *cmd_mgr = io_req->cmd_mgr; + struct qedf_rport *fcport = io_req->fcport; + + if (io_req->cmd_type == QEDF_ELS || + io_req->cmd_type == QEDF_TASK_MGMT_CMD) + qedf_free_mp_resc(io_req); + + atomic_inc(&cmd_mgr->free_list_cnt); + atomic_dec(&fcport->num_active_ios); + if (atomic_read(&fcport->num_active_ios) < 0) + QEDF_WARN(&(fcport->qedf->dbg_ctx), "active_ios < 0.\n"); + + /* Increment task retry identifier now that the request is released */ + io_req->task_retry_identifier++; + + clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); +} + +static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len, + int bd_index) +{ + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + int frag_size, sg_frags; + + sg_frags = 0; + while (sg_len) { + if (sg_len > QEDF_BD_SPLIT_SZ) + frag_size = QEDF_BD_SPLIT_SZ; + else + frag_size = sg_len; + bd[bd_index + sg_frags].sge_addr.lo = U64_LO(addr); + bd[bd_index + sg_frags].sge_addr.hi = U64_HI(addr); + bd[bd_index + sg_frags].size = (uint16_t)frag_size; + + addr += (u64)frag_size; + sg_frags++; + sg_len -= frag_size; + } + return sg_frags; +} + +static int qedf_map_sg(struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + struct Scsi_Host *host = sc->device->host; + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + struct scatterlist *sg; + int byte_count = 0; + int sg_count = 0; + int bd_count = 0; + int sg_frags; + unsigned int sg_len; + u64 addr, end_addr; + int i; + + sg_count = dma_map_sg(&qedf->pdev->dev, scsi_sglist(sc), + scsi_sg_count(sc), sc->sc_data_direction); + + sg = scsi_sglist(sc); + + /* + * New condition to send single SGE as cached-SGL with length less + * than 64k. 
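+ * In that case a single BD is filled in directly and the function
+ * returns early with a bd_count of one.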
+ */ + if ((sg_count == 1) && (sg_dma_len(sg) <= + QEDF_MAX_SGLEN_FOR_CACHESGL)) { + sg_len = sg_dma_len(sg); + addr = (u64)sg_dma_address(sg); + + bd[bd_count].sge_addr.lo = (addr & 0xffffffff); + bd[bd_count].sge_addr.hi = (addr >> 32); + bd[bd_count].size = (u16)sg_len; + + return ++bd_count; + } + + scsi_for_each_sg(sc, sg, sg_count, i) { + sg_len = sg_dma_len(sg); + addr = (u64)sg_dma_address(sg); + end_addr = (u64)(addr + sg_len); + + /* + * First s/g element in the list so check if the end_addr + * is paged aligned. Also check to make sure the length is + * at least page size. + */ + if ((i == 0) && (sg_count > 1) && + ((end_addr % QEDF_PAGE_SIZE) || + sg_len < QEDF_PAGE_SIZE)) + io_req->use_slowpath = true; + /* + * Last s/g element so check if the start address is paged + * aligned. + */ + else if ((i == (sg_count - 1)) && (sg_count > 1) && + (addr % QEDF_PAGE_SIZE)) + io_req->use_slowpath = true; + /* + * Intermediate s/g element so check if start and end address + * is page aligned. + */ + else if ((i != 0) && (i != (sg_count - 1)) && + ((addr % QEDF_PAGE_SIZE) || (end_addr % QEDF_PAGE_SIZE))) + io_req->use_slowpath = true; + + if (sg_len > QEDF_MAX_BD_LEN) { + sg_frags = qedf_split_bd(io_req, addr, sg_len, + bd_count); + } else { + sg_frags = 1; + bd[bd_count].sge_addr.lo = U64_LO(addr); + bd[bd_count].sge_addr.hi = U64_HI(addr); + bd[bd_count].size = (uint16_t)sg_len; + } + + bd_count += sg_frags; + byte_count += sg_len; + } + + if (byte_count != scsi_bufflen(sc)) + QEDF_ERR(&(qedf->dbg_ctx), "byte_count = %d != " + "scsi_bufflen = %d, task_id = 0x%x.\n", byte_count, + scsi_bufflen(sc), io_req->xid); + + return bd_count; +} + +static int qedf_build_bd_list_from_sg(struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + int bd_count; + + if (scsi_sg_count(sc)) { + bd_count = qedf_map_sg(io_req); + if (bd_count == 0) + return -ENOMEM; + } else { + bd_count = 0; + bd[0].sge_addr.lo = bd[0].sge_addr.hi = 0; + bd[0].size = 0; + } + io_req->bd_tbl->bd_valid = bd_count; + + return 0; +} + +static void qedf_build_fcp_cmnd(struct qedf_ioreq *io_req, + struct fcp_cmnd *fcp_cmnd) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + + /* fcp_cmnd is 32 bytes */ + memset(fcp_cmnd, 0, FCP_CMND_LEN); + + /* 8 bytes: SCSI LUN info */ + int_to_scsilun(sc_cmd->device->lun, + (struct scsi_lun *)&fcp_cmnd->fc_lun); + + /* 4 bytes: flag info */ + fcp_cmnd->fc_pri_ta = 0; + fcp_cmnd->fc_tm_flags = io_req->mp_req.tm_flags; + fcp_cmnd->fc_flags = io_req->io_req_flags; + fcp_cmnd->fc_cmdref = 0; + + /* Populate data direction */ + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) + fcp_cmnd->fc_flags |= FCP_CFL_WRDATA; + else if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) + fcp_cmnd->fc_flags |= FCP_CFL_RDDATA; + + fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE; + + /* 16 bytes: CDB information */ + memcpy(fcp_cmnd->fc_cdb, sc_cmd->cmnd, sc_cmd->cmd_len); + + /* 4 bytes: FCP data length */ + fcp_cmnd->fc_dl = htonl(io_req->data_xfer_len); + +} + +static void qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport, + struct qedf_ioreq *io_req, u32 *ptu_invalidate, + struct fcoe_task_context *task_ctx) +{ + enum fcoe_task_type task_type; + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct io_bdt *bd_tbl = io_req->bd_tbl; + union fcoe_data_desc_ctx *data_desc; + u32 *fcp_cmnd; + u32 tmp_fcp_cmnd[8]; + int cnt, i; + int bd_count; + struct qedf_ctx *qedf = fcport->qedf; + uint16_t cq_idx = smp_processor_id() % qedf->num_queues; + u8 
tmp_sgl_mode = 0; + u8 mst_sgl_mode = 0; + + memset(task_ctx, 0, sizeof(struct fcoe_task_context)); + io_req->task = task_ctx; + + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) + task_type = FCOE_TASK_TYPE_WRITE_INITIATOR; + else + task_type = FCOE_TASK_TYPE_READ_INITIATOR; + + /* Y Storm context */ + task_ctx->ystorm_st_context.expect_first_xfer = 1; + task_ctx->ystorm_st_context.data_2_trns_rem = io_req->data_xfer_len; + /* Check if this is required */ + task_ctx->ystorm_st_context.ox_id = io_req->xid; + task_ctx->ystorm_st_context.task_rety_identifier = + io_req->task_retry_identifier; + + /* T Storm ag context */ + SET_FIELD(task_ctx->tstorm_ag_context.flags0, + TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, PROTOCOLID_FCOE); + task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid; + + /* T Storm st context */ + SET_FIELD(task_ctx->tstorm_st_context.read_write.flags, + FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME, + 1); + task_ctx->tstorm_st_context.read_write.rx_id = 0xffff; + + task_ctx->tstorm_st_context.read_only.dev_type = + FCOE_TASK_DEV_TYPE_DISK; + task_ctx->tstorm_st_context.read_only.conf_supported = 0; + task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid; + + /* Completion queue for response. */ + task_ctx->tstorm_st_context.read_only.glbl_q_num = cq_idx; + task_ctx->tstorm_st_context.read_only.fcp_cmd_trns_size = + io_req->data_xfer_len; + task_ctx->tstorm_st_context.read_write.e_d_tov_exp_timeout_val = + lport->e_d_tov; + + task_ctx->ustorm_ag_context.global_cq_num = cq_idx; + io_req->fp_idx = cq_idx; + + bd_count = bd_tbl->bd_valid; + if (task_type == FCOE_TASK_TYPE_WRITE_INITIATOR) { + /* Setup WRITE task */ + struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl; + + task_ctx->ystorm_st_context.task_type = + FCOE_TASK_TYPE_WRITE_INITIATOR; + data_desc = &task_ctx->ystorm_st_context.data_desc; + + if (io_req->use_slowpath) { + SET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE, + FCOE_SLOW_SGL); + data_desc->slow.base_sgl_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->slow.remainder_num_sges = bd_count; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; + } else { + SET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE, + (bd_count <= 4) ? 
(enum fcoe_sgl_mode)bd_count : + FCOE_MUL_FAST_SGES); + + if (bd_count == 1) { + data_desc->single_sge.sge_addr.lo = + fcoe_bd_tbl->sge_addr.lo; + data_desc->single_sge.sge_addr.hi = + fcoe_bd_tbl->sge_addr.hi; + data_desc->single_sge.size = + fcoe_bd_tbl->size; + data_desc->single_sge.is_valid_sge = 0; + qedf->single_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SINGLE_SGE; + } else { + data_desc->fast.sgl_start_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_start_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_byte_offset = + data_desc->fast.sgl_start_addr.lo & + (QEDF_PAGE_SIZE - 1); + if (data_desc->fast.sgl_byte_offset > 0) + QEDF_ERR(&(qedf->dbg_ctx), + "byte_offset=%u for xid=0x%x.\n", + io_req->xid, + data_desc->fast.sgl_byte_offset); + data_desc->fast.task_reuse_cnt = + io_req->reuse_count; + io_req->reuse_count++; + if (io_req->reuse_count == QEDF_MAX_REUSE) { + *ptu_invalidate = 1; + io_req->reuse_count = 0; + } + qedf->fast_sge_ios++; + io_req->sge_type = QEDF_IOREQ_FAST_SGE; + } + } + + /* T Storm context */ + task_ctx->tstorm_st_context.read_only.task_type = + FCOE_TASK_TYPE_WRITE_INITIATOR; + + /* M Storm context */ + tmp_sgl_mode = GET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE); + SET_FIELD(task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE, + tmp_sgl_mode); + + } else { + /* Setup READ task */ + + /* M Storm context */ + struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl; + + data_desc = &task_ctx->mstorm_st_context.fp.data_desc; + task_ctx->mstorm_st_context.fp.data_2_trns_rem = + io_req->data_xfer_len; + + if (io_req->use_slowpath) { + SET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE, + FCOE_SLOW_SGL); + data_desc->slow.base_sgl_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->slow.remainder_num_sges = + bd_count; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; + } else { + SET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE, + (bd_count <= 4) ? 
(enum fcoe_sgl_mode)bd_count : + FCOE_MUL_FAST_SGES); + + if (bd_count == 1) { + data_desc->single_sge.sge_addr.lo = + fcoe_bd_tbl->sge_addr.lo; + data_desc->single_sge.sge_addr.hi = + fcoe_bd_tbl->sge_addr.hi; + data_desc->single_sge.size = + fcoe_bd_tbl->size; + data_desc->single_sge.is_valid_sge = 0; + qedf->single_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SINGLE_SGE; + } else { + data_desc->fast.sgl_start_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_start_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_byte_offset = 0; + data_desc->fast.task_reuse_cnt = + io_req->reuse_count; + io_req->reuse_count++; + if (io_req->reuse_count == QEDF_MAX_REUSE) { + *ptu_invalidate = 1; + io_req->reuse_count = 0; + } + qedf->fast_sge_ios++; + io_req->sge_type = QEDF_IOREQ_FAST_SGE; + } + } + + /* Y Storm context */ + task_ctx->ystorm_st_context.expect_first_xfer = 0; + task_ctx->ystorm_st_context.task_type = + FCOE_TASK_TYPE_READ_INITIATOR; + + /* T Storm context */ + task_ctx->tstorm_st_context.read_only.task_type = + FCOE_TASK_TYPE_READ_INITIATOR; + mst_sgl_mode = GET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE); + SET_FIELD(task_ctx->tstorm_st_context.read_write.flags, + FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE, + mst_sgl_mode); + } + + /* fill FCP_CMND IU */ + fcp_cmnd = (u32 *)task_ctx->ystorm_st_context.tx_info_union.fcp_cmd_payload.opaque; + qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)&tmp_fcp_cmnd); + + /* Swap fcp_cmnd since FC is big endian */ + cnt = sizeof(struct fcp_cmnd) / sizeof(u32); + + for (i = 0; i < cnt; i++) { + *fcp_cmnd = cpu_to_be32(tmp_fcp_cmnd[i]); + fcp_cmnd++; + } + + /* M Storm context - Sense buffer */ + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo = + U64_LO(io_req->sense_buffer_dma); + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi = + U64_HI(io_req->sense_buffer_dma); +} + +void qedf_init_mp_task(struct qedf_ioreq *io_req, + struct fcoe_task_context *task_ctx) +{ + struct qedf_mp_req *mp_req = &(io_req->mp_req); + struct qedf_rport *fcport = io_req->fcport; + struct qedf_ctx *qedf = io_req->fcport->qedf; + struct fc_frame_header *fc_hdr; + enum fcoe_task_type task_type = 0; + union fcoe_data_desc_ctx *data_desc; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Initializing MP task " + "for cmd_type = %d\n", io_req->cmd_type); + + qedf->control_requests++; + + /* Obtain task_type */ + if ((io_req->cmd_type == QEDF_TASK_MGMT_CMD) || + (io_req->cmd_type == QEDF_ELS)) { + task_type = FCOE_TASK_TYPE_MIDPATH; + } else if (io_req->cmd_type == QEDF_ABTS) { + task_type = FCOE_TASK_TYPE_ABTS; + } + + memset(task_ctx, 0, sizeof(struct fcoe_task_context)); + + /* Setup the task from io_req for easy reference */ + io_req->task = task_ctx; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "task type = %d\n", + task_type); + + /* YSTORM only */ + { + /* Initialize YSTORM task context */ + struct fcoe_tx_mid_path_params *task_fc_hdr = + &task_ctx->ystorm_st_context.tx_info_union.tx_params.mid_path; + memset(task_fc_hdr, 0, sizeof(struct fcoe_tx_mid_path_params)); + task_ctx->ystorm_st_context.task_rety_identifier = + io_req->task_retry_identifier; + + /* Init SGL parameters */ + if ((task_type == FCOE_TASK_TYPE_MIDPATH) || + (task_type == FCOE_TASK_TYPE_UNSOLICITED)) { + data_desc = &task_ctx->ystorm_st_context.data_desc; + data_desc->slow.base_sgl_addr.lo = + U64_LO(mp_req->mp_req_bd_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(mp_req->mp_req_bd_dma); + 
data_desc->slow.remainder_num_sges = 1;
+ data_desc->slow.curr_sge_off = 0;
+ data_desc->slow.curr_sgl_index = 0;
+ }
+
+ fc_hdr = &(mp_req->req_fc_hdr);
+ if (task_type == FCOE_TASK_TYPE_MIDPATH) {
+ fc_hdr->fh_ox_id = io_req->xid;
+ fc_hdr->fh_rx_id = htons(0xffff);
+ } else if (task_type == FCOE_TASK_TYPE_UNSOLICITED) {
+ fc_hdr->fh_rx_id = io_req->xid;
+ }
+
+ /* Fill FC Header into middle path buffer */
+ task_fc_hdr->parameter = fc_hdr->fh_parm_offset;
+ task_fc_hdr->r_ctl = fc_hdr->fh_r_ctl;
+ task_fc_hdr->type = fc_hdr->fh_type;
+ task_fc_hdr->cs_ctl = fc_hdr->fh_cs_ctl;
+ task_fc_hdr->df_ctl = fc_hdr->fh_df_ctl;
+ task_fc_hdr->rx_id = fc_hdr->fh_rx_id;
+ task_fc_hdr->ox_id = fc_hdr->fh_ox_id;
+
+ task_ctx->ystorm_st_context.data_2_trns_rem =
+ io_req->data_xfer_len;
+ task_ctx->ystorm_st_context.task_type = task_type;
+ }
+
+ /* TSTORM ONLY */
+ {
+ task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid;
+ task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid;
+ /* Always send middle-path responses on CQ #0 */
+ task_ctx->tstorm_st_context.read_only.glbl_q_num = 0;
+ io_req->fp_idx = 0;
+ SET_FIELD(task_ctx->tstorm_ag_context.flags0,
+ TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE,
+ PROTOCOLID_FCOE);
+ task_ctx->tstorm_st_context.read_only.task_type = task_type;
+ SET_FIELD(task_ctx->tstorm_st_context.read_write.flags,
+ FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME,
+ 1);
+ task_ctx->tstorm_st_context.read_write.rx_id = 0xffff;
+ }
+
+ /* MSTORM only */
+ {
+ if (task_type == FCOE_TASK_TYPE_MIDPATH) {
+ /* Initialize task context */
+ data_desc = &task_ctx->mstorm_st_context.fp.data_desc;
+
+ /* Set cache sges address and length */
+ data_desc->slow.base_sgl_addr.lo =
+ U64_LO(mp_req->mp_resp_bd_dma);
+ data_desc->slow.base_sgl_addr.hi =
+ U64_HI(mp_req->mp_resp_bd_dma);
+ data_desc->slow.remainder_num_sges = 1;
+ data_desc->slow.curr_sge_off = 0;
+ data_desc->slow.curr_sgl_index = 0;
+
+ /*
+ * Also need to fill in non-fastpath response address
+ * for middle path commands.
+ */
+ task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo =
+ U64_LO(mp_req->mp_resp_bd_dma);
+ task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi =
+ U64_HI(mp_req->mp_resp_bd_dma);
+ }
+ }
+
+ /* USTORM ONLY */
+ {
+ task_ctx->ustorm_ag_context.global_cq_num = 0;
+ }
+
+ /* I/O stats.
Middle path commands always use slow SGEs */ + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; +} + +void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid, u32 ptu_invalidate, + enum fcoe_task_type req_type, u32 offset) +{ + struct fcoe_wqe *sqe; + uint16_t total_sqe = (fcport->sq_mem_size)/(sizeof(struct fcoe_wqe)); + + sqe = &fcport->sq[fcport->sq_prod_idx]; + + fcport->sq_prod_idx++; + fcport->fw_sq_prod_idx++; + if (fcport->sq_prod_idx == total_sqe) + fcport->sq_prod_idx = 0; + + switch (req_type) { + case FCOE_TASK_TYPE_WRITE_INITIATOR: + case FCOE_TASK_TYPE_READ_INITIATOR: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_CMD); + if (ptu_invalidate) + SET_FIELD(sqe->flags, FCOE_WQE_INVALIDATE_PTU, 1); + break; + case FCOE_TASK_TYPE_MIDPATH: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_MIDPATH); + break; + case FCOE_TASK_TYPE_ABTS: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + SEND_FCOE_ABTS_REQUEST); + break; + case FCOE_TASK_TYPE_EXCHANGE_CLEANUP: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + FCOE_EXCHANGE_CLEANUP); + break; + case FCOE_TASK_TYPE_SEQUENCE_CLEANUP: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + FCOE_SEQUENCE_RECOVERY); + /* NOTE: offset param only used for sequence recovery */ + sqe->additional_info_union.seq_rec_updated_offset = offset; + break; + case FCOE_TASK_TYPE_UNSOLICITED: + break; + default: + break; + } + + sqe->task_id = xid; + + /* Make sure SQ data is coherent */ + wmb(); + +} + +void qedf_ring_doorbell(struct qedf_rport *fcport) +{ + struct fcoe_db_data dbell = { 0 }; + + dbell.agg_flags = 0; + + dbell.params |= DB_DEST_XCM << FCOE_DB_DATA_DEST_SHIFT; + dbell.params |= DB_AGG_CMD_SET << FCOE_DB_DATA_AGG_CMD_SHIFT; + dbell.params |= DQ_XCM_FCOE_SQ_PROD_CMD << + FCOE_DB_DATA_AGG_VAL_SEL_SHIFT; + + dbell.sq_prod = fcport->fw_sq_prod_idx; + writel(*(u32 *)&dbell, fcport->p_doorbell); + /* Make sure SQ index is updated so f/w prcesses requests in order */ + wmb(); + mmiowb(); +} + +static void qedf_trace_io(struct qedf_rport *fcport, struct qedf_ioreq *io_req, + int8_t direction) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct qedf_io_log *io_log; + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + unsigned long flags; + uint8_t op; + + spin_lock_irqsave(&qedf->io_trace_lock, flags); + + io_log = &qedf->io_trace_buf[qedf->io_trace_idx]; + io_log->direction = direction; + io_log->task_id = io_req->xid; + io_log->port_id = fcport->rdata->ids.port_id; + io_log->lun = sc_cmd->device->lun; + io_log->op = op = sc_cmd->cmnd[0]; + io_log->lba[0] = sc_cmd->cmnd[2]; + io_log->lba[1] = sc_cmd->cmnd[3]; + io_log->lba[2] = sc_cmd->cmnd[4]; + io_log->lba[3] = sc_cmd->cmnd[5]; + io_log->bufflen = scsi_bufflen(sc_cmd); + io_log->sg_count = scsi_sg_count(sc_cmd); + io_log->result = sc_cmd->result; + io_log->jiffies = jiffies; + io_log->refcount = atomic_read(&io_req->refcount.refcount); + + if (direction == QEDF_IO_TRACE_REQ) { + /* For requests we only care abot the submission CPU */ + io_log->req_cpu = io_req->cpu; + io_log->int_cpu = 0; + io_log->rsp_cpu = 0; + } else if (direction == QEDF_IO_TRACE_RSP) { + io_log->req_cpu = io_req->cpu; + io_log->int_cpu = io_req->int_cpu; + io_log->rsp_cpu = smp_processor_id(); + } + + io_log->sge_type = io_req->sge_type; + + qedf->io_trace_idx++; + if (qedf->io_trace_idx == QEDF_IO_TRACE_SIZE) + qedf->io_trace_idx = 0; + + spin_unlock_irqrestore(&qedf->io_trace_lock, flags); +} + +int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct 
Scsi_Host *host = sc_cmd->device->host; + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fcoe_task_context *task_ctx; + u16 xid; + enum fcoe_task_type req_type = 0; + u32 ptu_invalidate = 0; + + /* Initialize rest of io_req fileds */ + io_req->data_xfer_len = scsi_bufflen(sc_cmd); + sc_cmd->SCp.ptr = (char *)io_req; + io_req->use_slowpath = false; /* Assume fast SGL by default */ + + /* Record which cpu this request is associated with */ + io_req->cpu = smp_processor_id(); + + if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { + req_type = FCOE_TASK_TYPE_READ_INITIATOR; + io_req->io_req_flags = QEDF_READ; + qedf->input_requests++; + } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + req_type = FCOE_TASK_TYPE_WRITE_INITIATOR; + io_req->io_req_flags = QEDF_WRITE; + qedf->output_requests++; + } else { + io_req->io_req_flags = 0; + qedf->control_requests++; + } + + xid = io_req->xid; + + /* Build buffer descriptor list for firmware from sg list */ + if (qedf_build_bd_list_from_sg(io_req)) { + QEDF_ERR(&(qedf->dbg_ctx), "BD list creation failed.\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + return -EAGAIN; + } + + /* Get the task context */ + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + if (!task_ctx) { + QEDF_WARN(&(qedf->dbg_ctx), "task_ctx is NULL, xid=%d.\n", + xid); + kref_put(&io_req->refcount, qedf_release_cmd); + return -EINVAL; + } + + qedf_init_task(fcport, lport, io_req, &ptu_invalidate, task_ctx); + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + } + + /* Obtain free SQ entry */ + qedf_add_to_sq(fcport, xid, ptu_invalidate, req_type, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + + if (qedf_io_tracing && io_req->sc_cmd) + qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_REQ); + + return false; +} + +int +qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd) +{ + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport = rport->dd_data; + struct qedf_ioreq *io_req; + int rc = 0; + int rval; + unsigned long flags = 0; + + + if (test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) { + sc_cmd->result = DID_NO_CONNECT << 16; + sc_cmd->scsi_done(sc_cmd); + return 0; + } + + rval = fc_remote_port_chkready(rport); + if (rval) { + sc_cmd->result = rval; + sc_cmd->scsi_done(sc_cmd); + return 0; + } + + /* Retry command if we are doing a qed drain operation */ + if (test_bit(QEDF_DRAIN_ACTIVE, &qedf->flags)) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + if (lport->state != LPORT_ST_READY || + atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + /* rport and tgt are allocated together, so tgt should be non-NULL */ + fcport = (struct qedf_rport *)&rp[1]; + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + /* + * Session is not offloaded yet. Let SCSI-ml retry + * the command. 
+ */ + rc = SCSI_MLQUEUE_TARGET_BUSY; + goto exit_qcmd; + } + if (fcport->retry_delay_timestamp) { + if (time_after(jiffies, fcport->retry_delay_timestamp)) { + fcport->retry_delay_timestamp = 0; + } else { + /* If retry_delay timer is active, flow off the ML */ + rc = SCSI_MLQUEUE_TARGET_BUSY; + goto exit_qcmd; + } + } + + io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); + if (!io_req) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + io_req->sc_cmd = sc_cmd; + + /* Take fcport->rport_lock for posting to fcport send queue */ + spin_lock_irqsave(&fcport->rport_lock, flags); + if (qedf_post_io_req(fcport, io_req)) { + QEDF_WARN(&(qedf->dbg_ctx), "Unable to post io_req\n"); + /* Return SQE to pool */ + atomic_inc(&fcport->free_sqes); + rc = SCSI_MLQUEUE_HOST_BUSY; + } + spin_unlock_irqrestore(&fcport->rport_lock, flags); + +exit_qcmd: + return rc; +} + +static void qedf_parse_fcp_rsp(struct qedf_ioreq *io_req, + struct fcoe_cqe_rsp_info *fcp_rsp) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct qedf_ctx *qedf = io_req->fcport->qedf; + u8 rsp_flags = fcp_rsp->rsp_flags.flags; + int fcp_sns_len = 0; + int fcp_rsp_len = 0; + uint8_t *rsp_info, *sense_data; + + io_req->fcp_status = FC_GOOD; + io_req->fcp_resid = 0; + if (rsp_flags & (FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER | + FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER)) + io_req->fcp_resid = fcp_rsp->fcp_resid; + + io_req->scsi_comp_flags = rsp_flags; + CMD_SCSI_STATUS(sc_cmd) = io_req->cdb_status = + fcp_rsp->scsi_status_code; + + if (rsp_flags & + FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID) + fcp_rsp_len = fcp_rsp->fcp_rsp_len; + + if (rsp_flags & + FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID) + fcp_sns_len = fcp_rsp->fcp_sns_len; + + io_req->fcp_rsp_len = fcp_rsp_len; + io_req->fcp_sns_len = fcp_sns_len; + rsp_info = sense_data = io_req->sense_buffer; + + /* fetch fcp_rsp_code */ + if ((fcp_rsp_len == 4) || (fcp_rsp_len == 8)) { + /* Only for task management function */ + io_req->fcp_rsp_code = rsp_info[3]; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "fcp_rsp_code = %d\n", io_req->fcp_rsp_code); + /* Adjust sense-data location. 
*/ + sense_data += fcp_rsp_len; + } + + if (fcp_sns_len > SCSI_SENSE_BUFFERSIZE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Truncating sense buffer\n"); + fcp_sns_len = SCSI_SENSE_BUFFERSIZE; + } + + memset(sc_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); + if (fcp_sns_len) + memcpy(sc_cmd->sense_buffer, sense_data, + fcp_sns_len); +} + +static void qedf_unmap_sg_list(struct qedf_ctx *qedf, struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + + if (io_req->bd_tbl->bd_valid && sc && scsi_sg_count(sc)) { + dma_unmap_sg(&qedf->pdev->dev, scsi_sglist(sc), + scsi_sg_count(sc), sc->sc_data_direction); + io_req->bd_tbl->bd_valid = 0; + } +} + +void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + u16 xid, rval; + struct fcoe_task_context *task_ctx; + struct scsi_cmnd *sc_cmd; + struct fcoe_cqe_rsp_info *fcp_rsp; + struct qedf_rport *fcport; + int refcount; + u16 scope, qualifier = 0; + u8 fw_residual_flag = 0; + + if (!io_req) + return; + if (!cqe) + return; + + xid = io_req->xid; + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + sc_cmd = io_req->sc_cmd; + fcp_rsp = &cqe->cqe_info.rsp_info; + + if (!sc_cmd) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd is NULL!\n"); + return; + } + + if (!sc_cmd->SCp.ptr) { + QEDF_WARN(&(qedf->dbg_ctx), "SCp.ptr is NULL, returned in " + "another context.\n"); + return; + } + + if (!sc_cmd->request) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd->request is NULL, " + "sc_cmd=%p.\n", sc_cmd); + return; + } + + if (!sc_cmd->request->special) { + QEDF_WARN(&(qedf->dbg_ctx), "request->special is NULL so " + "request not valid, sc_cmd=%p.\n", sc_cmd); + return; + } + + if (!sc_cmd->request->q) { + QEDF_WARN(&(qedf->dbg_ctx), "request->q is NULL so request " + "is not valid, sc_cmd=%p.\n", sc_cmd); + return; + } + + fcport = io_req->fcport; + + qedf_parse_fcp_rsp(io_req, fcp_rsp); + + qedf_unmap_sg_list(qedf, io_req); + + /* Check for FCP transport error */ + if (io_req->fcp_rsp_len > 3 && io_req->fcp_rsp_code) { + QEDF_ERR(&(qedf->dbg_ctx), + "FCP I/O protocol failure xid=0x%x fcp_rsp_len=%d " + "fcp_rsp_code=%d.\n", io_req->xid, io_req->fcp_rsp_len, + io_req->fcp_rsp_code); + sc_cmd->result = DID_BUS_BUSY << 16; + goto out; + } + + fw_residual_flag = GET_FIELD(cqe->cqe_info.rsp_info.fw_error_flags, + FCOE_CQE_RSP_INFO_FW_UNDERRUN); + if (fw_residual_flag) { + QEDF_ERR(&(qedf->dbg_ctx), + "Firmware detected underrun: xid=0x%x fcp_rsp.flags=0x%02x " + "fcp_resid=%d fw_residual=0x%x.\n", io_req->xid, + fcp_rsp->rsp_flags.flags, io_req->fcp_resid, + cqe->cqe_info.rsp_info.fw_residual); + + if (io_req->cdb_status == 0) + sc_cmd->result = (DID_ERROR << 16) | io_req->cdb_status; + else + sc_cmd->result = (DID_OK << 16) | io_req->cdb_status; + + /* Abort the command since we did not get all the data */ + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); + sc_cmd->result = (DID_ERROR << 16) | io_req->cdb_status; + } + + /* + * Set resid to the whole buffer length so we won't try to resue + * any previously data. 
+ */ + scsi_set_resid(sc_cmd, scsi_bufflen(sc_cmd)); + goto out; + } + + switch (io_req->fcp_status) { + case FC_GOOD: + if (io_req->cdb_status == 0) { + /* Good I/O completion */ + sc_cmd->result = DID_OK << 16; + } else { + refcount = atomic_read(&io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "%d:0:%d:%d xid=0x%0x op=0x%02x " + "lba=%02x%02x%02x%02x cdb_status=%d " + "fcp_resid=0x%x refcount=%d.\n", + qedf->lport->host->host_no, sc_cmd->device->id, + sc_cmd->device->lun, io_req->xid, + sc_cmd->cmnd[0], sc_cmd->cmnd[2], sc_cmd->cmnd[3], + sc_cmd->cmnd[4], sc_cmd->cmnd[5], + io_req->cdb_status, io_req->fcp_resid, + refcount); + sc_cmd->result = (DID_OK << 16) | io_req->cdb_status; + + if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL || + io_req->cdb_status == SAM_STAT_BUSY) { + /* + * Check whether we need to set retry_delay at + * all based on retry_delay module parameter + * and the status qualifier. + */ + + /* Upper 2 bits */ + scope = fcp_rsp->retry_delay_timer & 0xC000; + /* Lower 14 bits */ + qualifier = fcp_rsp->retry_delay_timer & 0x3FFF; + + if (qedf_retry_delay && + scope > 0 && qualifier > 0 && + qualifier <= 0x3FEF) { + /* Check we don't go over the max */ + if (qualifier > QEDF_RETRY_DELAY_MAX) + qualifier = + QEDF_RETRY_DELAY_MAX; + fcport->retry_delay_timestamp = + jiffies + (qualifier * HZ / 10); + } + } + } + if (io_req->fcp_resid) + scsi_set_resid(sc_cmd, io_req->fcp_resid); + break; + default: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "fcp_status=%d.\n", + io_req->fcp_status); + break; + } + +out: + if (qedf_io_tracing) + qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_RSP); + + io_req->sc_cmd = NULL; + sc_cmd->SCp.ptr = NULL; + sc_cmd->scsi_done(sc_cmd); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +/* Return a SCSI command in some other context besides a normal completion */ +void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + int result) +{ + u16 xid; + struct scsi_cmnd *sc_cmd; + int refcount; + + if (!io_req) + return; + + xid = io_req->xid; + sc_cmd = io_req->sc_cmd; + + if (!sc_cmd) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd is NULL!\n"); + return; + } + + if (!sc_cmd->SCp.ptr) { + QEDF_WARN(&(qedf->dbg_ctx), "SCp.ptr is NULL, returned in " + "another context.\n"); + return; + } + + qedf_unmap_sg_list(qedf, io_req); + + sc_cmd->result = result << 16; + refcount = atomic_read(&io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " + "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " + "allowed=%d retries=%d refcount=%d.\n", + qedf->lport->host->host_no, sc_cmd->device->id, + sc_cmd->device->lun, sc_cmd, sc_cmd->result, sc_cmd->cmnd[0], + sc_cmd->cmnd[2], sc_cmd->cmnd[3], sc_cmd->cmnd[4], + sc_cmd->cmnd[5], sc_cmd->allowed, sc_cmd->retries, + refcount); + + /* + * Set resid to the whole buffer length so we won't try to resue any + * previously read data + */ + scsi_set_resid(sc_cmd, scsi_bufflen(sc_cmd)); + + if (qedf_io_tracing) + qedf_trace_io(io_req->fcport, io_req, QEDF_IO_TRACE_RSP); + + io_req->sc_cmd = NULL; + sc_cmd->SCp.ptr = NULL; + sc_cmd->scsi_done(sc_cmd); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +/* + * Handle warning type CQE completions. This is mainly used for REC timer + * popping. 
+ */ +void qedf_process_warning_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + int rval, i; + struct qedf_rport *fcport = io_req->fcport; + u64 err_warn_bit_map; + u8 err_warn = 0xff; + + if (!cqe) + return; + + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Warning CQE, " + "xid=0x%x\n", io_req->xid); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), + "err_warn_bitmap=%08x:%08x\n", + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_hi), + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_lo)); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "tx_buff_off=%08x, " + "rx_buff_off=%08x, rx_id=%04x\n", + le32_to_cpu(cqe->cqe_info.err_info.tx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_id)); + + /* Normalize the error bitmap value to an just an unsigned int */ + err_warn_bit_map = (u64) + ((u64)cqe->cqe_info.err_info.err_warn_bitmap_hi << 32) | + (u64)cqe->cqe_info.err_info.err_warn_bitmap_lo; + for (i = 0; i < 64; i++) { + if (err_warn_bit_map & (u64)((u64)1 << i)) { + err_warn = i; + break; + } + } + + /* Check if REC TOV expired if this is a tape device */ + if (fcport->dev_type == QEDF_RPORT_TYPE_TAPE) { + if (err_warn == + FCOE_WARNING_CODE_REC_TOV_TIMER_EXPIRATION) { + QEDF_ERR(&(qedf->dbg_ctx), "REC timer expired.\n"); + if (!test_bit(QEDF_CMD_SRR_SENT, &io_req->flags)) { + io_req->rx_buf_off = + cqe->cqe_info.err_info.rx_buf_off; + io_req->tx_buf_off = + cqe->cqe_info.err_info.tx_buf_off; + io_req->rx_id = cqe->cqe_info.err_info.rx_id; + rval = qedf_send_rec(io_req); + /* + * We only want to abort the io_req if we + * can't queue the REC command as we want to + * keep the exchange open for recovery. + */ + if (rval) + goto send_abort; + } + return; + } + } + +send_abort: + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); +} + +/* Cleanup a command when we receive an error detection completion */ +void qedf_process_error_detect(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + int rval; + + if (!cqe) + return; + + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Error detection CQE, " + "xid=0x%x\n", io_req->xid); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), + "err_warn_bitmap=%08x:%08x\n", + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_hi), + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_lo)); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "tx_buff_off=%08x, " + "rx_buff_off=%08x, rx_id=%04x\n", + le32_to_cpu(cqe->cqe_info.err_info.tx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_id)); + + if (qedf->stop_io_on_error) { + qedf_stop_all_io(qedf); + return; + } + + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); +} + +static void qedf_flush_els_req(struct qedf_ctx *qedf, + struct qedf_ioreq *els_req) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Flushing ELS request xid=0x%x refcount=%d.\n", els_req->xid, + atomic_read(&els_req->refcount.refcount)); + + /* + * Need to distinguish this from a timeout when calling the + * els_req->cb_func. 
+ */ + els_req->event = QEDF_IOREQ_EV_ELS_FLUSH; + + /* Cancel the timer */ + cancel_delayed_work_sync(&els_req->timeout_work); + + /* Call callback function to complete command */ + if (els_req->cb_func && els_req->cb_arg) { + els_req->cb_func(els_req->cb_arg); + els_req->cb_arg = NULL; + } + + /* Release kref for original initiate_els */ + kref_put(&els_req->refcount, qedf_release_cmd); +} + +/* A value of -1 for lun is a wild card that means flush all + * active SCSI I/Os for the target. + */ +void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) +{ + struct qedf_ioreq *io_req; + struct qedf_ctx *qedf; + struct qedf_cmd_mgr *cmd_mgr; + int i, rc; + + if (!fcport) + return; + + qedf = fcport->qedf; + cmd_mgr = qedf->cmd_mgr; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Flush active i/o's.\n"); + + for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { + io_req = &cmd_mgr->cmds[i]; + + if (!io_req) + continue; + if (io_req->fcport != fcport) + continue; + if (io_req->cmd_type == QEDF_ELS) { + rc = kref_get_unless_zero(&io_req->refcount); + if (!rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "Could not get kref for io_req=0x%p.\n", + io_req); + continue; + } + qedf_flush_els_req(qedf, io_req); + /* + * Release the kref and go back to the top of the + * loop. + */ + goto free_cmd; + } + + if (!io_req->sc_cmd) + continue; + if (lun > 0) { + if (io_req->sc_cmd->device->lun != + (u64)lun) + continue; + } + + /* + * Use kref_get_unless_zero in the unlikely case the command + * we're about to flush was completed in the normal SCSI path + */ + rc = kref_get_unless_zero(&io_req->refcount); + if (!rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not get kref for " + "io_req=0x%p\n", io_req); + continue; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Cleanup xid=0x%x.\n", io_req->xid); + + /* Cleanup task and return I/O mid-layer */ + qedf_initiate_cleanup(io_req, true); + +free_cmd: + kref_put(&io_req->refcount, qedf_release_cmd); + } +} + +/* + * Initiate a ABTS middle path command. Note that we don't have to initialize + * the task context for an ABTS task. 
+ */ +int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) +{ + struct fc_lport *lport; + struct qedf_rport *fcport = io_req->fcport; + struct fc_rport_priv *rdata = fcport->rdata; + struct qedf_ctx *qedf = fcport->qedf; + u16 xid; + u32 r_a_tov = 0; + int rc = 0; + unsigned long flags; + + r_a_tov = rdata->r_a_tov; + lport = qedf->lport; + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "tgt not offloaded\n"); + rc = 1; + goto abts_err; + } + + if (lport->state != LPORT_ST_READY || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link is not ready\n"); + rc = 1; + goto abts_err; + } + + if (atomic_read(&qedf->link_down_tmo_valid) > 0) { + QEDF_ERR(&(qedf->dbg_ctx), "link_down_tmo active.\n"); + rc = 1; + goto abts_err; + } + + /* Ensure room on SQ */ + if (!atomic_read(&fcport->free_sqes)) { + QEDF_ERR(&(qedf->dbg_ctx), "No SQ entries available\n"); + rc = 1; + goto abts_err; + } + + + kref_get(&io_req->refcount); + + xid = io_req->xid; + qedf->control_requests++; + qedf->packet_aborts++; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + /* Set the command type to abort */ + io_req->cmd_type = QEDF_ABTS; + io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; + + set_bit(QEDF_CMD_IN_ABORT, &io_req->flags); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "ABTS io_req xid = " + "0x%x\n", xid); + + qedf_cmd_timer_set(qedf, io_req, QEDF_ABORT_TIMEOUT * HZ); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + /* Add ABTS to send queue */ + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_ABTS, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + return rc; +abts_err: + /* + * If the ABTS task fails to queue then we need to cleanup the + * task at the firmware. + */ + qedf_initiate_cleanup(io_req, return_scsi_cmd_on_abts); + return rc; +} + +void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + uint32_t r_ctl; + uint16_t xid; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "Entered with xid = " + "0x%x cmd_type = %d\n", io_req->xid, io_req->cmd_type); + + cancel_delayed_work(&io_req->timeout_work); + + xid = io_req->xid; + r_ctl = cqe->cqe_info.abts_info.r_ctl; + + switch (r_ctl) { + case FC_RCTL_BA_ACC: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "ABTS response - ACC Send RRQ after R_A_TOV\n"); + io_req->event = QEDF_IOREQ_EV_ABORT_SUCCESS; + /* + * Dont release this cmd yet. 
It will be relesed + * after we get RRQ response + */ + kref_get(&io_req->refcount); + queue_delayed_work(qedf->dpc_wq, &io_req->rrq_work, + msecs_to_jiffies(qedf->lport->r_a_tov)); + break; + /* For error cases let the cleanup return the command */ + case FC_RCTL_BA_RJT: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "ABTS response - RJT\n"); + io_req->event = QEDF_IOREQ_EV_ABORT_FAILED; + break; + default: + QEDF_ERR(&(qedf->dbg_ctx), "Unknown ABTS response\n"); + break; + } + + clear_bit(QEDF_CMD_IN_ABORT, &io_req->flags); + + if (io_req->sc_cmd) { + if (io_req->return_scsi_cmd_on_abts) + qedf_scsi_done(qedf, io_req, DID_ERROR); + } + + /* Notify eh_abort handler that ABTS is complete */ + complete(&io_req->abts_done); + + kref_put(&io_req->refcount, qedf_release_cmd); +} + +int qedf_init_mp_req(struct qedf_ioreq *io_req) +{ + struct qedf_mp_req *mp_req; + struct fcoe_sge *mp_req_bd; + struct fcoe_sge *mp_resp_bd; + struct qedf_ctx *qedf = io_req->fcport->qedf; + dma_addr_t addr; + uint64_t sz; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_MP_REQ, "Entered.\n"); + + mp_req = (struct qedf_mp_req *)&(io_req->mp_req); + memset(mp_req, 0, sizeof(struct qedf_mp_req)); + + if (io_req->cmd_type != QEDF_ELS) { + mp_req->req_len = sizeof(struct fcp_cmnd); + io_req->data_xfer_len = mp_req->req_len; + } else + mp_req->req_len = io_req->data_xfer_len; + + mp_req->req_buf = dma_alloc_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + &mp_req->req_buf_dma, GFP_KERNEL); + if (!mp_req->req_buf) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP req buffer\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + mp_req->resp_buf = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_PAGE_SIZE, &mp_req->resp_buf_dma, GFP_KERNEL); + if (!mp_req->resp_buf) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc TM resp " + "buffer\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + /* Allocate and map mp_req_bd and mp_resp_bd */ + sz = sizeof(struct fcoe_sge); + mp_req->mp_req_bd = dma_alloc_coherent(&qedf->pdev->dev, sz, + &mp_req->mp_req_bd_dma, GFP_KERNEL); + if (!mp_req->mp_req_bd) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP req bd\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + mp_req->mp_resp_bd = dma_alloc_coherent(&qedf->pdev->dev, sz, + &mp_req->mp_resp_bd_dma, GFP_KERNEL); + if (!mp_req->mp_resp_bd) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP resp bd\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + /* Fill bd table */ + addr = mp_req->req_buf_dma; + mp_req_bd = mp_req->mp_req_bd; + mp_req_bd->sge_addr.lo = U64_LO(addr); + mp_req_bd->sge_addr.hi = U64_HI(addr); + mp_req_bd->size = QEDF_PAGE_SIZE; + + /* + * MP buffer is either a task mgmt command or an ELS. + * So the assumption is that it consumes a single bd + * entry in the bd table + */ + mp_resp_bd = mp_req->mp_resp_bd; + addr = mp_req->resp_buf_dma; + mp_resp_bd->sge_addr.lo = U64_LO(addr); + mp_resp_bd->sge_addr.hi = U64_HI(addr); + mp_resp_bd->size = QEDF_PAGE_SIZE; + + return 0; +} + +/* + * Last ditch effort to clear the port if it's stuck. Used only after a + * cleanup task times out. + */ +static void qedf_drain_request(struct qedf_ctx *qedf) +{ + if (test_bit(QEDF_DRAIN_ACTIVE, &qedf->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "MCP drain already active.\n"); + return; + } + + /* Set bit to return all queuecommand requests as busy */ + set_bit(QEDF_DRAIN_ACTIVE, &qedf->flags); + + /* Call qed drain request for function. 
Should be synchronous */ + qed_ops->common->drain(qedf->cdev); + + /* Settle time for CQEs to be returned */ + msleep(100); + + /* Unplug and continue */ + clear_bit(QEDF_DRAIN_ACTIVE, &qedf->flags); +} + +/* + * Returns SUCCESS if the cleanup task does not timeout, otherwise return + * FAILURE. + */ +int qedf_initiate_cleanup(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts) +{ + struct qedf_rport *fcport; + struct qedf_ctx *qedf; + uint16_t xid; + struct fcoe_task_context *task; + int tmo = 0; + int rc = SUCCESS; + unsigned long flags; + + fcport = io_req->fcport; + if (!fcport) { + QEDF_ERR(NULL, "fcport is NULL.\n"); + return SUCCESS; + } + + qedf = fcport->qedf; + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + return SUCCESS; + } + + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " + "cleanup processing or already completed.\n", + io_req->xid); + return SUCCESS; + } + + /* Ensure room on SQ */ + if (!atomic_read(&fcport->free_sqes)) { + QEDF_ERR(&(qedf->dbg_ctx), "No SQ entries available\n"); + return FAILED; + } + + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Entered xid=0x%x\n", + io_req->xid); + + /* Cleanup cmds re-use the same TID as the original I/O */ + xid = io_req->xid; + io_req->cmd_type = QEDF_CLEANUP; + io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + set_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + + task = qedf_get_task_mem(&qedf->tasks, xid); + + init_completion(&io_req->tm_done); + + /* Obtain free SQ entry */ + spin_lock_irqsave(&fcport->rport_lock, flags); + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_EXCHANGE_CLEANUP, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + tmo = wait_for_completion_timeout(&io_req->tm_done, + QEDF_CLEANUP_TIMEOUT * HZ); + + if (!tmo) { + rc = FAILED; + /* Timeout case */ + QEDF_ERR(&(qedf->dbg_ctx), "Cleanup command timeout, " + "xid=%x.\n", io_req->xid); + clear_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + /* Issue a drain request if cleanup task times out */ + QEDF_ERR(&(qedf->dbg_ctx), "Issuing MCP drain request.\n"); + qedf_drain_request(qedf); + } + + if (io_req->sc_cmd) { + if (io_req->return_scsi_cmd_on_abts) + qedf_scsi_done(qedf, io_req, DID_ERROR); + } + + if (rc == SUCCESS) + io_req->event = QEDF_IOREQ_EV_CLEANUP_SUCCESS; + else + io_req->event = QEDF_IOREQ_EV_CLEANUP_FAILED; + + return rc; +} + +void qedf_process_cleanup_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Entered xid = 0x%x\n", + io_req->xid); + + clear_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + + /* Complete so we can finish cleaning up the I/O */ + complete(&io_req->tm_done); +} + +static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd, + uint8_t tm_flags) +{ + struct qedf_ioreq *io_req; + struct qedf_mp_req *tm_req; + struct fcoe_task_context *task; + struct fc_frame_header *fc_hdr; + struct fcp_cmnd *fcp_cmnd; + struct qedf_ctx *qedf = fcport->qedf; + int rc = 0; + uint16_t xid; + uint32_t sid, did; + int tmo = 0; + unsigned long flags; + + if (!sc_cmd) { + QEDF_ERR(&(qedf->dbg_ctx), "invalid arg\n"); + return FAILED; + } + + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(&(qedf->dbg_ctx), "fcport not offloaded\n"); + rc = 
FAILED; + return FAILED; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "portid = 0x%x " + "tm_flags = %d\n", fcport->rdata->ids.port_id, tm_flags); + + io_req = qedf_alloc_cmd(fcport, QEDF_TASK_MGMT_CMD); + if (!io_req) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed TMF"); + rc = -EAGAIN; + goto reset_tmf_err; + } + + /* Initialize rest of io_req fields */ + io_req->sc_cmd = sc_cmd; + io_req->fcport = fcport; + io_req->cmd_type = QEDF_TASK_MGMT_CMD; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + tm_req = (struct qedf_mp_req *)&(io_req->mp_req); + + rc = qedf_init_mp_req(io_req); + if (rc == FAILED) { + QEDF_ERR(&(qedf->dbg_ctx), "Task mgmt MP request init " + "failed\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + goto reset_tmf_err; + } + + /* Set TM flags */ + io_req->io_req_flags = 0; + tm_req->tm_flags = tm_flags; + + /* Default is to return a SCSI command when an error occurs */ + io_req->return_scsi_cmd_on_abts = true; + + /* Fill FCP_CMND */ + qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)tm_req->req_buf); + fcp_cmnd = (struct fcp_cmnd *)tm_req->req_buf; + memset(fcp_cmnd->fc_cdb, 0, FCP_CMND_LEN); + fcp_cmnd->fc_dl = 0; + + /* Fill FC header */ + fc_hdr = &(tm_req->req_fc_hdr); + sid = fcport->sid; + did = fcport->rdata->ids.port_id; + __fc_fill_fc_hdr(fc_hdr, FC_RCTL_DD_UNSOL_CMD, sid, did, + FC_TYPE_FCP, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | + FC_FC_SEQ_INIT, 0); + /* Obtain exchange id */ + xid = io_req->xid; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "TMF io_req xid = " + "0x%x\n", xid); + + /* Initialize task context for this IO request */ + task = qedf_get_task_mem(&qedf->tasks, xid); + qedf_init_mp_task(io_req, task); + + init_completion(&io_req->tm_done); + + /* Obtain free SQ entry */ + spin_lock_irqsave(&fcport->rport_lock, flags); + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + tmo = wait_for_completion_timeout(&io_req->tm_done, + QEDF_TM_TIMEOUT * HZ); + + if (!tmo) { + rc = FAILED; + QEDF_ERR(&(qedf->dbg_ctx), "wait for tm_cmpl timeout!\n"); + } else { + /* Check TMF response code */ + if (io_req->fcp_rsp_code == 0) + rc = SUCCESS; + else + rc = FAILED; + } + + if (tm_flags == FCP_TMF_LUN_RESET) + qedf_flush_active_ios(fcport, (int)sc_cmd->device->lun); + else + qedf_flush_active_ios(fcport, -1); + + kref_put(&io_req->refcount, qedf_release_cmd); + + if (rc != SUCCESS) { + QEDF_ERR(&(qedf->dbg_ctx), "task mgmt command failed...\n"); + rc = FAILED; + } else { + QEDF_ERR(&(qedf->dbg_ctx), "task mgmt command success...\n"); + rc = SUCCESS; + } +reset_tmf_err: + return rc; +} + +int qedf_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport = (struct qedf_rport *)&rp[1]; + struct qedf_ctx *qedf; + struct fc_lport *lport; + int rc = SUCCESS; + int rval; + + rval = fc_remote_port_chkready(rport); + + if (rval) { + QEDF_ERR(NULL, "device_reset rport not ready\n"); + rc = FAILED; + goto tmf_err; + } + + if (fcport == NULL) { + QEDF_ERR(NULL, "device_reset: rport is NULL\n"); + rc = FAILED; + goto tmf_err; + } + + qedf = fcport->qedf; + lport = qedf->lport; + + if (test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) { + rc = SUCCESS; + goto tmf_err; + } + + if (lport->state != LPORT_ST_READY || 
!(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link is not ready\n"); + rc = FAILED; + goto tmf_err; + } + + rc = qedf_execute_tmf(fcport, sc_cmd, tm_flags); + +tmf_err: + return rc; +} + +void qedf_process_tmf_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + struct fcoe_cqe_rsp_info *fcp_rsp; + struct fcoe_cqe_midpath_info *mp_info; + + + /* Get TMF response length from CQE */ + mp_info = &cqe->cqe_info.midpath_info; + io_req->mp_req.resp_len = mp_info->data_placement_size; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "Response len is %d.\n", io_req->mp_req.resp_len); + + fcp_rsp = &cqe->cqe_info.rsp_info; + qedf_parse_fcp_rsp(io_req, fcp_rsp); + + io_req->sc_cmd = NULL; + complete(&io_req->tm_done); +} + +void qedf_process_unsol_compl(struct qedf_ctx *qedf, uint16_t que_idx, + struct fcoe_cqe *cqe) +{ + unsigned long flags; + uint16_t tmp; + uint16_t pktlen = cqe->cqe_info.unsolic_info.pkt_len; + u32 payload_len, crc; + struct fc_frame_header *fh; + struct fc_frame *fp; + struct qedf_io_work *io_work; + u32 bdq_idx; + void *bdq_addr; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL, + "address.hi=%x address.lo=%x opaque_data.hi=%x " + "opaque_data.lo=%x bdq_prod_idx=%u len=%u.\n", + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.address.hi), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.address.lo), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.hi), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.lo), + qedf->bdq_prod_idx, pktlen); + + bdq_idx = le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.lo); + if (bdq_idx >= QEDF_BDQ_SIZE) { + QEDF_ERR(&(qedf->dbg_ctx), "bdq_idx is out of range %d.\n", + bdq_idx); + goto increment_prod; + } + + bdq_addr = qedf->bdq[bdq_idx].buf_addr; + if (!bdq_addr) { + QEDF_ERR(&(qedf->dbg_ctx), "bdq_addr is NULL, dropping " + "unsolicited packet.\n"); + goto increment_prod; + } + + if (qedf_dump_frames) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL, + "BDQ frame is at addr=%p.\n", bdq_addr); + print_hex_dump(KERN_WARNING, "bdq ", DUMP_PREFIX_OFFSET, 16, 1, + (void *)bdq_addr, pktlen, false); + } + + /* Allocate frame */ + payload_len = pktlen - sizeof(struct fc_frame_header); + fp = fc_frame_alloc(qedf->lport, payload_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate fp.\n"); + goto increment_prod; + } + + /* Copy data from BDQ buffer into fc_frame struct */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, (void *)bdq_addr, pktlen); + + /* Initialize the frame so libfc sees it as a valid frame */ + crc = fcoe_fc_crc(fp); + fc_frame_init(fp); + fr_dev(fp) = qedf->lport; + fr_sof(fp) = FC_SOF_I3; + fr_eof(fp) = FC_EOF_T; + fr_crc(fp) = cpu_to_le32(~crc); + + /* + * We need to return the frame back up to libfc in a non-atomic + * context + */ + io_work = mempool_alloc(qedf->io_mempool, GFP_ATOMIC); + if (!io_work) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "work for I/O completion.\n"); + fc_frame_free(fp); + goto increment_prod; + } + memset(io_work, 0, sizeof(struct qedf_io_work)); + + INIT_WORK(&io_work->work, qedf_fp_io_handler); + + /* Copy contents of CQE for deferred processing */ + memcpy(&io_work->cqe, cqe, sizeof(struct fcoe_cqe)); + + io_work->qedf = qedf; + io_work->fp = fp; + + queue_work_on(smp_processor_id(), qedf_io_wq, &io_work->work); +increment_prod: + spin_lock_irqsave(&qedf->hba_lock, flags); + + /* Increment producer to let f/w know we've handled the frame */ + qedf->bdq_prod_idx++; + + /* Producer index wraps at uint16_t 
boundary */ + if (qedf->bdq_prod_idx == 0xffff) + qedf->bdq_prod_idx = 0; + + writew(qedf->bdq_prod_idx, qedf->bdq_primary_prod); + tmp = readw(qedf->bdq_primary_prod); + writew(qedf->bdq_prod_idx, qedf->bdq_secondary_prod); + tmp = readw(qedf->bdq_secondary_prod); + + spin_unlock_irqrestore(&qedf->hba_lock, flags); +} diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c new file mode 100644 index 00000000000000..d9d7a86b5f8baf --- /dev/null +++ b/drivers/scsi/qedf/qedf_main.c @@ -0,0 +1,3336 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qedf.h" + +const struct qed_fcoe_ops *qed_ops; + +static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void qedf_remove(struct pci_dev *pdev); + +extern struct qedf_debugfs_ops qedf_debugfs_ops; +extern struct file_operations qedf_dbg_fops; + +/* + * Driver module parameters. + */ +static unsigned int qedf_dev_loss_tmo = 60; +module_param_named(dev_loss_tmo, qedf_dev_loss_tmo, int, S_IRUGO); +MODULE_PARM_DESC(dev_loss_tmo, " dev_loss_tmo setting for attached " + "remote ports (default 60)"); + +uint qedf_debug = QEDF_LOG_INFO; +module_param_named(debug, qedf_debug, uint, S_IRUGO); +MODULE_PARM_DESC(qedf_debug, " Debug mask. Pass '1' to enable default debugging" + " mask"); + +static uint qedf_fipvlan_retries = 30; +module_param_named(fipvlan_retries, qedf_fipvlan_retries, int, S_IRUGO); +MODULE_PARM_DESC(fipvlan_retries, " Number of FIP VLAN requests to attempt " + "before giving up (default 30)"); + +static uint qedf_fallback_vlan = QEDF_FALLBACK_VLAN; +module_param_named(fallback_vlan, qedf_fallback_vlan, int, S_IRUGO); +MODULE_PARM_DESC(fallback_vlan, " VLAN ID to try if fip vlan request fails " + "(default 1002)."); + +static uint qedf_default_prio = QEDF_DEFAULT_PRIO; +module_param_named(default_prio, qedf_default_prio, int, S_IRUGO); +MODULE_PARM_DESC(default_prio, " Default 802.1q priority for FIP and FCoE" + " traffic (default 3)."); + +uint qedf_dump_frames; +module_param_named(dump_frames, qedf_dump_frames, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dump_frames, " Print the skb data of FIP and FCoE frames " + "(default off)"); + +static uint qedf_queue_depth; +module_param_named(queue_depth, qedf_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(queue_depth, " Sets the queue depth for all LUNs discovered " + "by the qedf driver. Default is 0 (use OS default)."); + +uint qedf_io_tracing; +module_param_named(io_tracing, qedf_io_tracing, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(io_tracing, " Enable logging of SCSI requests/completions " + "into trace buffer. (default off)."); + +static uint qedf_max_lun = MAX_FIBRE_LUNS; +module_param_named(max_lun, qedf_max_lun, int, S_IRUGO); +MODULE_PARM_DESC(max_lun, " Sets the maximum luns per target that the driver " + "supports. 
(default 0xffffffff)"); + +uint qedf_link_down_tmo; +module_param_named(link_down_tmo, qedf_link_down_tmo, int, S_IRUGO); +MODULE_PARM_DESC(link_down_tmo, " Delays informing the fcoe transport that the " + "link is down by N seconds."); + +bool qedf_retry_delay; +module_param_named(retry_delay, qedf_retry_delay, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(retry_delay, " Enable/disable handling of FCP_RSP IU retry " + "delay handling (default off)."); + +static uint qedf_dp_module; +module_param_named(dp_module, qedf_dp_module, uint, S_IRUGO); +MODULE_PARM_DESC(dp_module, " bit flags control for verbose printk passed " + "qed module during probe."); + +static uint qedf_dp_level; +module_param_named(dp_level, qedf_dp_level, uint, S_IRUGO); +MODULE_PARM_DESC(dp_level, " printk verbosity control passed to qed module " + "during probe (0-3: 0 more verbose)."); + +struct workqueue_struct *qedf_io_wq; + +static struct fcoe_percpu_s qedf_global; +static DEFINE_SPINLOCK(qedf_global_lock); + +static struct kmem_cache *qedf_io_work_cache; + +void qedf_set_vlan_id(struct qedf_ctx *qedf, int vlan_id) +{ + qedf->vlan_id = vlan_id; + qedf->vlan_id |= qedf_default_prio << VLAN_PRIO_SHIFT; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Setting vlan_id=%04x " + "prio=%d.\n", vlan_id, qedf_default_prio); +} + +/* Returns true if we have a valid vlan, false otherwise */ +static bool qedf_initiate_fipvlan_req(struct qedf_ctx *qedf) +{ + int rc; + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_ERR(&(qedf->dbg_ctx), "Link not up.\n"); + return false; + } + + while (qedf->fipvlan_retries--) { + if (qedf->vlan_id > 0) + return true; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Retry %d.\n", qedf->fipvlan_retries); + init_completion(&qedf->fipvlan_compl); + qedf_fcoe_send_vlan_req(qedf); + rc = wait_for_completion_timeout(&qedf->fipvlan_compl, + 1 * HZ); + if (rc > 0) { + fcoe_ctlr_link_up(&qedf->ctlr); + return true; + } + } + + return false; +} + +static void qedf_handle_link_update(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, link_update.work); + int rc; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Entered.\n"); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) { + rc = qedf_initiate_fipvlan_req(qedf); + if (rc) + return; + /* + * If we get here then we never received a repsonse to our + * fip vlan request so set the vlan_id to the default and + * tell FCoE that the link is up + */ + QEDF_WARN(&(qedf->dbg_ctx), "Did not receive FIP VLAN " + "response, falling back to default VLAN %d.\n", + qedf_fallback_vlan); + qedf_set_vlan_id(qedf, QEDF_FALLBACK_VLAN); + + /* + * Zero out data_src_addr so we'll update it with the new + * lport port_id + */ + eth_zero_addr(qedf->data_src_addr); + fcoe_ctlr_link_up(&qedf->ctlr); + } else if (atomic_read(&qedf->link_state) == QEDF_LINK_DOWN) { + /* + * If we hit here and link_down_tmo_valid is still 1 it means + * that link_down_tmo timed out so set it to 0 to make sure any + * other readers have accurate state. 
+ */ + atomic_set(&qedf->link_down_tmo_valid, 0); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Calling fcoe_ctlr_link_down().\n"); + fcoe_ctlr_link_down(&qedf->ctlr); + qedf_wait_for_upload(qedf); + /* Reset the number of FIP VLAN retries */ + qedf->fipvlan_retries = qedf_fipvlan_retries; + } +} + +static void qedf_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, + void *arg) +{ + struct fc_exch *exch = fc_seq_exch(seq); + struct fc_lport *lport = exch->lp; + struct qedf_ctx *qedf = lport_priv(lport); + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + return; + } + + /* + * If ERR_PTR is set then don't try to stat anything as it will cause + * a crash when we access fp. + */ + if (IS_ERR(fp)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "fp has IS_ERR() set.\n"); + goto skip_stat; + } + + /* Log stats for FLOGI reject */ + if (fc_frame_payload_op(fp) == ELS_LS_RJT) + qedf->flogi_failed++; + + /* Complete flogi_compl so we can proceed to sending ADISCs */ + complete(&qedf->flogi_compl); + +skip_stat: + /* Report response to libfc */ + fc_lport_flogi_resp(seq, fp, lport); +} + +static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did, + struct fc_frame *fp, unsigned int op, + void (*resp)(struct fc_seq *, + struct fc_frame *, + void *), + void *arg, u32 timeout) +{ + struct qedf_ctx *qedf = lport_priv(lport); + + /* + * Intercept FLOGI for statistic purposes. Note we use the resp + * callback to tell if this is really a flogi. + */ + if (resp == fc_lport_flogi_resp) { + qedf->flogi_cnt++; + return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp, + arg, timeout); + } + + return fc_elsct_send(lport, did, fp, op, resp, arg, timeout); +} + +int qedf_send_flogi(struct qedf_ctx *qedf) +{ + struct fc_lport *lport; + struct fc_frame *fp; + + lport = qedf->lport; + + if (!lport->tt.elsct_send) + return -EINVAL; + + fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi)); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), "fc_frame_alloc failed.\n"); + return -ENOMEM; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Sending FLOGI to reestablish session with switch.\n"); + lport->tt.elsct_send(lport, FC_FID_FLOGI, fp, + ELS_FLOGI, qedf_flogi_resp, lport, lport->r_a_tov); + + init_completion(&qedf->flogi_compl); + + return 0; +} + +struct qedf_tmp_rdata_item { + struct fc_rport_priv *rdata; + struct list_head list; +}; + +/* + * This function is called if link_down_tmo is in use. If we get a link up and + * link_down_tmo has not expired then use just FLOGI/ADISC to recover our + * sessions with targets. Otherwise, just call fcoe_ctlr_link_up(). + */ +static void qedf_link_recovery(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, link_recovery.work); + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + struct qedf_tmp_rdata_item *rdata_item, *tmp_rdata_item; + bool rc; + int retries = 30; + int rval, i; + struct list_head rdata_login_list; + + INIT_LIST_HEAD(&rdata_login_list); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Link down tmo did not expire.\n"); + + /* + * Essentially reset the fcoe_ctlr here without affecting the state + * of the libfc structs. 
 + */ + qedf->ctlr.state = FIP_ST_LINK_WAIT; + fcoe_ctlr_link_down(&qedf->ctlr); + + /* + * Bring the link up before we send the fipvlan request so libfcoe + * can select a new fcf in parallel + */ + fcoe_ctlr_link_up(&qedf->ctlr); + + /* Since the link went down and came back up, verify which VLAN we're on */ + qedf->fipvlan_retries = qedf_fipvlan_retries; + rc = qedf_initiate_fipvlan_req(qedf); + if (!rc) + return; + + /* + * We need to wait for an FCF to be selected due to the + * fcoe_ctlr_link_up call, otherwise the FLOGI will be rejected. + */ + while (retries > 0) { + if (qedf->ctlr.sel_fcf) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "FCF reselected, proceeding with FLOGI.\n"); + break; + } + msleep(500); + retries--; + } + + if (retries < 1) { + QEDF_ERR(&(qedf->dbg_ctx), "Exhausted retries waiting for " + "FCF selection.\n"); + return; + } + + rval = qedf_send_flogi(qedf); + if (rval) + return; + + /* Wait for FLOGI completion before proceeding with sending ADISCs */ + i = wait_for_completion_timeout(&qedf->flogi_compl, + qedf->lport->r_a_tov); + if (i == 0) { + QEDF_ERR(&(qedf->dbg_ctx), "FLOGI timed out.\n"); + return; + } + + /* + * Call lport->tt.rport_login which will cause libfc to send an + * ADISC since the rport is in state ready. + */ + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + rdata_item = kzalloc(sizeof(struct qedf_tmp_rdata_item), + GFP_ATOMIC); + if (!rdata_item) + continue; + if (kref_get_unless_zero(&rdata->kref)) { + rdata_item->rdata = rdata; + list_add(&rdata_item->list, &rdata_login_list); + } else + kfree(rdata_item); + } + rcu_read_unlock(); + /* + * Do the fc_rport_login outside of the rcu lock so we don't take a + * mutex in an atomic context. + */ + list_for_each_entry_safe(rdata_item, tmp_rdata_item, &rdata_login_list, + list) { + list_del(&rdata_item->list); + fc_rport_login(rdata_item->rdata); + kref_put(&rdata_item->rdata->kref, fc_rport_destroy); + kfree(rdata_item); + } +} + +static void qedf_update_link_speed(struct qedf_ctx *qedf, + struct qed_link_output *link) +{ + struct fc_lport *lport = qedf->lport; + + lport->link_speed = FC_PORTSPEED_UNKNOWN; + lport->link_supported_speeds = FC_PORTSPEED_UNKNOWN; + + /* Set fc_host link speed */ + switch (link->speed) { + case 10000: + lport->link_speed = FC_PORTSPEED_10GBIT; + break; + case 25000: + lport->link_speed = FC_PORTSPEED_25GBIT; + break; + case 40000: + lport->link_speed = FC_PORTSPEED_40GBIT; + break; + case 50000: + lport->link_speed = FC_PORTSPEED_50GBIT; + break; + case 100000: + lport->link_speed = FC_PORTSPEED_100GBIT; + break; + default: + lport->link_speed = FC_PORTSPEED_UNKNOWN; + break; + } + + /* + * Set supported link speed by querying the supported + * capabilities of the link. 
+ */ + if (link->supported_caps & SUPPORTED_10000baseKR_Full) + lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; + if (link->supported_caps & SUPPORTED_25000baseKR_Full) + lport->link_supported_speeds |= FC_PORTSPEED_25GBIT; + if (link->supported_caps & SUPPORTED_40000baseLR4_Full) + lport->link_supported_speeds |= FC_PORTSPEED_40GBIT; + if (link->supported_caps & SUPPORTED_50000baseKR2_Full) + lport->link_supported_speeds |= FC_PORTSPEED_50GBIT; + if (link->supported_caps & SUPPORTED_100000baseKR4_Full) + lport->link_supported_speeds |= FC_PORTSPEED_100GBIT; + fc_host_supported_speeds(lport->host) = lport->link_supported_speeds; +} + +static void qedf_link_update(void *dev, struct qed_link_output *link) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)dev; + + if (link->link_up) { + QEDF_ERR(&(qedf->dbg_ctx), "LINK UP (%d GB/s).\n", + link->speed / 1000); + + /* Cancel any pending link down work */ + cancel_delayed_work(&qedf->link_update); + + atomic_set(&qedf->link_state, QEDF_LINK_UP); + qedf_update_link_speed(qedf, link); + + if (atomic_read(&qedf->dcbx) == QEDF_DCBX_DONE) { + QEDF_ERR(&(qedf->dbg_ctx), "DCBx done.\n"); + if (atomic_read(&qedf->link_down_tmo_valid) > 0) + queue_delayed_work(qedf->link_update_wq, + &qedf->link_recovery, 0); + else + queue_delayed_work(qedf->link_update_wq, + &qedf->link_update, 0); + atomic_set(&qedf->link_down_tmo_valid, 0); + } + + } else { + QEDF_ERR(&(qedf->dbg_ctx), "LINK DOWN.\n"); + + atomic_set(&qedf->link_state, QEDF_LINK_DOWN); + atomic_set(&qedf->dcbx, QEDF_DCBX_PENDING); + /* + * Flag that we're waiting for the link to come back up before + * informing the fcoe layer of the event. + */ + if (qedf_link_down_tmo > 0) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Starting link down tmo.\n"); + atomic_set(&qedf->link_down_tmo_valid, 1); + } + qedf->vlan_id = 0; + qedf_update_link_speed(qedf, link); + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + qedf_link_down_tmo * HZ); + } +} + + +static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)dev; + + QEDF_ERR(&(qedf->dbg_ctx), "DCBx event valid=%d enabled=%d fcoe " + "prio=%d.\n", get->operational.valid, get->operational.enabled, + get->operational.app_prio.fcoe); + + if (get->operational.enabled && get->operational.valid) { + /* If DCBX was already negotiated on link up then just exit */ + if (atomic_read(&qedf->dcbx) == QEDF_DCBX_DONE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "DCBX already set on link up.\n"); + return; + } + + atomic_set(&qedf->dcbx, QEDF_DCBX_DONE); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) { + if (atomic_read(&qedf->link_down_tmo_valid) > 0) + queue_delayed_work(qedf->link_update_wq, + &qedf->link_recovery, 0); + else + queue_delayed_work(qedf->link_update_wq, + &qedf->link_update, 0); + atomic_set(&qedf->link_down_tmo_valid, 0); + } + } + +} + +static u32 qedf_get_login_failures(void *cookie) +{ + struct qedf_ctx *qedf; + + qedf = (struct qedf_ctx *)cookie; + return qedf->flogi_failed; +} + +static struct qed_fcoe_cb_ops qedf_cb_ops = { + { + .link_update = qedf_link_update, + .dcbx_aen = qedf_dcbx_handler, + } +}; + +/* + * Various transport templates. 
+ */ + +static struct scsi_transport_template *qedf_fc_transport_template; +static struct scsi_transport_template *qedf_fc_vport_transport_template; + +/* + * SCSI EH handlers + */ +static int qedf_eh_abort(struct scsi_cmnd *sc_cmd) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_ctx *qedf; + struct qedf_ioreq *io_req; + int rc = FAILED; + int rval; + + if (fc_remote_port_chkready(rport)) { + QEDF_ERR(NULL, "rport not ready\n"); + goto out; + } + + lport = shost_priv(sc_cmd->device->host); + qedf = (struct qedf_ctx *)lport_priv(lport); + + if ((lport->state != LPORT_ST_READY) || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link not ready.\n"); + goto out; + } + + fcport = (struct qedf_rport *)&rp[1]; + + io_req = (struct qedf_ioreq *)sc_cmd->SCp.ptr; + if (!io_req) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req is NULL.\n"); + rc = SUCCESS; + goto out; + } + + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || + test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " + "cleanup or abort processing or already " + "completed.\n", io_req->xid); + rc = SUCCESS; + goto out; + } + + QEDF_ERR(&(qedf->dbg_ctx), "Aborting io_req sc_cmd=%p xid=0x%x " + "fp_idx=%d.\n", sc_cmd, io_req->xid, io_req->fp_idx); + + if (qedf->stop_io_on_error) { + qedf_stop_all_io(qedf); + rc = SUCCESS; + goto out; + } + + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); + goto out; + } + + wait_for_completion(&io_req->abts_done); + + if (io_req->event == QEDF_IOREQ_EV_ABORT_SUCCESS || + io_req->event == QEDF_IOREQ_EV_ABORT_FAILED || + io_req->event == QEDF_IOREQ_EV_CLEANUP_SUCCESS) { + /* + * If we get a reponse to the abort this is success from + * the perspective that all references to the command have + * been removed from the driver and firmware + */ + rc = SUCCESS; + } else { + /* If the abort and cleanup failed then return a failure */ + rc = FAILED; + } + + if (rc == SUCCESS) + QEDF_ERR(&(qedf->dbg_ctx), "ABTS succeeded, xid=0x%x.\n", + io_req->xid); + else + QEDF_ERR(&(qedf->dbg_ctx), "ABTS failed, xid=0x%x.\n", + io_req->xid); + +out: + return rc; +} + +static int qedf_eh_target_reset(struct scsi_cmnd *sc_cmd) +{ + QEDF_ERR(NULL, "TARGET RESET Issued..."); + return qedf_initiate_tmf(sc_cmd, FCP_TMF_TGT_RESET); +} + +static int qedf_eh_device_reset(struct scsi_cmnd *sc_cmd) +{ + QEDF_ERR(NULL, "LUN RESET Issued...\n"); + return qedf_initiate_tmf(sc_cmd, FCP_TMF_LUN_RESET); +} + +void qedf_wait_for_upload(struct qedf_ctx *qedf) +{ + while (1) { + if (atomic_read(&qedf->num_offloads)) + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Waiting for all uploads to complete.\n"); + else + break; + msleep(500); + } +} + +/* Reset the host by gracefully logging out and then logging back in */ +static int qedf_eh_host_reset(struct scsi_cmnd *sc_cmd) +{ + struct fc_lport *lport; + struct qedf_ctx *qedf; + + lport = shost_priv(sc_cmd->device->host); + + if (lport->vport) { + QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + return SUCCESS; + } + + qedf = (struct qedf_ctx *)lport_priv(lport); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_DOWN || + test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) + return FAILED; + + 
QEDF_ERR(&(qedf->dbg_ctx), "HOST RESET Issued..."); + + /* For host reset, essentially do a soft link up/down */ + atomic_set(&qedf->link_state, QEDF_LINK_DOWN); + atomic_set(&qedf->dcbx, QEDF_DCBX_PENDING); + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + 0); + qedf_wait_for_upload(qedf); + atomic_set(&qedf->link_state, QEDF_LINK_UP); + qedf->vlan_id = 0; + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + 0); + + return SUCCESS; +} + +static int qedf_slave_configure(struct scsi_device *sdev) +{ + if (qedf_queue_depth) { + scsi_change_queue_depth(sdev, qedf_queue_depth); + } + + return 0; +} + +static struct scsi_host_template qedf_host_template = { + .module = THIS_MODULE, + .name = QEDF_MODULE_NAME, + .this_id = -1, + .cmd_per_lun = 3, + .use_clustering = ENABLE_CLUSTERING, + .max_sectors = 0xffff, + .queuecommand = qedf_queuecommand, + .shost_attrs = qedf_host_attrs, + .eh_abort_handler = qedf_eh_abort, + .eh_device_reset_handler = qedf_eh_device_reset, /* lun reset */ + .eh_target_reset_handler = qedf_eh_target_reset, /* target reset */ + .eh_host_reset_handler = qedf_eh_host_reset, + .slave_configure = qedf_slave_configure, + .dma_boundary = QED_HW_DMA_BOUNDARY, + .sg_tablesize = QEDF_MAX_BDS_PER_CMD, + .can_queue = FCOE_PARAMS_NUM_TASKS, +}; + +static int qedf_get_paged_crc_eof(struct sk_buff *skb, int tlen) +{ + int rc; + + spin_lock(&qedf_global_lock); + rc = fcoe_get_paged_crc_eof(skb, tlen, &qedf_global); + spin_unlock(&qedf_global_lock); + + return rc; +} + +static struct qedf_rport *qedf_fcport_lookup(struct qedf_ctx *qedf, u32 port_id) +{ + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + if (rdata->ids.port_id == port_id) { + rcu_read_unlock(); + return fcport; + } + } + rcu_read_unlock(); + + /* Return NULL to caller to let them know fcport was not found */ + return NULL; +} + +/* Transmits an ELS frame over an offloaded session */ +static int qedf_xmit_l2_frame(struct qedf_rport *fcport, struct fc_frame *fp) +{ + struct fc_frame_header *fh; + int rc = 0; + + fh = fc_frame_header_get(fp); + if ((fh->fh_type == FC_TYPE_ELS) && + (fh->fh_r_ctl == FC_RCTL_ELS_REQ)) { + switch (fc_frame_payload_op(fp)) { + case ELS_ADISC: + qedf_send_adisc(fcport, fp); + rc = 1; + break; + } + } + + return rc; +} + +/** + * qedf_xmit - qedf FCoE frame transmit function + * + */ +static int qedf_xmit(struct fc_lport *lport, struct fc_frame *fp) +{ + struct fc_lport *base_lport; + struct qedf_ctx *qedf; + struct ethhdr *eh; + struct fcoe_crc_eof *cp; + struct sk_buff *skb; + struct fc_frame_header *fh; + struct fcoe_hdr *hp; + u8 sof, eof; + u32 crc; + unsigned int hlen, tlen, elen; + int wlen; + struct fc_stats *stats; + struct fc_lport *tmp_lport; + struct fc_lport *vn_port = NULL; + struct qedf_rport *fcport; + int rc; + u16 vlan_tci = 0; + + qedf = (struct qedf_ctx *)lport_priv(lport); + + fh = fc_frame_header_get(fp); + skb = fp_skb(fp); + + /* Filter out traffic to other NPIV ports on the same host */ + if (lport->vport) + base_lport = shost_priv(vport_to_shost(lport->vport)); + else + base_lport = lport; + + /* Flag if the destination is the base port */ + if (base_lport->port_id == ntoh24(fh->fh_d_id)) { + vn_port = base_lport; + } else { + /* Got through the list of vports attached to the base_lport + * and see if we have a match with the destination address. 
+ */ + list_for_each_entry(tmp_lport, &base_lport->vports, list) { + if (tmp_lport->port_id == ntoh24(fh->fh_d_id)) { + vn_port = tmp_lport; + break; + } + } + } + if (vn_port && ntoh24(fh->fh_d_id) != FC_FID_FLOGI) { + struct fc_rport_priv *rdata = NULL; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "Dropping FCoE frame to %06x.\n", ntoh24(fh->fh_d_id)); + kfree_skb(skb); + rdata = fc_rport_lookup(lport, ntoh24(fh->fh_d_id)); + if (rdata) + rdata->retries = lport->max_rport_retry_count; + return -EINVAL; + } + /* End NPIV filtering */ + + if (!qedf->ctlr.sel_fcf) { + kfree_skb(skb); + return 0; + } + + if (!test_bit(QEDF_LL2_STARTED, &qedf->flags)) { + QEDF_WARN(&(qedf->dbg_ctx), "LL2 not started\n"); + kfree_skb(skb); + return 0; + } + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(qedf->dbg_ctx), "qedf link down\n"); + kfree_skb(skb); + return 0; + } + + if (unlikely(fh->fh_r_ctl == FC_RCTL_ELS_REQ)) { + if (fcoe_ctlr_els_send(&qedf->ctlr, lport, skb)) + return 0; + } + + /* Check to see if this needs to be sent on an offloaded session */ + fcport = qedf_fcport_lookup(qedf, ntoh24(fh->fh_d_id)); + + if (fcport && test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + rc = qedf_xmit_l2_frame(fcport, fp); + /* + * If the frame was successfully sent over the middle path + * then do not try to also send it over the LL2 path + */ + if (rc) + return 0; + } + + sof = fr_sof(fp); + eof = fr_eof(fp); + + elen = sizeof(struct ethhdr); + hlen = sizeof(struct fcoe_hdr); + tlen = sizeof(struct fcoe_crc_eof); + wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE; + + skb->ip_summed = CHECKSUM_NONE; + crc = fcoe_fc_crc(fp); + + /* copy port crc and eof to the skb buff */ + if (skb_is_nonlinear(skb)) { + skb_frag_t *frag; + + if (qedf_get_paged_crc_eof(skb, tlen)) { + kfree_skb(skb); + return -ENOMEM; + } + frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1]; + cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset; + } else { + cp = (struct fcoe_crc_eof *)skb_put(skb, tlen); + } + + memset(cp, 0, sizeof(*cp)); + cp->fcoe_eof = eof; + cp->fcoe_crc32 = cpu_to_le32(~crc); + if (skb_is_nonlinear(skb)) { + kunmap_atomic(cp); + cp = NULL; + } + + + /* adjust skb network/transport offsets to match mac/fcoe/port */ + skb_push(skb, elen + hlen); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->mac_len = elen; + skb->protocol = htons(ETH_P_FCOE); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), qedf->vlan_id); + + /* fill up mac and fcoe headers */ + eh = eth_hdr(skb); + eh->h_proto = htons(ETH_P_FCOE); + if (qedf->ctlr.map_dest) + fc_fcoe_set_mac(eh->h_dest, fh->fh_d_id); + else + /* insert GW address */ + ether_addr_copy(eh->h_dest, qedf->ctlr.dest_addr); + + /* Set the source MAC address */ + fc_fcoe_set_mac(eh->h_source, fh->fh_s_id); + + hp = (struct fcoe_hdr *)(eh + 1); + memset(hp, 0, sizeof(*hp)); + if (FC_FCOE_VER) + FC_FCOE_ENCAPS_VER(hp, FC_FCOE_VER); + hp->fcoe_sof = sof; + + /*update tx stats */ + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->TxFrames++; + stats->TxWords += wlen; + put_cpu(); + + /* Get VLAN ID from skb for printing purposes */ + __vlan_hwaccel_get_tag(skb, &vlan_tci); + + /* send down to lld */ + fr_dev(fp) = lport; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FCoE frame send: " + "src=%06x dest=%06x r_ctl=%x type=%x vlan=%04x.\n", + ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id), fh->fh_r_ctl, fh->fh_type, + vlan_tci); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fcoe: ", DUMP_PREFIX_OFFSET, 16, + 
1, skb->data, skb->len, false); + qed_ops->ll2->start_xmit(qedf->cdev, skb); + + return 0; +} + +static int qedf_alloc_sq(struct qedf_ctx *qedf, struct qedf_rport *fcport) +{ + int rval = 0; + u32 *pbl; + dma_addr_t page; + int num_pages; + + /* Calculate appropriate queue and PBL sizes */ + fcport->sq_mem_size = SQ_NUM_ENTRIES * sizeof(struct fcoe_wqe); + fcport->sq_mem_size = ALIGN(fcport->sq_mem_size, QEDF_PAGE_SIZE); + fcport->sq_pbl_size = (fcport->sq_mem_size / QEDF_PAGE_SIZE) * + sizeof(void *); + fcport->sq_pbl_size = fcport->sq_pbl_size + QEDF_PAGE_SIZE; + + fcport->sq = dma_alloc_coherent(&qedf->pdev->dev, fcport->sq_mem_size, + &fcport->sq_dma, GFP_KERNEL); + if (!fcport->sq) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate send " + "queue.\n"); + rval = 1; + goto out; + } + memset(fcport->sq, 0, fcport->sq_mem_size); + + fcport->sq_pbl = dma_alloc_coherent(&qedf->pdev->dev, + fcport->sq_pbl_size, &fcport->sq_pbl_dma, GFP_KERNEL); + if (!fcport->sq_pbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate send " + "queue PBL.\n"); + rval = 1; + goto out_free_sq; + } + memset(fcport->sq_pbl, 0, fcport->sq_pbl_size); + + /* Create PBL */ + num_pages = fcport->sq_mem_size / QEDF_PAGE_SIZE; + page = fcport->sq_dma; + pbl = (u32 *)fcport->sq_pbl; + + while (num_pages--) { + *pbl = U64_LO(page); + pbl++; + *pbl = U64_HI(page); + pbl++; + page += QEDF_PAGE_SIZE; + } + + return rval; + +out_free_sq: + dma_free_coherent(&qedf->pdev->dev, fcport->sq_mem_size, fcport->sq, + fcport->sq_dma); +out: + return rval; +} + +static void qedf_free_sq(struct qedf_ctx *qedf, struct qedf_rport *fcport) +{ + if (fcport->sq_pbl) + dma_free_coherent(&qedf->pdev->dev, fcport->sq_pbl_size, + fcport->sq_pbl, fcport->sq_pbl_dma); + if (fcport->sq) + dma_free_coherent(&qedf->pdev->dev, fcport->sq_mem_size, + fcport->sq, fcport->sq_dma); +} + +static int qedf_offload_connection(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + struct qed_fcoe_params_offload conn_info; + u32 port_id; + u8 lport_src_id[3]; + int rval; + uint16_t total_sqe = (fcport->sq_mem_size / sizeof(struct fcoe_wqe)); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Offloading connection " + "portid=%06x.\n", fcport->rdata->ids.port_id); + rval = qed_ops->acquire_conn(qedf->cdev, &fcport->handle, + &fcport->fw_cid, &fcport->p_doorbell); + if (rval) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not acquire connection " + "for portid=%06x.\n", fcport->rdata->ids.port_id); + rval = 1; /* For some reason qed returns 0 on failure here */ + goto out; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "portid=%06x " + "fw_cid=%08x handle=%d.\n", fcport->rdata->ids.port_id, + fcport->fw_cid, fcport->handle); + + memset(&conn_info, 0, sizeof(struct qed_fcoe_params_offload)); + + /* Fill in the offload connection info */ + conn_info.sq_pbl_addr = fcport->sq_pbl_dma; + + conn_info.sq_curr_page_addr = (dma_addr_t)(*(u64 *)fcport->sq_pbl); + conn_info.sq_next_page_addr = + (dma_addr_t)(*(u64 *)(fcport->sq_pbl + 8)); + + /* Need to use our FCoE MAC for the offload session */ + port_id = fc_host_port_id(qedf->lport->host); + lport_src_id[2] = (port_id & 0x000000FF); + lport_src_id[1] = (port_id & 0x0000FF00) >> 8; + lport_src_id[0] = (port_id & 0x00FF0000) >> 16; + fc_fcoe_set_mac(conn_info.src_mac, lport_src_id); + + ether_addr_copy(conn_info.dst_mac, qedf->ctlr.dest_addr); + + conn_info.tx_max_fc_pay_len = fcport->rdata->maxframe_size; + conn_info.e_d_tov_timer_val = qedf->lport->e_d_tov / 20; + conn_info.rec_tov_timer_val = 3; /* I think this is what 
E3 was */ + conn_info.rx_max_fc_pay_len = fcport->rdata->maxframe_size; + + /* Set VLAN data */ + conn_info.vlan_tag = qedf->vlan_id << + FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_SHIFT; + conn_info.vlan_tag |= + qedf_default_prio << FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_SHIFT; + conn_info.flags |= (FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_MASK << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_SHIFT); + + /* Set host port source id */ + port_id = fc_host_port_id(qedf->lport->host); + fcport->sid = port_id; + conn_info.s_id.addr_hi = (port_id & 0x000000FF); + conn_info.s_id.addr_mid = (port_id & 0x0000FF00) >> 8; + conn_info.s_id.addr_lo = (port_id & 0x00FF0000) >> 16; + + conn_info.max_conc_seqs_c3 = fcport->rdata->max_seq; + + /* Set remote port destination id */ + port_id = fcport->rdata->rport->port_id; + conn_info.d_id.addr_hi = (port_id & 0x000000FF); + conn_info.d_id.addr_mid = (port_id & 0x0000FF00) >> 8; + conn_info.d_id.addr_lo = (port_id & 0x00FF0000) >> 16; + + conn_info.def_q_idx = 0; /* Default index for send queue? */ + + /* Set FC-TAPE specific flags if needed */ + if (fcport->dev_type == QEDF_RPORT_TYPE_TAPE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, + "Enable CONF, REC for portid=%06x.\n", + fcport->rdata->ids.port_id); + conn_info.flags |= 1 << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_SHIFT; + conn_info.flags |= + ((fcport->rdata->sp_features & FC_SP_FT_SEQC) ? 1 : 0) << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_SHIFT; + } + + rval = qed_ops->offload_conn(qedf->cdev, fcport->handle, &conn_info); + if (rval) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not offload connection " + "for portid=%06x.\n", fcport->rdata->ids.port_id); + goto out_free_conn; + } else + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Offload " + "succeeded portid=%06x total_sqe=%d.\n", + fcport->rdata->ids.port_id, total_sqe); + + spin_lock_init(&fcport->rport_lock); + atomic_set(&fcport->free_sqes, total_sqe); + return 0; +out_free_conn: + qed_ops->release_conn(qedf->cdev, fcport->handle); +out: + return rval; +} + +#define QEDF_TERM_BUFF_SIZE 10 +static void qedf_upload_connection(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + void *term_params; + dma_addr_t term_params_dma; + + /* Term params needs to be a DMA coherent buffer as qed shared the + * physical DMA address with the firmware. The buffer may be used in + * the receive path so we may eventually have to move this. + */ + term_params = dma_alloc_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE, + &term_params_dma, GFP_KERNEL); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Uploading connection " + "port_id=%06x.\n", fcport->rdata->ids.port_id); + + qed_ops->destroy_conn(qedf->cdev, fcport->handle, term_params_dma); + qed_ops->release_conn(qedf->cdev, fcport->handle); + + dma_free_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE, term_params, + term_params_dma); +} + +static void qedf_cleanup_fcport(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Cleaning up portid=%06x.\n", + fcport->rdata->ids.port_id); + + /* Flush any remaining i/o's before we upload the connection */ + qedf_flush_active_ios(fcport, -1); + + if (test_and_clear_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) + qedf_upload_connection(qedf, fcport); + qedf_free_sq(qedf, fcport); + fcport->rdata = NULL; + fcport->qedf = NULL; +} + +/** + * This event_callback is called after successful completion of libfc + * initiated target login. qedf can proceed with initiating the session + * establishment. 
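+ *
+ * On RPORT_EV_READY the driver allocates a send queue for the remote
+ * port, offloads the connection to the firmware and, on success, adds
+ * the fcport to qedf->fcports and sets QEDF_RPORT_SESSION_READY.
+ * RPORT_EV_LOGO/FAILED/STOP undo this: outstanding I/O is flushed and
+ * the connection is uploaded via qedf_cleanup_fcport().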
+ */
+static void qedf_rport_event_handler(struct fc_lport *lport,
+ struct fc_rport_priv *rdata,
+ enum fc_rport_event event)
+{
+ struct qedf_ctx *qedf = lport_priv(lport);
+ struct fc_rport *rport = rdata->rport;
+ struct fc_rport_libfc_priv *rp;
+ struct qedf_rport *fcport;
+ u32 port_id;
+ int rval;
+ unsigned long flags;
+
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "event = %d, "
+ "port_id = 0x%x\n", event, rdata->ids.port_id);
+
+ switch (event) {
+ case RPORT_EV_READY:
+ if (!rport) {
+ QEDF_WARN(&(qedf->dbg_ctx), "rport is NULL.\n");
+ break;
+ }
+
+ rp = rport->dd_data;
+ fcport = (struct qedf_rport *)&rp[1];
+ fcport->qedf = qedf;
+
+ if (atomic_read(&qedf->num_offloads) >= QEDF_MAX_SESSIONS) {
+ QEDF_ERR(&(qedf->dbg_ctx), "Not offloading "
+ "portid=0x%x as max number of offloaded sessions "
+ "reached.\n", rdata->ids.port_id);
+ return;
+ }
+
+ /*
+ * Don't try to offload the session again. Can happen when we
+ * get an ADISC.
+ */
+ if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) {
+ QEDF_WARN(&(qedf->dbg_ctx), "Session already "
+ "offloaded, portid=0x%x.\n",
+ rdata->ids.port_id);
+ return;
+ }
+
+ if (rport->port_id == FC_FID_DIR_SERV) {
+ /*
+ * qedf_rport structure doesn't exist for
+ * directory server.
+ * We should not come here, as lport will
+ * take care of fabric login
+ */
+ QEDF_WARN(&(qedf->dbg_ctx), "rport struct does not "
+ "exist for dir server port_id=%x\n",
+ rdata->ids.port_id);
+ break;
+ }
+
+ if (rdata->spp_type != FC_TYPE_FCP) {
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
+ "Not offloading since spp type isn't FCP\n");
+ break;
+ }
+ if (!(rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET)) {
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
+ "Not FCP target so not offloading\n");
+ break;
+ }
+
+ fcport->rdata = rdata;
+ fcport->rport = rport;
+
+ rval = qedf_alloc_sq(qedf, fcport);
+ if (rval) {
+ qedf_cleanup_fcport(qedf, fcport);
+ break;
+ }
+
+ /* Set device type */
+ if (rdata->flags & FC_RP_FLAGS_RETRY &&
+ rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET &&
+ !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) {
+ fcport->dev_type = QEDF_RPORT_TYPE_TAPE;
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
+ "portid=%06x is a TAPE device.\n",
+ rdata->ids.port_id);
+ } else {
+ fcport->dev_type = QEDF_RPORT_TYPE_DISK;
+ }
+
+ rval = qedf_offload_connection(qedf, fcport);
+ if (rval) {
+ qedf_cleanup_fcport(qedf, fcport);
+ break;
+ }
+
+ /* Add fcport to the qedf_ctx list of offloaded ports */
+ spin_lock_irqsave(&qedf->hba_lock, flags);
+ list_add_rcu(&fcport->peers, &qedf->fcports);
+ spin_unlock_irqrestore(&qedf->hba_lock, flags);
+
+ /*
+ * Set the session ready bit to let everyone know that this
+ * connection is ready for I/O
+ */
+ set_bit(QEDF_RPORT_SESSION_READY, &fcport->flags);
+ atomic_inc(&qedf->num_offloads);
+
+ break;
+ case RPORT_EV_LOGO:
+ case RPORT_EV_FAILED:
+ case RPORT_EV_STOP:
+ port_id = rdata->ids.port_id;
+ if (port_id == FC_FID_DIR_SERV)
+ break;
+
+ if (!rport) {
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
+ "port_id=%x - rport not created yet!\n", port_id);
+ break;
+ }
+ rp = rport->dd_data;
+ /*
+ * Perform session upload. Note that rdata->peers is already
+ * removed from disc->rports list before we get this event.
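+ * Only sessions that actually reached the offloaded state
+ * (QEDF_RPORT_SESSION_READY) are uploaded; QEDF_RPORT_UPLOADING_CONNECTION
+ * stays set for the duration so the receive path can drop frames
+ * belonging to the dying session.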
+ */ + fcport = (struct qedf_rport *)&rp[1]; + + /* Only free this fcport if it is offloaded already */ + if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + set_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags); + qedf_cleanup_fcport(qedf, fcport); + + /* + * Remove fcport to list of qedf_ctx list of offloaded + * ports + */ + spin_lock_irqsave(&qedf->hba_lock, flags); + list_del_rcu(&fcport->peers); + spin_unlock_irqrestore(&qedf->hba_lock, flags); + + clear_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags); + atomic_dec(&qedf->num_offloads); + } + + break; + + case RPORT_EV_NONE: + break; + } +} + +static void qedf_abort_io(struct fc_lport *lport) +{ + /* NO-OP but need to fill in the template */ +} + +static void qedf_fcp_cleanup(struct fc_lport *lport) +{ + /* + * NO-OP but need to fill in template to prevent a NULL + * function pointer dereference during link down. I/Os + * will be flushed when port is uploaded. + */ +} + +static struct libfc_function_template qedf_lport_template = { + .frame_send = qedf_xmit, + .fcp_abort_io = qedf_abort_io, + .fcp_cleanup = qedf_fcp_cleanup, + .rport_event_callback = qedf_rport_event_handler, + .elsct_send = qedf_elsct_send, +}; + +static void qedf_fcoe_ctlr_setup(struct qedf_ctx *qedf) +{ + fcoe_ctlr_init(&qedf->ctlr, FIP_ST_AUTO); + + qedf->ctlr.send = qedf_fip_send; + qedf->ctlr.update_mac = qedf_update_src_mac; + qedf->ctlr.get_src_addr = qedf_get_src_mac; + ether_addr_copy(qedf->ctlr.ctl_src_addr, qedf->mac); +} + +static int qedf_lport_setup(struct qedf_ctx *qedf) +{ + struct fc_lport *lport = qedf->lport; + + lport->link_up = 0; + lport->max_retry_count = QEDF_FLOGI_RETRY_CNT; + lport->max_rport_retry_count = QEDF_RPORT_RETRY_CNT; + lport->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL); + lport->boot_time = jiffies; + lport->e_d_tov = 2 * 1000; + lport->r_a_tov = 10 * 1000; + + /* Set NPIV support */ + lport->does_npiv = 1; + fc_host_max_npiv_vports(lport->host) = QEDF_MAX_NPIV; + + fc_set_wwnn(lport, qedf->wwnn); + fc_set_wwpn(lport, qedf->wwpn); + + fcoe_libfc_config(lport, &qedf->ctlr, &qedf_lport_template, 0); + + /* Allocate the exchange manager */ + fc_exch_mgr_alloc(lport, FC_CLASS_3, qedf->max_scsi_xid + 1, + qedf->max_els_xid, NULL); + + if (fc_lport_init_stats(lport)) + return -ENOMEM; + + /* Finish lport config */ + fc_lport_config(lport); + + /* Set max frame size */ + fc_set_mfs(lport, QEDF_MFS); + fc_host_maxframe_size(lport->host) = lport->mfs; + + /* Set default dev_loss_tmo based on module parameter */ + fc_host_dev_loss_tmo(lport->host) = qedf_dev_loss_tmo; + + /* Set symbolic node name */ + snprintf(fc_host_symbolic_name(lport->host), 256, + "QLogic %s v%s", QEDF_MODULE_NAME, QEDF_VERSION); + + return 0; +} + +/* + * NPIV functions + */ + +static int qedf_vport_libfc_config(struct fc_vport *vport, + struct fc_lport *lport) +{ + lport->link_up = 0; + lport->qfull = 0; + lport->max_retry_count = QEDF_FLOGI_RETRY_CNT; + lport->max_rport_retry_count = QEDF_RPORT_RETRY_CNT; + lport->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL); + lport->boot_time = jiffies; + lport->e_d_tov = 2 * 1000; + lport->r_a_tov = 10 * 1000; + lport->does_npiv = 1; /* Temporary until we add NPIV support */ + + /* Allocate stats for vport */ + if (fc_lport_init_stats(lport)) + return -ENOMEM; + + /* Finish lport config */ + fc_lport_config(lport); + + /* offload related configuration */ + lport->crc_offload = 0; + 
lport->seq_offload = 0; + lport->lro_enabled = 0; + lport->lro_xid = 0; + lport->lso_max = 0; + + return 0; +} + +static int qedf_vport_create(struct fc_vport *vport, bool disabled) +{ + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + struct fc_lport *vn_port; + struct qedf_ctx *base_qedf = lport_priv(n_port); + struct qedf_ctx *vport_qedf; + + char buf[32]; + int rc = 0; + + rc = fcoe_validate_vport_create(vport); + if (rc) { + fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf)); + QEDF_WARN(&(base_qedf->dbg_ctx), "Failed to create vport, " + "WWPN (0x%s) already exists.\n", buf); + goto err1; + } + + if (atomic_read(&base_qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Cannot create vport " + "because link is not up.\n"); + rc = -EIO; + goto err1; + } + + vn_port = libfc_vport_create(vport, sizeof(struct qedf_ctx)); + if (!vn_port) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Could not create lport " + "for vport.\n"); + rc = -ENOMEM; + goto err1; + } + + fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf)); + QEDF_ERR(&(base_qedf->dbg_ctx), "Creating NPIV port, WWPN=%s.\n", + buf); + + /* Copy some fields from base_qedf */ + vport_qedf = lport_priv(vn_port); + memcpy(vport_qedf, base_qedf, sizeof(struct qedf_ctx)); + + /* Set qedf data specific to this vport */ + vport_qedf->lport = vn_port; + /* Use same hba_lock as base_qedf */ + vport_qedf->hba_lock = base_qedf->hba_lock; + vport_qedf->pdev = base_qedf->pdev; + vport_qedf->cmd_mgr = base_qedf->cmd_mgr; + init_completion(&vport_qedf->flogi_compl); + INIT_LIST_HEAD(&vport_qedf->fcports); + + rc = qedf_vport_libfc_config(vport, vn_port); + if (rc) { + QEDF_ERR(&(base_qedf->dbg_ctx), "Could not allocate memory " + "for lport stats.\n"); + goto err2; + } + + fc_set_wwnn(vn_port, vport->node_name); + fc_set_wwpn(vn_port, vport->port_name); + vport_qedf->wwnn = vn_port->wwnn; + vport_qedf->wwpn = vn_port->wwpn; + + vn_port->host->transportt = qedf_fc_vport_transport_template; + vn_port->host->can_queue = QEDF_MAX_ELS_XID; + vn_port->host->max_lun = qedf_max_lun; + vn_port->host->sg_tablesize = QEDF_MAX_BDS_PER_CMD; + vn_port->host->max_cmd_len = QEDF_MAX_CDB_LEN; + + rc = scsi_add_host(vn_port->host, &vport->dev); + if (rc) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Error adding Scsi_Host.\n"); + goto err2; + } + + /* Set default dev_loss_tmo based on module parameter */ + fc_host_dev_loss_tmo(vn_port->host) = qedf_dev_loss_tmo; + + /* Init libfc stuffs */ + memcpy(&vn_port->tt, &qedf_lport_template, + sizeof(qedf_lport_template)); + fc_exch_init(vn_port); + fc_elsct_init(vn_port); + fc_lport_init(vn_port); + fc_disc_init(vn_port); + fc_disc_config(vn_port, vn_port); + + + /* Allocate the exchange manager */ + shost = vport_to_shost(vport); + n_port = shost_priv(shost); + fc_exch_mgr_list_clone(n_port, vn_port); + + /* Set max frame size */ + fc_set_mfs(vn_port, QEDF_MFS); + + fc_host_port_type(vn_port->host) = FC_PORTTYPE_UNKNOWN; + + if (disabled) { + fc_vport_set_state(vport, FC_VPORT_DISABLED); + } else { + vn_port->boot_time = jiffies; + fc_fabric_login(vn_port); + fc_vport_setlink(vn_port); + } + + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n", + vn_port); + + /* Set up debug context for vport */ + vport_qedf->dbg_ctx.host_no = vn_port->host->host_no; + vport_qedf->dbg_ctx.pdev = base_qedf->pdev; + +err2: + scsi_host_put(vn_port->host); +err1: + return rc; +} + +static int qedf_vport_destroy(struct fc_vport *vport) +{ + struct Scsi_Host *shost = 
vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + struct fc_lport *vn_port = vport->dd_data; + + mutex_lock(&n_port->lp_mutex); + list_del(&vn_port->list); + mutex_unlock(&n_port->lp_mutex); + + fc_fabric_logoff(vn_port); + fc_lport_destroy(vn_port); + + /* Detach from scsi-ml */ + fc_remove_host(vn_port->host); + scsi_remove_host(vn_port->host); + + /* + * Only try to release the exchange manager if the vn_port + * configuration is complete. + */ + if (vn_port->state == LPORT_ST_READY) + fc_exch_mgr_free(vn_port); + + /* Free memory used by statistical counters */ + fc_lport_free_stats(vn_port); + + /* Release Scsi_Host */ + if (vn_port->host) + scsi_host_put(vn_port->host); + + return 0; +} + +static int qedf_vport_disable(struct fc_vport *vport, bool disable) +{ + struct fc_lport *lport = vport->dd_data; + + if (disable) { + fc_vport_set_state(vport, FC_VPORT_DISABLED); + fc_fabric_logoff(lport); + } else { + lport->boot_time = jiffies; + fc_fabric_login(lport); + fc_vport_setlink(lport); + } + return 0; +} + +/* + * During removal we need to wait for all the vports associated with a port + * to be destroyed so we avoid a race condition where libfc is still trying + * to reap vports while the driver remove function has already reaped the + * driver contexts associated with the physical port. + */ +static void qedf_wait_for_vport_destroy(struct qedf_ctx *qedf) +{ + struct fc_host_attrs *fc_host = shost_to_fc_host(qedf->lport->host); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_NPIV, + "Entered.\n"); + while (fc_host->npiv_vports_inuse > 0) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_NPIV, + "Waiting for all vports to be reaped.\n"); + msleep(1000); + } +} + +/** + * qedf_fcoe_reset - Resets the fcoe + * + * @shost: shost the reset is from + * + * Returns: always 0 + */ +static int qedf_fcoe_reset(struct Scsi_Host *shost) +{ + struct fc_lport *lport = shost_priv(shost); + + fc_fabric_logoff(lport); + fc_fabric_login(lport); + return 0; +} + +static struct fc_host_statistics *qedf_fc_get_host_stats(struct Scsi_Host + *shost) +{ + struct fc_host_statistics *qedf_stats; + struct fc_lport *lport = shost_priv(shost); + struct qedf_ctx *qedf = lport_priv(lport); + struct qed_fcoe_stats *fw_fcoe_stats; + + qedf_stats = fc_get_host_stats(shost); + + /* We don't collect offload stats for specific NPIV ports */ + if (lport->vport) + goto out; + + fw_fcoe_stats = kmalloc(sizeof(struct qed_fcoe_stats), GFP_KERNEL); + if (!fw_fcoe_stats) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate memory for " + "fw_fcoe_stats.\n"); + goto out; + } + + /* Query firmware for offload stats */ + qed_ops->get_stats(qedf->cdev, fw_fcoe_stats); + + /* + * The expectation is that we add our offload stats to the stats + * being maintained by libfc each time the fc_get_host_status callback + * is invoked. The additions are not carried over for each call to + * the fc_get_host_stats callback. 
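+ * Byte counters reported by the firmware are converted on the way in:
+ * rx_words/tx_words divide the byte counts by four, since one FC word
+ * is four bytes.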
+ */ + qedf_stats->tx_frames += fw_fcoe_stats->fcoe_tx_data_pkt_cnt + + fw_fcoe_stats->fcoe_tx_xfer_pkt_cnt + + fw_fcoe_stats->fcoe_tx_other_pkt_cnt; + qedf_stats->rx_frames += fw_fcoe_stats->fcoe_rx_data_pkt_cnt + + fw_fcoe_stats->fcoe_rx_xfer_pkt_cnt + + fw_fcoe_stats->fcoe_rx_other_pkt_cnt; + qedf_stats->fcp_input_megabytes += + do_div(fw_fcoe_stats->fcoe_rx_byte_cnt, 1000000); + qedf_stats->fcp_output_megabytes += + do_div(fw_fcoe_stats->fcoe_tx_byte_cnt, 1000000); + qedf_stats->rx_words += fw_fcoe_stats->fcoe_rx_byte_cnt / 4; + qedf_stats->tx_words += fw_fcoe_stats->fcoe_tx_byte_cnt / 4; + qedf_stats->invalid_crc_count += + fw_fcoe_stats->fcoe_silent_drop_pkt_crc_error_cnt; + qedf_stats->dumped_frames = + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt; + qedf_stats->error_frames += + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt; + qedf_stats->fcp_input_requests += qedf->input_requests; + qedf_stats->fcp_output_requests += qedf->output_requests; + qedf_stats->fcp_control_requests += qedf->control_requests; + qedf_stats->fcp_packet_aborts += qedf->packet_aborts; + qedf_stats->fcp_frame_alloc_failures += qedf->alloc_failures; + + kfree(fw_fcoe_stats); +out: + return qedf_stats; +} + +static struct fc_function_template qedf_fc_transport_fn = { + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fc_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + + /* + * Tell FC transport to allocate enough space to store the backpointer + * for the associate qedf_rport struct. 
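+ *
+ * rport->dd_data -> [ struct fc_rport_libfc_priv ][ struct qedf_rport ]
+ *
+ * which is why the driver recovers its per-rport context with
+ * fcport = (struct qedf_rport *)&rp[1] in qedf_eh_abort() and
+ * qedf_rport_event_handler().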
+ */ + .dd_fcrport_size = (sizeof(struct fc_rport_libfc_priv) + + sizeof(struct qedf_rport)), + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .set_rport_dev_loss_tmo = fc_set_rport_loss_tmo, + .show_rport_dev_loss_tmo = 1, + .get_fc_host_stats = qedf_fc_get_host_stats, + .issue_fc_host_lip = qedf_fcoe_reset, + .vport_create = qedf_vport_create, + .vport_delete = qedf_vport_destroy, + .vport_disable = qedf_vport_disable, + .bsg_request = fc_lport_bsg_request, +}; + +static struct fc_function_template qedf_fc_vport_transport_fn = { + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fc_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + .dd_fcrport_size = (sizeof(struct fc_rport_libfc_priv) + + sizeof(struct qedf_rport)), + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .set_rport_dev_loss_tmo = fc_set_rport_loss_tmo, + .show_rport_dev_loss_tmo = 1, + .get_fc_host_stats = fc_get_host_stats, + .issue_fc_host_lip = qedf_fcoe_reset, + .bsg_request = fc_lport_bsg_request, +}; + +static bool qedf_fp_has_work(struct qedf_fastpath *fp) +{ + struct qedf_ctx *qedf = fp->qedf; + struct global_queue *que; + struct qed_sb_info *sb_info = fp->sb_info; + struct status_block *sb = sb_info->sb_virt; + u16 prod_idx; + + /* Get the pointer to the global CQ this completion is on */ + que = qedf->global_queues[fp->sb_id]; + + /* Be sure all responses have been written to PI */ + rmb(); + + /* Get the current firmware producer index */ + prod_idx = sb->pi_array[QEDF_FCOE_PARAMS_GL_RQ_PI]; + + return (que->cq_prod_idx != prod_idx); +} + +/* + * Interrupt handler code. + */ + +/* Process completion queue and copy CQE contents for deferred processesing + * + * Return true if we should wake the I/O thread, false if not. + */ +static bool qedf_process_completions(struct qedf_fastpath *fp) +{ + struct qedf_ctx *qedf = fp->qedf; + struct qed_sb_info *sb_info = fp->sb_info; + struct status_block *sb = sb_info->sb_virt; + struct global_queue *que; + u16 prod_idx; + struct fcoe_cqe *cqe; + struct qedf_io_work *io_work; + int num_handled = 0; + unsigned int cpu; + struct qedf_ioreq *io_req = NULL; + u16 xid; + u16 new_cqes; + u32 comp_type; + + /* Get the current firmware producer index */ + prod_idx = sb->pi_array[QEDF_FCOE_PARAMS_GL_RQ_PI]; + + /* Get the pointer to the global CQ this completion is on */ + que = qedf->global_queues[fp->sb_id]; + + /* Calculate the amount of new elements since last processing */ + new_cqes = (prod_idx >= que->cq_prod_idx) ? 
+ (prod_idx - que->cq_prod_idx) :
+ 0x10000 - que->cq_prod_idx + prod_idx;
+
+ /* Save producer index */
+ que->cq_prod_idx = prod_idx;
+
+ while (new_cqes) {
+ fp->completions++;
+ num_handled++;
+ cqe = &que->cq[que->cq_cons_idx];
+
+ comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) &
+ FCOE_CQE_CQE_TYPE_MASK;
+
+ /*
+ * Process unsolicited CQEs directly in the interrupt handler
+ * since we need the fastpath ID
+ */
+ if (comp_type == FCOE_UNSOLIC_CQE_TYPE) {
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL,
+ "Unsolicited CQE.\n");
+ qedf_process_unsol_compl(qedf, fp->sb_id, cqe);
+ /*
+ * Don't add a work list item. Increment the consumer
+ * index and move on.
+ */
+ goto inc_idx;
+ }
+
+ xid = cqe->cqe_data & FCOE_CQE_TASK_ID_MASK;
+ io_req = &qedf->cmd_mgr->cmds[xid];
+
+ /*
+ * Figure out which percpu thread we should queue this I/O
+ * on.
+ */
+ if (!io_req)
+ /* If there is no io_req associated with this CQE,
+ * just queue it on CPU 0
+ */
+ cpu = 0;
+ else {
+ cpu = io_req->cpu;
+ io_req->int_cpu = smp_processor_id();
+ }
+
+ io_work = mempool_alloc(qedf->io_mempool, GFP_ATOMIC);
+ if (!io_work) {
+ QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate "
+ "work for I/O completion.\n");
+ continue;
+ }
+ memset(io_work, 0, sizeof(struct qedf_io_work));
+
+ INIT_WORK(&io_work->work, qedf_fp_io_handler);
+
+ /* Copy contents of CQE for deferred processing */
+ memcpy(&io_work->cqe, cqe, sizeof(struct fcoe_cqe));
+
+ io_work->qedf = fp->qedf;
+ io_work->fp = NULL; /* Only used for unsolicited frames */
+
+ queue_work_on(cpu, qedf_io_wq, &io_work->work);
+
+inc_idx:
+ que->cq_cons_idx++;
+ if (que->cq_cons_idx == fp->cq_num_entries)
+ que->cq_cons_idx = 0;
+ new_cqes--;
+ }
+
+ return true;
+}
+
+
+/* MSI-X fastpath handler code */
+static irqreturn_t qedf_msix_handler(int irq, void *dev_id)
+{
+ struct qedf_fastpath *fp = dev_id;
+
+ if (!fp) {
+ QEDF_ERR(NULL, "fp is null.\n");
+ return IRQ_HANDLED;
+ }
+ if (!fp->sb_info) {
+ QEDF_ERR(NULL, "fp->sb_info is null.");
+ return IRQ_HANDLED;
+ }
+
+ /*
+ * Disable interrupts for this status block while we process new
+ * completions
+ */
+ qed_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0 /*do not update*/);
+
+ while (1) {
+ qedf_process_completions(fp);
+
+ if (qedf_fp_has_work(fp) == 0) {
+ /* Update the sb information */
+ qed_sb_update_sb_idx(fp->sb_info);
+
+ /* Check for more work */
+ rmb();
+
+ if (qedf_fp_has_work(fp) == 0) {
+ /* Re-enable interrupts */
+ qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1);
+ return IRQ_HANDLED;
+ }
+ }
+ }
+
+ /* Do we ever want to break out of above loop?
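+ * In practice the loop above only exits through its internal return,
+ * once qedf_fp_has_work() still reports no work after the status block
+ * index has been updated and re-checked, so this point is not reached.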
*/ + return IRQ_HANDLED; +} + +/* simd handler for MSI/INTa */ +static void qedf_simd_int_handler(void *cookie) +{ + /* Cookie is qedf_ctx struct */ + struct qedf_ctx *qedf = (struct qedf_ctx *)cookie; + + QEDF_WARN(&(qedf->dbg_ctx), "qedf=%p.\n", qedf); +} + +#define QEDF_SIMD_HANDLER_NUM 0 +static void qedf_sync_free_irqs(struct qedf_ctx *qedf) +{ + int i; + + if (qedf->int_info.msix_cnt) { + for (i = 0; i < qedf->int_info.used_cnt; i++) { + synchronize_irq(qedf->int_info.msix[i].vector); + irq_set_affinity_hint(qedf->int_info.msix[i].vector, + NULL); + irq_set_affinity_notifier(qedf->int_info.msix[i].vector, + NULL); + free_irq(qedf->int_info.msix[i].vector, + &qedf->fp_array[i]); + } + } else + qed_ops->common->simd_handler_clean(qedf->cdev, + QEDF_SIMD_HANDLER_NUM); + + qedf->int_info.used_cnt = 0; + qed_ops->common->set_fp_int(qedf->cdev, 0); +} + +static int qedf_request_msix_irq(struct qedf_ctx *qedf) +{ + int i, rc, cpu; + + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < qedf->num_queues; i++) { + rc = request_irq(qedf->int_info.msix[i].vector, + qedf_msix_handler, 0, "qedf", &qedf->fp_array[i]); + + if (rc) { + QEDF_WARN(&(qedf->dbg_ctx), "request_irq failed.\n"); + qedf_sync_free_irqs(qedf); + return rc; + } + + qedf->int_info.used_cnt++; + rc = irq_set_affinity_hint(qedf->int_info.msix[i].vector, + get_cpu_mask(cpu)); + cpu = cpumask_next(cpu, cpu_online_mask); + } + + return 0; +} + +static int qedf_setup_int(struct qedf_ctx *qedf) +{ + int rc = 0; + + /* + * Learn interrupt configuration + */ + rc = qed_ops->common->set_fp_int(qedf->cdev, num_online_cpus()); + + rc = qed_ops->common->get_fp_int(qedf->cdev, &qedf->int_info); + if (rc) + return 0; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of msix_cnt = " + "0x%x num of cpus = 0x%x\n", qedf->int_info.msix_cnt, + num_online_cpus()); + + if (qedf->int_info.msix_cnt) + return qedf_request_msix_irq(qedf); + + qed_ops->common->simd_handler_config(qedf->cdev, &qedf, + QEDF_SIMD_HANDLER_NUM, qedf_simd_int_handler); + qedf->int_info.used_cnt = 1; + + return 0; +} + +/* Main function for libfc frame reception */ +static void qedf_recv_frame(struct qedf_ctx *qedf, + struct sk_buff *skb) +{ + u32 fr_len; + struct fc_lport *lport; + struct fc_frame_header *fh; + struct fcoe_crc_eof crc_eof; + struct fc_frame *fp; + u8 *mac = NULL; + u8 *dest_mac = NULL; + struct fcoe_hdr *hp; + struct qedf_rport *fcport; + + lport = qedf->lport; + if (lport == NULL || lport->state == LPORT_ST_DISABLED) { + QEDF_WARN(NULL, "Invalid lport struct or lport disabled.\n"); + kfree_skb(skb); + return; + } + + if (skb_is_nonlinear(skb)) + skb_linearize(skb); + mac = eth_hdr(skb)->h_source; + dest_mac = eth_hdr(skb)->h_dest; + + /* Pull the header */ + hp = (struct fcoe_hdr *)skb->data; + fh = (struct fc_frame_header *) skb_transport_header(skb); + skb_pull(skb, sizeof(struct fcoe_hdr)); + fr_len = skb->len - sizeof(struct fcoe_crc_eof); + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); + fr_dev(fp) = lport; + fr_sof(fp) = hp->fcoe_sof; + if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof))) { + kfree_skb(skb); + return; + } + fr_eof(fp) = crc_eof.fcoe_eof; + fr_crc(fp) = crc_eof.fcoe_crc32; + if (pskb_trim(skb, fr_len)) { + kfree_skb(skb); + return; + } + + fh = fc_frame_header_get(fp); + + if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA && + fh->fh_type == FC_TYPE_FCP) { + /* Drop FCP data. 
We dont this in L2 path */ + kfree_skb(skb); + return; + } + if (fh->fh_r_ctl == FC_RCTL_ELS_REQ && + fh->fh_type == FC_TYPE_ELS) { + switch (fc_frame_payload_op(fp)) { + case ELS_LOGO: + if (ntoh24(fh->fh_s_id) == FC_FID_FLOGI) { + /* drop non-FIP LOGO */ + kfree_skb(skb); + return; + } + break; + } + } + + if (fh->fh_r_ctl == FC_RCTL_BA_ABTS) { + /* Drop incoming ABTS */ + kfree_skb(skb); + return; + } + + /* + * If a connection is uploading, drop incoming FCoE frames as there + * is a small window where we could try to return a frame while libfc + * is trying to clean things up. + */ + + /* Get fcport associated with d_id if it exists */ + fcport = qedf_fcport_lookup(qedf, ntoh24(fh->fh_d_id)); + + if (fcport && test_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "Connection uploading, dropping fp=%p.\n", fp); + kfree_skb(skb); + return; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FCoE frame receive: " + "skb=%p fp=%p src=%06x dest=%06x r_ctl=%x fh_type=%x.\n", skb, fp, + ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id), fh->fh_r_ctl, + fh->fh_type); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fcoe: ", DUMP_PREFIX_OFFSET, 16, + 1, skb->data, skb->len, false); + fc_exch_recv(lport, fp); +} + +static void qedf_ll2_process_skb(struct work_struct *work) +{ + struct qedf_skb_work *skb_work = + container_of(work, struct qedf_skb_work, work); + struct qedf_ctx *qedf = skb_work->qedf; + struct sk_buff *skb = skb_work->skb; + struct ethhdr *eh; + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL\n"); + goto err_out; + } + + eh = (struct ethhdr *)skb->data; + + /* Undo VLAN encapsulation */ + if (eh->h_proto == htons(ETH_P_8021Q)) { + memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2); + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + skb_reset_mac_header(skb); + } + + /* + * Process either a FIP frame or FCoE frame based on the + * protocol value. If it's not either just drop the + * frame. + */ + if (eh->h_proto == htons(ETH_P_FIP)) { + qedf_fip_recv(qedf, skb); + goto out; + } else if (eh->h_proto == htons(ETH_P_FCOE)) { + __skb_pull(skb, ETH_HLEN); + qedf_recv_frame(qedf, skb); + goto out; + } else + goto err_out; + +err_out: + kfree_skb(skb); +out: + kfree(skb_work); + return; +} + +static int qedf_ll2_rx(void *cookie, struct sk_buff *skb, + u32 arg1, u32 arg2) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)cookie; + struct qedf_skb_work *skb_work; + + skb_work = kzalloc(sizeof(struct qedf_skb_work), GFP_ATOMIC); + if (!skb_work) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate skb_work so " + "dropping frame.\n"); + kfree_skb(skb); + return 0; + } + + INIT_WORK(&skb_work->work, qedf_ll2_process_skb); + skb_work->skb = skb; + skb_work->qedf = qedf; + queue_work(qedf->ll2_recv_wq, &skb_work->work); + + return 0; +} + +static struct qed_ll2_cb_ops qedf_ll2_cb_ops = { + .rx_cb = qedf_ll2_rx, + .tx_cb = NULL, +}; + +/* Main thread to process I/O completions */ +void qedf_fp_io_handler(struct work_struct *work) +{ + struct qedf_io_work *io_work = + container_of(work, struct qedf_io_work, work); + u32 comp_type; + + /* + * Deferred part of unsolicited CQE sends + * frame to libfc. 
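+ * All other (solicited) completions are handed to qedf_process_cqe(),
+ * which dispatches on the CQE type.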
+ */ + comp_type = (io_work->cqe.cqe_data >> + FCOE_CQE_CQE_TYPE_SHIFT) & + FCOE_CQE_CQE_TYPE_MASK; + if (comp_type == FCOE_UNSOLIC_CQE_TYPE && + io_work->fp) + fc_exch_recv(io_work->qedf->lport, io_work->fp); + else + qedf_process_cqe(io_work->qedf, &io_work->cqe); + + kfree(io_work); +} + +static int qedf_alloc_and_init_sb(struct qedf_ctx *qedf, + struct qed_sb_info *sb_info, u16 sb_id) +{ + struct status_block *sb_virt; + dma_addr_t sb_phys; + int ret; + + sb_virt = dma_alloc_coherent(&qedf->pdev->dev, + sizeof(struct status_block), &sb_phys, GFP_KERNEL); + + if (!sb_virt) { + QEDF_ERR(&(qedf->dbg_ctx), "Status block allocation failed " + "for id = %d.\n", sb_id); + return -ENOMEM; + } + + ret = qed_ops->common->sb_init(qedf->cdev, sb_info, sb_virt, sb_phys, + sb_id, QED_SB_TYPE_STORAGE); + + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "Status block initialization " + "failed for id = %d.\n", sb_id); + return ret; + } + + return 0; +} + +static void qedf_free_sb(struct qedf_ctx *qedf, struct qed_sb_info *sb_info) +{ + if (sb_info->sb_virt) + dma_free_coherent(&qedf->pdev->dev, sizeof(*sb_info->sb_virt), + (void *)sb_info->sb_virt, sb_info->sb_phys); +} + +static void qedf_destroy_sb(struct qedf_ctx *qedf) +{ + int id; + struct qedf_fastpath *fp = NULL; + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + if (fp->sb_id == QEDF_SB_ID_NULL) + break; + qedf_free_sb(qedf, fp->sb_info); + kfree(fp->sb_info); + } + kfree(qedf->fp_array); +} + +static int qedf_prepare_sb(struct qedf_ctx *qedf) +{ + int id; + struct qedf_fastpath *fp; + int ret; + + qedf->fp_array = + kcalloc(qedf->num_queues, sizeof(struct qedf_fastpath), + GFP_KERNEL); + + if (!qedf->fp_array) { + QEDF_ERR(&(qedf->dbg_ctx), "fastpath array allocation " + "failed.\n"); + return -ENOMEM; + } + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + fp->sb_id = QEDF_SB_ID_NULL; + fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL); + if (!fp->sb_info) { + QEDF_ERR(&(qedf->dbg_ctx), "SB info struct " + "allocation failed.\n"); + goto err; + } + ret = qedf_alloc_and_init_sb(qedf, fp->sb_info, id); + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "SB allocation and " + "initialization failed.\n"); + goto err; + } + fp->sb_id = id; + fp->qedf = qedf; + fp->cq_num_entries = + qedf->global_queues[id]->cq_mem_size / + sizeof(struct fcoe_cqe); + } +err: + return 0; +} + +void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) +{ + u16 xid; + struct qedf_ioreq *io_req; + struct qedf_rport *fcport; + u32 comp_type; + + comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & + FCOE_CQE_CQE_TYPE_MASK; + + xid = cqe->cqe_data & FCOE_CQE_TASK_ID_MASK; + io_req = &qedf->cmd_mgr->cmds[xid]; + + /* Completion not for a valid I/O anymore so just return */ + if (!io_req) + return; + + fcport = io_req->fcport; + + if (fcport == NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "fcport is NULL.\n"); + return; + } + + /* + * Check that fcport is offloaded. If it isn't then the spinlock + * isn't valid and shouldn't be taken. We should just return. 
+ */ + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n"); + return; + } + + + switch (comp_type) { + case FCOE_GOOD_COMPLETION_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + switch (io_req->cmd_type) { + case QEDF_SCSI_CMD: + qedf_scsi_completion(qedf, cqe, io_req); + break; + case QEDF_ELS: + qedf_process_els_compl(qedf, cqe, io_req); + break; + case QEDF_TASK_MGMT_CMD: + qedf_process_tmf_compl(qedf, cqe, io_req); + break; + case QEDF_SEQ_CLEANUP: + qedf_process_seq_cleanup_compl(qedf, cqe, io_req); + break; + } + break; + case FCOE_ERROR_DETECTION_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Error detect CQE.\n"); + qedf_process_error_detect(qedf, cqe, io_req); + break; + case FCOE_EXCH_CLEANUP_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Cleanup CQE.\n"); + qedf_process_cleanup_compl(qedf, cqe, io_req); + break; + case FCOE_ABTS_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Abort CQE.\n"); + qedf_process_abts_compl(qedf, cqe, io_req); + break; + case FCOE_DUMMY_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Dummy CQE.\n"); + break; + case FCOE_LOCAL_COMP_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Local completion CQE.\n"); + break; + case FCOE_WARNING_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Warning CQE.\n"); + qedf_process_warning_compl(qedf, cqe, io_req); + break; + case MAX_FCOE_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Max FCoE CQE.\n"); + break; + default: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Default CQE.\n"); + break; + } +} + +static void qedf_free_bdq(struct qedf_ctx *qedf) +{ + int i; + + if (qedf->bdq_pbl_list) + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + qedf->bdq_pbl_list, qedf->bdq_pbl_list_dma); + + if (qedf->bdq_pbl) + dma_free_coherent(&qedf->pdev->dev, qedf->bdq_pbl_mem_size, + qedf->bdq_pbl, qedf->bdq_pbl_dma); + + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + if (qedf->bdq[i].buf_addr) { + dma_free_coherent(&qedf->pdev->dev, QEDF_BDQ_BUF_SIZE, + qedf->bdq[i].buf_addr, qedf->bdq[i].buf_dma); + } + } +} + +static void qedf_free_global_queues(struct qedf_ctx *qedf) +{ + int i; + struct global_queue **gl = qedf->global_queues; + + for (i = 0; i < qedf->num_queues; i++) { + if (!gl[i]) + continue; + + if (gl[i]->cq) + dma_free_coherent(&qedf->pdev->dev, + gl[i]->cq_mem_size, gl[i]->cq, gl[i]->cq_dma); + if (gl[i]->cq_pbl) + dma_free_coherent(&qedf->pdev->dev, gl[i]->cq_pbl_size, + gl[i]->cq_pbl, gl[i]->cq_pbl_dma); + + kfree(gl[i]); + } + + qedf_free_bdq(qedf); +} + +static int qedf_alloc_bdq(struct qedf_ctx *qedf) +{ + int i; + struct scsi_bd *pbl; + u64 *list; + dma_addr_t page; + + /* Alloc dma memory for BDQ buffers */ + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + qedf->bdq[i].buf_addr = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_BDQ_BUF_SIZE, &qedf->bdq[i].buf_dma, GFP_KERNEL); + if (!qedf->bdq[i].buf_addr) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate BDQ " + "buffer %d.\n", i); + return -ENOMEM; + } + } + + /* Alloc dma memory for BDQ page buffer list */ + qedf->bdq_pbl_mem_size = + QEDF_BDQ_SIZE * sizeof(struct scsi_bd); + qedf->bdq_pbl_mem_size = + ALIGN(qedf->bdq_pbl_mem_size, QEDF_PAGE_SIZE); + + qedf->bdq_pbl = dma_alloc_coherent(&qedf->pdev->dev, + 
qedf->bdq_pbl_mem_size, &qedf->bdq_pbl_dma, GFP_KERNEL); + if (!qedf->bdq_pbl) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate BDQ PBL.\n"); + return -ENOMEM; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl, + qedf->bdq_pbl_dma); + + /* + * Populate BDQ PBL with physical and virtual address of individual + * BDQ buffers + */ + pbl = (struct scsi_bd *)qedf->bdq_pbl; + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + pbl->address.hi = cpu_to_le32(U64_HI(qedf->bdq[i].buf_dma)); + pbl->address.lo = cpu_to_le32(U64_LO(qedf->bdq[i].buf_dma)); + pbl->opaque.hi = 0; + /* Opaque lo data is an index into the BDQ array */ + pbl->opaque.lo = cpu_to_le32(i); + pbl++; + } + + /* Allocate list of PBL pages */ + qedf->bdq_pbl_list = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_PAGE_SIZE, &qedf->bdq_pbl_list_dma, GFP_KERNEL); + if (!qedf->bdq_pbl_list) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate list of PBL " + "pages.\n"); + return -ENOMEM; + } + memset(qedf->bdq_pbl_list, 0, QEDF_PAGE_SIZE); + + /* + * Now populate PBL list with pages that contain pointers to the + * individual buffers. + */ + qedf->bdq_pbl_list_num_entries = qedf->bdq_pbl_mem_size / + QEDF_PAGE_SIZE; + list = (u64 *)qedf->bdq_pbl_list; + page = qedf->bdq_pbl_list_dma; + for (i = 0; i < qedf->bdq_pbl_list_num_entries; i++) { + *list = qedf->bdq_pbl_dma; + list++; + page += QEDF_PAGE_SIZE; + } + + return 0; +} + +static int qedf_alloc_global_queues(struct qedf_ctx *qedf) +{ + u32 *list; + int i; + int status = 0, rc; + u32 *pbl; + dma_addr_t page; + int num_pages; + + /* Allocate and map CQs, RQs */ + /* + * Number of global queues (CQ / RQ). This should + * be <= number of available MSIX vectors for the PF + */ + if (!qedf->num_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "No MSI-X vectors available!\n"); + return 1; + } + + /* + * Make sure we allocated the PBL that will contain the physical + * addresses of our queues + */ + if (!qedf->p_cpuq) { + status = 1; + goto mem_alloc_failure; + } + + qedf->global_queues = kzalloc((sizeof(struct global_queue *) + * qedf->num_queues), GFP_KERNEL); + if (!qedf->global_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate global " + "queues array ptr memory\n"); + return -ENOMEM; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "qedf->global_queues=%p.\n", qedf->global_queues); + + /* Allocate DMA coherent buffers for BDQ */ + rc = qedf_alloc_bdq(qedf); + if (rc) + goto mem_alloc_failure; + + /* Allocate a CQ and an associated PBL for each MSI-X vector */ + for (i = 0; i < qedf->num_queues; i++) { + qedf->global_queues[i] = kzalloc(sizeof(struct global_queue), + GFP_KERNEL); + if (!qedf->global_queues[i]) { + QEDF_WARN(&(qedf->dbg_ctx), "Unable to allocation " + "global queue %d.\n", i); + goto mem_alloc_failure; + } + + qedf->global_queues[i]->cq_mem_size = + FCOE_PARAMS_CQ_NUM_ENTRIES * sizeof(struct fcoe_cqe); + qedf->global_queues[i]->cq_mem_size = + ALIGN(qedf->global_queues[i]->cq_mem_size, QEDF_PAGE_SIZE); + + qedf->global_queues[i]->cq_pbl_size = + (qedf->global_queues[i]->cq_mem_size / + PAGE_SIZE) * sizeof(void *); + qedf->global_queues[i]->cq_pbl_size = + ALIGN(qedf->global_queues[i]->cq_pbl_size, QEDF_PAGE_SIZE); + + qedf->global_queues[i]->cq = + dma_alloc_coherent(&qedf->pdev->dev, + qedf->global_queues[i]->cq_mem_size, + &qedf->global_queues[i]->cq_dma, GFP_KERNEL); + + if (!qedf->global_queues[i]->cq) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "cq.\n"); + status = -ENOMEM; + goto mem_alloc_failure; + } + 
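+ /*
+ * Zero the CQ before the firmware ever sees it; dma_alloc_coherent()
+ * is not guaranteed to return zeroed memory here.
+ */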
memset(qedf->global_queues[i]->cq, 0, + qedf->global_queues[i]->cq_mem_size); + + qedf->global_queues[i]->cq_pbl = + dma_alloc_coherent(&qedf->pdev->dev, + qedf->global_queues[i]->cq_pbl_size, + &qedf->global_queues[i]->cq_pbl_dma, GFP_KERNEL); + + if (!qedf->global_queues[i]->cq_pbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "cq PBL.\n"); + status = -ENOMEM; + goto mem_alloc_failure; + } + memset(qedf->global_queues[i]->cq_pbl, 0, + qedf->global_queues[i]->cq_pbl_size); + + /* Create PBL */ + num_pages = qedf->global_queues[i]->cq_mem_size / + QEDF_PAGE_SIZE; + page = qedf->global_queues[i]->cq_dma; + pbl = (u32 *)qedf->global_queues[i]->cq_pbl; + + while (num_pages--) { + *pbl = U64_LO(page); + pbl++; + *pbl = U64_HI(page); + pbl++; + page += QEDF_PAGE_SIZE; + } + /* Set the initial consumer index for cq */ + qedf->global_queues[i]->cq_cons_idx = 0; + } + + list = (u32 *)qedf->p_cpuq; + + /* + * The list is built as follows: CQ#0 PBL pointer, RQ#0 PBL pointer, + * CQ#1 PBL pointer, RQ#1 PBL pointer, etc. Each PBL pointer points + * to the physical address which contains an array of pointers to + * the physical addresses of the specific queue pages. + */ + for (i = 0; i < qedf->num_queues; i++) { + *list = U64_LO(qedf->global_queues[i]->cq_pbl_dma); + list++; + *list = U64_HI(qedf->global_queues[i]->cq_pbl_dma); + list++; + *list = U64_LO(0); + list++; + *list = U64_HI(0); + list++; + } + + return 0; + +mem_alloc_failure: + qedf_free_global_queues(qedf); + return status; +} + +static int qedf_set_fcoe_pf_param(struct qedf_ctx *qedf) +{ + u8 sq_num_pbl_pages; + u32 sq_mem_size; + u32 cq_mem_size; + u32 cq_num_entries; + int rval; + + /* + * The number of completion queues/fastpath interrupts/status blocks + * we allocation is the minimum off: + * + * Number of CPUs + * Number of MSI-X vectors + * Max number allocated in hardware (QEDF_MAX_NUM_CQS) + */ + qedf->num_queues = min((unsigned int)QEDF_MAX_NUM_CQS, + num_online_cpus()); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of CQs is %d.\n", + qedf->num_queues); + + qedf->p_cpuq = pci_alloc_consistent(qedf->pdev, + qedf->num_queues * sizeof(struct qedf_glbl_q_params), + &qedf->hw_p_cpuq); + + if (!qedf->p_cpuq) { + QEDF_ERR(&(qedf->dbg_ctx), "pci_alloc_consistent failed.\n"); + return 1; + } + + rval = qedf_alloc_global_queues(qedf); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Global queue allocation " + "failed.\n"); + return 1; + } + + /* Calculate SQ PBL size in the same manner as in qedf_sq_alloc() */ + sq_mem_size = SQ_NUM_ENTRIES * sizeof(struct fcoe_wqe); + sq_mem_size = ALIGN(sq_mem_size, QEDF_PAGE_SIZE); + sq_num_pbl_pages = (sq_mem_size / QEDF_PAGE_SIZE); + + /* Calculate CQ num entries */ + cq_mem_size = FCOE_PARAMS_CQ_NUM_ENTRIES * sizeof(struct fcoe_cqe); + cq_mem_size = ALIGN(cq_mem_size, QEDF_PAGE_SIZE); + cq_num_entries = cq_mem_size / sizeof(struct fcoe_cqe); + + memset(&(qedf->pf_params), 0, + sizeof(qedf->pf_params)); + + /* Setup the value for fcoe PF */ + qedf->pf_params.fcoe_pf_params.num_cons = QEDF_MAX_SESSIONS; + qedf->pf_params.fcoe_pf_params.num_tasks = FCOE_PARAMS_NUM_TASKS; + qedf->pf_params.fcoe_pf_params.glbl_q_params_addr = + (u64)qedf->hw_p_cpuq; + qedf->pf_params.fcoe_pf_params.sq_num_pbl_pages = sq_num_pbl_pages; + + qedf->pf_params.fcoe_pf_params.rq_buffer_log_size = 0; + + qedf->pf_params.fcoe_pf_params.cq_num_entries = cq_num_entries; + qedf->pf_params.fcoe_pf_params.num_cqs = qedf->num_queues; + + /* log_page_size: 12 for 4KB pages */ + 
qedf->pf_params.fcoe_pf_params.log_page_size = ilog2(QEDF_PAGE_SIZE); + + qedf->pf_params.fcoe_pf_params.mtu = 9000; + qedf->pf_params.fcoe_pf_params.gl_rq_pi = QEDF_FCOE_PARAMS_GL_RQ_PI; + qedf->pf_params.fcoe_pf_params.gl_cmd_pi = QEDF_FCOE_PARAMS_GL_CMD_PI; + + /* BDQ address and size */ + qedf->pf_params.fcoe_pf_params.bdq_pbl_base_addr[0] = + qedf->bdq_pbl_list_dma; + qedf->pf_params.fcoe_pf_params.bdq_pbl_num_entries[0] = + qedf->bdq_pbl_list_num_entries; + qedf->pf_params.fcoe_pf_params.rq_buffer_size = QEDF_BDQ_BUF_SIZE; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "bdq_list=%p bdq_pbl_list_dma=%llx bdq_pbl_list_entries=%d.\n", + qedf->bdq_pbl_list, + qedf->pf_params.fcoe_pf_params.bdq_pbl_base_addr[0], + qedf->pf_params.fcoe_pf_params.bdq_pbl_num_entries[0]); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "cq_num_entries=%d.\n", + qedf->pf_params.fcoe_pf_params.cq_num_entries); + + return 0; +} + +/* Free DMA coherent memory for array of queue pointers we pass to qed */ +static void qedf_free_fcoe_pf_param(struct qedf_ctx *qedf) +{ + size_t size = 0; + + if (qedf->p_cpuq) { + size = qedf->num_queues * sizeof(struct qedf_glbl_q_params); + pci_free_consistent(qedf->pdev, size, qedf->p_cpuq, + qedf->hw_p_cpuq); + } + + qedf_free_global_queues(qedf); + + if (qedf->global_queues) + kfree(qedf->global_queues); +} + +/* + * PCI driver functions + */ + +static const struct pci_device_id qedf_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x165c) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x8080) }, + {0} +}; +MODULE_DEVICE_TABLE(pci, qedf_pci_tbl); + +static struct pci_driver qedf_pci_driver = { + .name = QEDF_MODULE_NAME, + .id_table = qedf_pci_tbl, + .probe = qedf_probe, + .remove = qedf_remove, +}; + +static int __qedf_probe(struct pci_dev *pdev, int mode) +{ + int rc = -EINVAL; + struct fc_lport *lport; + struct qedf_ctx *qedf; + struct Scsi_Host *host; + bool is_vf = false; + struct qed_ll2_params params; + char host_buf[20]; + struct qed_link_params link_params; + int status; + void *task_start, *task_end; + struct qed_slowpath_params slowpath_params; + struct qed_probe_params qed_params; + u16 tmp; + + /* + * When doing error recovery we didn't reap the lport so don't try + * to reallocate it. 
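+ * In that case the existing qedf_ctx and lport are fetched back from
+ * the PCI drvdata below instead.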
+ */ + if (mode != QEDF_MODE_RECOVERY) { + lport = libfc_host_alloc(&qedf_host_template, + sizeof(struct qedf_ctx)); + + if (!lport) { + QEDF_ERR(NULL, "Could not allocate lport.\n"); + rc = -ENOMEM; + goto err0; + } + + /* Initialize qedf_ctx */ + qedf = lport_priv(lport); + qedf->lport = lport; + qedf->ctlr.lp = lport; + qedf->pdev = pdev; + qedf->dbg_ctx.pdev = pdev; + qedf->dbg_ctx.host_no = lport->host->host_no; + spin_lock_init(&qedf->hba_lock); + INIT_LIST_HEAD(&qedf->fcports); + qedf->curr_conn_id = QEDF_MAX_SESSIONS - 1; + atomic_set(&qedf->num_offloads, 0); + qedf->stop_io_on_error = false; + pci_set_drvdata(pdev, qedf); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, + "QLogic FastLinQ FCoE Module qedf %s, " + "FW %d.%d.%d.%d\n", QEDF_VERSION, + FW_MAJOR_VERSION, FW_MINOR_VERSION, FW_REVISION_VERSION, + FW_ENGINEERING_VERSION); + } else { + /* Init pointers during recovery */ + qedf = pci_get_drvdata(pdev); + lport = qedf->lport; + } + + host = lport->host; + + /* Allocate mempool for qedf_io_work structs */ + qedf->io_mempool = mempool_create_slab_pool(QEDF_IO_WORK_MIN, + qedf_io_work_cache); + if (qedf->io_mempool == NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "qedf->io_mempool is NULL.\n"); + goto err1; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "qedf->io_mempool=%p.\n", + qedf->io_mempool); + + sprintf(host_buf, "qedf_%u_link", + qedf->lport->host->host_no); + qedf->link_update_wq = create_singlethread_workqueue(host_buf); + INIT_DELAYED_WORK(&qedf->link_update, qedf_handle_link_update); + INIT_DELAYED_WORK(&qedf->link_recovery, qedf_link_recovery); + + qedf->fipvlan_retries = qedf_fipvlan_retries; + + /* + * Common probe. Takes care of basic hardware init and pci_* + * functions. + */ + memset(&qed_params, 0, sizeof(qed_params)); + qed_params.protocol = QED_PROTOCOL_FCOE; + qed_params.dp_module = qedf_dp_module; + qed_params.dp_level = qedf_dp_level; + qed_params.is_vf = is_vf; + qedf->cdev = qed_ops->common->probe(pdev, &qed_params); + if (!qedf->cdev) { + rc = -ENODEV; + goto err1; + } + + /* queue allocation code should come here + * order should be + * slowpath_start + * status block allocation + * interrupt registration (to get min number of queues) + * set_fcoe_pf_param + * qed_sp_fcoe_func_start + */ + rc = qedf_set_fcoe_pf_param(qedf); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot set fcoe pf param.\n"); + goto err2; + } + qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); + + /* Learn information crucial for qedf to progress */ + rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n"); + goto err1; + } + + /* Record BDQ producer doorbell addresses */ + qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr; + qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "BDQ primary_prod=%p secondary_prod=%p.\n", qedf->bdq_primary_prod, + qedf->bdq_secondary_prod); + + qed_ops->register_ops(qedf->cdev, &qedf_cb_ops, qedf); + + rc = qedf_prepare_sb(qedf); + if (rc) { + + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n"); + goto err2; + } + + /* Start the Slowpath-process */ + slowpath_params.int_mode = QED_INT_MODE_MSIX; + slowpath_params.drv_major = QEDF_DRIVER_MAJOR_VER; + slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER; + slowpath_params.drv_rev = QEDF_DRIVER_REV_VER; + slowpath_params.drv_eng = QEDF_DRIVER_ENG_VER; + memcpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE); + rc = 
qed_ops->common->slowpath_start(qedf->cdev, &slowpath_params); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n"); + goto err2; + } + + /* + * update_pf_params needs to be called before and after slowpath + * start + */ + qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); + + /* Setup interrupts */ + rc = qedf_setup_int(qedf); + if (rc) + goto err3; + + rc = qed_ops->start(qedf->cdev, &qedf->tasks); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start FCoE function.\n"); + goto err4; + } + task_start = qedf_get_task_mem(&qedf->tasks, 0); + task_end = qedf_get_task_mem(&qedf->tasks, MAX_TID_BLOCKS_FCOE - 1); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Task context start=%p, " + "end=%p block_size=%u.\n", task_start, task_end, + qedf->tasks.size); + + /* + * We need to write the number of BDs in the BDQ we've preallocated so + * the f/w will do a prefetch and we'll get an unsolicited CQE when a + * packet arrives. + */ + qedf->bdq_prod_idx = QEDF_BDQ_SIZE; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Writing %d to primary and secondary BDQ doorbell registers.\n", + qedf->bdq_prod_idx); + writew(qedf->bdq_prod_idx, qedf->bdq_primary_prod); + tmp = readw(qedf->bdq_primary_prod); + writew(qedf->bdq_prod_idx, qedf->bdq_secondary_prod); + tmp = readw(qedf->bdq_secondary_prod); + + qed_ops->common->set_power_state(qedf->cdev, PCI_D0); + + /* Now that the dev_info struct has been filled in set the MAC + * address + */ + ether_addr_copy(qedf->mac, qedf->dev_info.common.hw_mac); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "MAC address is %pM.\n", + qedf->mac); + + /* Set the WWNN and WWPN based on the MAC address */ + qedf->wwnn = fcoe_wwn_from_mac(qedf->mac, 1, 0); + qedf->wwpn = fcoe_wwn_from_mac(qedf->mac, 2, 0); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "WWNN=%016llx " + "WWPN=%016llx.\n", qedf->wwnn, qedf->wwpn); + + sprintf(host_buf, "host_%d", host->host_no); + qed_ops->common->set_id(qedf->cdev, host_buf, QEDF_VERSION); + + + /* Set xid max values */ + qedf->max_scsi_xid = QEDF_MAX_SCSI_XID; + qedf->max_els_xid = QEDF_MAX_ELS_XID; + + /* Allocate cmd mgr */ + qedf->cmd_mgr = qedf_cmd_mgr_alloc(qedf); + if (!qedf->cmd_mgr) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to allocate cmd mgr.\n"); + goto err5; + } + + if (mode != QEDF_MODE_RECOVERY) { + host->transportt = qedf_fc_transport_template; + host->can_queue = QEDF_MAX_ELS_XID; + host->max_lun = qedf_max_lun; + host->max_cmd_len = QEDF_MAX_CDB_LEN; + rc = scsi_add_host(host, &pdev->dev); + if (rc) + goto err6; + } + + memset(&params, 0, sizeof(params)); + params.mtu = 9000; + ether_addr_copy(params.ll2_mac_address, qedf->mac); + + /* Start LL2 processing thread */ + snprintf(host_buf, 20, "qedf_%d_ll2", host->host_no); + qedf->ll2_recv_wq = + create_singlethread_workqueue(host_buf); + if (!qedf->ll2_recv_wq) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to LL2 workqueue.\n"); + goto err7; + } + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_init(&(qedf->dbg_ctx), &qedf_debugfs_ops, + &qedf_dbg_fops); +#endif + + /* Start LL2 */ + qed_ops->ll2->register_cb_ops(qedf->cdev, &qedf_ll2_cb_ops, qedf); + rc = qed_ops->ll2->start(qedf->cdev, &params); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not start Light L2.\n"); + goto err7; + } + set_bit(QEDF_LL2_STARTED, &qedf->flags); + + /* hw will be insterting vlan tag*/ + qedf->vlan_hw_insert = 1; + qedf->vlan_id = 0; + + /* + * No need to setup fcoe_ctlr or fc_lport objects during recovery since + * they were not reaped during the unload process.
+ */ + if (mode != QEDF_MODE_RECOVERY) { + /* Setup imbedded fcoe controller */ + qedf_fcoe_ctlr_setup(qedf); + + /* Setup lport */ + rc = qedf_lport_setup(qedf); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "qedf_lport_setup failed.\n"); + goto err7; + } + } + + sprintf(host_buf, "qedf_%u_timer", qedf->lport->host->host_no); + qedf->timer_work_queue = + create_singlethread_workqueue(host_buf); + if (!qedf->timer_work_queue) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to start timer " + "workqueue.\n"); + goto err7; + } + + /* DPC workqueue is not reaped during recovery unload */ + if (mode != QEDF_MODE_RECOVERY) { + sprintf(host_buf, "qedf_%u_dpc", + qedf->lport->host->host_no); + qedf->dpc_wq = create_singlethread_workqueue(host_buf); + } + + /* + * GRC dump and sysfs parameters are not reaped during the recovery + * unload process. + */ + if (mode != QEDF_MODE_RECOVERY) { + qedf->grcdump_size = qed_ops->common->dbg_grc_size(qedf->cdev); + if (qedf->grcdump_size) { + rc = qedf_alloc_grc_dump_buf(&qedf->grcdump, + qedf->grcdump_size); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "GRC Dump buffer alloc failed.\n"); + qedf->grcdump = NULL; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "grcdump: addr=%p, size=%u.\n", + qedf->grcdump, qedf->grcdump_size); + } + qedf_create_sysfs_ctx_attr(qedf); + + /* Initialize I/O tracing for this adapter */ + spin_lock_init(&qedf->io_trace_lock); + qedf->io_trace_idx = 0; + } + + init_completion(&qedf->flogi_compl); + + memset(&link_params, 0, sizeof(struct qed_link_params)); + link_params.link_up = true; + status = qed_ops->common->set_link(qedf->cdev, &link_params); + if (status) + QEDF_WARN(&(qedf->dbg_ctx), "set_link failed.\n"); + + /* Start/restart discovery */ + if (mode == QEDF_MODE_RECOVERY) + fcoe_ctlr_link_up(&qedf->ctlr); + else + fc_fabric_login(lport); + + /* All good */ + return 0; + +err7: + if (qedf->ll2_recv_wq) + destroy_workqueue(qedf->ll2_recv_wq); + fc_remove_host(qedf->lport->host); + scsi_remove_host(qedf->lport->host); +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_exit(&(qedf->dbg_ctx)); +#endif +err6: + qedf_cmd_mgr_free(qedf->cmd_mgr); +err5: + qed_ops->stop(qedf->cdev); +err4: + qedf_free_fcoe_pf_param(qedf); + qedf_sync_free_irqs(qedf); +err3: + qed_ops->common->slowpath_stop(qedf->cdev); +err2: + qed_ops->common->remove(qedf->cdev); +err1: + scsi_host_put(lport->host); +err0: + return rc; +} + +static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __qedf_probe(pdev, QEDF_MODE_NORMAL); +} + +static void __qedf_remove(struct pci_dev *pdev, int mode) +{ + struct qedf_ctx *qedf; + + if (!pdev) { + QEDF_ERR(NULL, "pdev is NULL.\n"); + return; + } + + qedf = pci_get_drvdata(pdev); + + /* + * Prevent race where we're in board disable work and then try to + * rmmod the module. 
+ */ + if (test_bit(QEDF_UNLOADING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Already removing PCI function.\n"); + return; + } + + if (mode != QEDF_MODE_RECOVERY) + set_bit(QEDF_UNLOADING, &qedf->flags); + + /* Logoff the fabric to upload all connections */ + if (mode == QEDF_MODE_RECOVERY) + fcoe_ctlr_link_down(&qedf->ctlr); + else + fc_fabric_logoff(qedf->lport); + qedf_wait_for_upload(qedf); + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_exit(&(qedf->dbg_ctx)); +#endif + + /* Stop any link update handling */ + cancel_delayed_work_sync(&qedf->link_update); + destroy_workqueue(qedf->link_update_wq); + qedf->link_update_wq = NULL; + + if (qedf->timer_work_queue) + destroy_workqueue(qedf->timer_work_queue); + + /* Stop Light L2 */ + clear_bit(QEDF_LL2_STARTED, &qedf->flags); + qed_ops->ll2->stop(qedf->cdev); + if (qedf->ll2_recv_wq) + destroy_workqueue(qedf->ll2_recv_wq); + + /* Stop fastpath */ + qedf_sync_free_irqs(qedf); + qedf_destroy_sb(qedf); + + /* + * During recovery don't destroy OS constructs that represent the + * physical port. + */ + if (mode != QEDF_MODE_RECOVERY) { + qedf_free_grc_dump_buf(&qedf->grcdump); + qedf_remove_sysfs_ctx_attr(qedf); + + /* Remove all SCSI/libfc/libfcoe structures */ + fcoe_ctlr_destroy(&qedf->ctlr); + fc_lport_destroy(qedf->lport); + fc_remove_host(qedf->lport->host); + scsi_remove_host(qedf->lport->host); + } + + qedf_cmd_mgr_free(qedf->cmd_mgr); + + if (mode != QEDF_MODE_RECOVERY) { + fc_exch_mgr_free(qedf->lport); + fc_lport_free_stats(qedf->lport); + + /* Wait for all vports to be reaped */ + qedf_wait_for_vport_destroy(qedf); + } + + /* + * Now that all connections have been uploaded we can stop the + * rest of the qed operations + */ + qed_ops->stop(qedf->cdev); + + if (mode != QEDF_MODE_RECOVERY) { + if (qedf->dpc_wq) { + /* Stop general DPC handling */ + destroy_workqueue(qedf->dpc_wq); + qedf->dpc_wq = NULL; + } + } + + /* Final shutdown for the board */ + qedf_free_fcoe_pf_param(qedf); + if (mode != QEDF_MODE_RECOVERY) { + qed_ops->common->set_power_state(qedf->cdev, PCI_D0); + pci_set_drvdata(pdev, NULL); + } + qed_ops->common->slowpath_stop(qedf->cdev); + qed_ops->common->remove(qedf->cdev); + + mempool_destroy(qedf->io_mempool); + + /* Only reap the Scsi_host on a real removal */ + if (mode != QEDF_MODE_RECOVERY) + scsi_host_put(qedf->lport->host); +} + +static void qedf_remove(struct pci_dev *pdev) +{ + /* Check to make sure this function wasn't already disabled */ + if (!atomic_read(&pdev->enable_cnt)) + return; + + __qedf_remove(pdev, QEDF_MODE_NORMAL); +} + +/* + * Module Init/Remove + */ + +static int __init qedf_init(void) +{ + int ret; + + /* If debug=1 passed, set the default log mask */ + if (qedf_debug == QEDF_LOG_DEFAULT) + qedf_debug = QEDF_DEFAULT_LOG_MASK; + + /* Print driver banner */ + QEDF_INFO(NULL, QEDF_LOG_INFO, "%s v%s.\n", QEDF_DESCR, + QEDF_VERSION); + + /* Create kmem_cache for qedf_io_work structs */ + qedf_io_work_cache = kmem_cache_create("qedf_io_work_cache", + sizeof(struct qedf_io_work), 0, SLAB_HWCACHE_ALIGN, NULL); + if (qedf_io_work_cache == NULL) { + QEDF_ERR(NULL, "qedf_io_work_cache is NULL.\n"); + goto err1; + } + QEDF_INFO(NULL, QEDF_LOG_DISC, "qedf_io_work_cache=%p.\n", + qedf_io_work_cache); + + qed_ops = qed_get_fcoe_ops(); + if (!qed_ops) { + QEDF_ERR(NULL, "Failed to get qed fcoe operations\n"); + goto err1; + } + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_init("qedf"); +#endif + + qedf_fc_transport_template = + fc_attach_transport(&qedf_fc_transport_fn); + if (!qedf_fc_transport_template) { + 
QEDF_ERR(NULL, "Could not register with FC transport\n"); + goto err2; + } + + qedf_fc_vport_transport_template = + fc_attach_transport(&qedf_fc_vport_transport_fn); + if (!qedf_fc_vport_transport_template) { + QEDF_ERR(NULL, "Could not register vport template with FC " + "transport\n"); + goto err3; + } + + qedf_io_wq = create_workqueue("qedf_io_wq"); + if (!qedf_io_wq) { + QEDF_ERR(NULL, "Could not create qedf_io_wq.\n"); + goto err4; + } + + qedf_cb_ops.get_login_failures = qedf_get_login_failures; + + ret = pci_register_driver(&qedf_pci_driver); + if (ret) { + QEDF_ERR(NULL, "Failed to register driver\n"); + goto err5; + } + + return 0; + +err5: + destroy_workqueue(qedf_io_wq); +err4: + fc_release_transport(qedf_fc_vport_transport_template); +err3: + fc_release_transport(qedf_fc_transport_template); +err2: +#ifdef CONFIG_DEBUG_FS + qedf_dbg_exit(); +#endif + qed_put_fcoe_ops(); +err1: + return -EINVAL; +} + +static void __exit qedf_cleanup(void) +{ + pci_unregister_driver(&qedf_pci_driver); + + destroy_workqueue(qedf_io_wq); + + fc_release_transport(qedf_fc_vport_transport_template); + fc_release_transport(qedf_fc_transport_template); +#ifdef CONFIG_DEBUG_FS + qedf_dbg_exit(); +#endif + qed_put_fcoe_ops(); + + kmem_cache_destroy(qedf_io_work_cache); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("QLogic QEDF 25/40/50/100Gb FCoE Driver"); +MODULE_AUTHOR("QLogic Corporation"); +MODULE_VERSION(QEDF_VERSION); +module_init(qedf_init); +module_exit(qedf_cleanup); diff --git a/drivers/scsi/qedf/qedf_version.h b/drivers/scsi/qedf/qedf_version.h new file mode 100644 index 00000000000000..4ae5f537a4403a --- /dev/null +++ b/drivers/scsi/qedf/qedf_version.h @@ -0,0 +1,15 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#define QEDF_VERSION "8.10.7.0" +#define QEDF_DRIVER_MAJOR_VER 8 +#define QEDF_DRIVER_MINOR_VER 10 +#define QEDF_DRIVER_REV_VER 7 +#define QEDF_DRIVER_ENG_VER 0 + From 8bfcd1bf867d5fae67ecc553b44d7a29ab66013c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 8 Feb 2017 16:24:24 +0000 Subject: [PATCH 0167/1911] scsi: sd: make sd_devt_release() static Fixes the following sparse warning: drivers/scsi/sd.c:3087:6: warning: symbol 'sd_devt_release' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb6e68dd6df09d..b9b32f5811e074 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3084,7 +3084,7 @@ struct sd_devt { struct disk_devt disk_devt; }; -void sd_devt_release(struct disk_devt *disk_devt) +static void sd_devt_release(struct disk_devt *disk_devt) { struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, disk_devt); From f170396c14243d307e2f479af142fd7356b3bd2a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Feb 2017 19:38:54 +0000 Subject: [PATCH 0168/1911] scsi: fix memory leak of sdkp when gd fails to allocate On an allocation failure of gd, the current exit path is via out_free_devt which leaves sdkp still allocated and hence it gets leaked. Fix this by correcting the order of resource freeing with a change in the error exit path labels.
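In isolation, the pattern the fix restores looks like the stand-alone sketch below (names such as sdkp_buf/devt_buf are invented for illustration and are not the sd.c code): each error label undoes only what was allocated before the corresponding goto, so the labels have to appear in reverse order of the allocations.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative goto-unwind: two allocations, labels in reverse order. */
static int probe_example(void)
{
	char *sdkp_buf, *devt_buf;
	int error = -ENOMEM;

	sdkp_buf = malloc(64);		/* allocated first */
	if (!sdkp_buf)
		goto out;

	devt_buf = malloc(64);		/* allocated second */
	if (!devt_buf)
		goto out_free;		/* only sdkp_buf exists so far */

	error = -EIO;			/* pretend a later step failed */
	goto out_free_devt;		/* both buffers must be released */

out_free_devt:
	free(devt_buf);			/* last allocated, freed first */
out_free:
	free(sdkp_buf);
out:
	return error;
}

int main(void)
{
	printf("probe_example() = %d\n", probe_example());
	return 0;
}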
Detected by CoverityScan, CID#1399519 ("Resource Leak") Fixes: 0dba1314d4f81115dc ("scsi, block: fix duplicate bdi name registration crashes") Signed-off-by: Colin Ian King Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b9b32f5811e074..8e4f4f72ea6447 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3213,10 +3213,10 @@ static int sd_probe(struct device *dev) sd_devt = NULL; out_put: put_disk(gd); - out_free: - kfree(sdkp); out_free_devt: kfree(sd_devt); + out_free: + kfree(sdkp); out: scsi_autopm_put_device(sdp); return error; From 4d7d39a18b8b81511f0b893b7d2203790bf8a58b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Feb 2017 21:46:37 +0300 Subject: [PATCH 0169/1911] scsi: scsi_dh_emc: return success in clariion_std_inquiry() We accidentally return an uninitialized variable on success. Fixes: b6ff1b14cdf4 ("[SCSI] scsi_dh: Update EMC handler") Signed-off-by: Dan Carpenter Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 4a7679f6c73da0..f1578832ec7ad9 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -358,7 +358,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) static int clariion_std_inquiry(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - int err; + int err = SCSI_DH_OK; char *sp_model; sp_model = parse_sp_model(sdev, sdev->inquiry); From 148cff67b401e2229c076c0ea418712654be77e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:55 +0100 Subject: [PATCH 0170/1911] scsi: always zero sshdr in scsi_normalize_sense This gives us a clear state even if a command didn't return sense data. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index b1383a71400ead..a75673bb82b393 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -137,11 +137,11 @@ EXPORT_SYMBOL(int_to_scsilun); bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr) { + memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); + if (!sense_buffer || !sb_len) return false; - memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); - sshdr->response_code = (sense_buffer[0] & 0x7f); if (!scsi_sense_valid(sshdr)) From 6fa2b8f9e3ae8dd770d8fa264825f86b146381c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:56 +0100 Subject: [PATCH 0171/1911] scsi: sd: improve TUR handling in sd_check_events Remove bogus evaluations of retval and sshdr when the device is offline, and fix a possible NULL pointer dereference by allocating the 8 byte sized sense header on stack. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sd.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8e4f4f72ea6447..be535d4215c403 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1425,7 +1425,6 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) { struct scsi_disk *sdkp = scsi_disk_get(disk); struct scsi_device *sdp; - struct scsi_sense_hdr *sshdr = NULL; int retval; if (!sdkp) @@ -1454,22 +1453,21 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever * sd_revalidate() is called. */ - retval = -ENODEV; - if (scsi_block_when_processing_errors(sdp)) { - sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); + struct scsi_sense_hdr sshdr = { 0, }; + retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES, - sshdr); - } + &sshdr); - /* failed to execute TUR, assume media not present */ - if (host_byte(retval)) { - set_media_not_present(sdkp); - goto out; - } + /* failed to execute TUR, assume media not present */ + if (host_byte(retval)) { + set_media_not_present(sdkp); + goto out; + } - if (media_not_present(sdkp, sshdr)) - goto out; + if (media_not_present(sdkp, &sshdr)) + goto out; + } /* * For removable scsi disk we have to recognise the presence @@ -1485,7 +1483,6 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) * Medium present state has changed in either direction. * Device has indicated UNIT_ATTENTION. */ - kfree(sshdr); retval = sdp->changed ? DISK_EVENT_MEDIA_CHANGE : 0; sdp->changed = 0; scsi_disk_put(sdkp); From 74a78ebda4fa5c5ae87fc501152863ee31c17da4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:57 +0100 Subject: [PATCH 0172/1911] scsi: make the sense header argument to scsi_test_unit_ready mandatory It's a tiny structure that can be allocated on the stack, don't complicate the code by making it optional. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/osd/osd_uld.c | 3 ++- drivers/scsi/scsi_ioctl.c | 3 ++- drivers/scsi/scsi_lib.c | 14 ++------------ 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 243eab3d10d094..e0ce5d2fd14d29 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(osduld_device_same); static int __detect_osd(struct osd_uld_device *oud) { struct scsi_device *scsi_device = oud->od.scsi_device; + struct scsi_sense_hdr sense_hdr; char caps[OSD_CAP_LEN]; int error; @@ -380,7 +381,7 @@ static int __detect_osd(struct osd_uld_device *oud) */ OSD_DEBUG("start scsi_test_unit_ready %p %p %p\n", oud, scsi_device, scsi_device->request_queue); - error = scsi_test_unit_ready(scsi_device, 10*HZ, 5, NULL); + error = scsi_test_unit_ready(scsi_device, 10*HZ, 5, &sense_hdr); if (error) OSD_ERR("warning: scsi_test_unit_ready failed\n"); diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 8b8c814df5c75d..b6bf3f29a12a4e 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -199,6 +199,7 @@ static int scsi_ioctl_get_pci(struct scsi_device *sdev, void __user *arg) int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) { char scsi_cmd[MAX_COMMAND_SIZE]; + struct scsi_sense_hdr sense_hdr; /* Check for deprecated ioctls ... 
all the ioctls which don't * follow the new unique numbering scheme are deprecated */ @@ -243,7 +244,7 @@ int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) return scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); case SCSI_IOCTL_TEST_UNIT_READY: return scsi_test_unit_ready(sdev, IOCTL_NORMAL_TIMEOUT, - NORMAL_RETRIES, NULL); + NORMAL_RETRIES, &sense_hdr); case SCSI_IOCTL_START_UNIT: scsi_cmd[0] = START_STOP; scsi_cmd[1] = 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 39b2d727f033ef..a3c7ec1795c9e4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2520,28 +2520,20 @@ EXPORT_SYMBOL(scsi_mode_sense); * @sdev: scsi device to change the state of. * @timeout: command timeout * @retries: number of retries before failing - * @sshdr_external: Optional pointer to struct scsi_sense_hdr for - * returning sense. Make sure that this is cleared before passing - * in. + * @sshdr: outpout pointer for decoded sense information. * * Returns zero if unsuccessful or an error if TUR failed. For * removable media, UNIT_ATTENTION sets ->changed flag. **/ int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, - struct scsi_sense_hdr *sshdr_external) + struct scsi_sense_hdr *sshdr) { char cmd[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0, }; - struct scsi_sense_hdr *sshdr; int result; - if (!sshdr_external) - sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); - else - sshdr = sshdr_external; - /* try to eat the UNIT_ATTENTION if there are enough retries */ do { result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr, @@ -2552,8 +2544,6 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, } while (scsi_sense_valid(sshdr) && sshdr->sense_key == UNIT_ATTENTION && --retries); - if (!sshdr_external) - kfree(sshdr); return result; } EXPORT_SYMBOL(scsi_test_unit_ready); From 3949e2f04262495f6c8795b148aa26dffccba646 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:58 +0100 Subject: [PATCH 0173/1911] scsi: simplify scsi_execute_req_flags Add a sshdr argument to __scsi_execute so that we can decode the sense data directly into the sense header instead of needing a copy of it. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_lib.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a3c7ec1795c9e4..5f661486cf6ea2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -215,8 +215,9 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, u64 flags, - req_flags_t rq_flags, int *resid) + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, u64 flags, req_flags_t rq_flags, + int *resid) { struct request *req; struct scsi_request *rq; @@ -259,6 +260,8 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, *resid = rq->resid_len; if (sense && rq->sense_len) memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE); + if (sshdr) + scsi_normalize_sense(rq->sense, rq->sense_len, sshdr); ret = req->errors; out: blk_put_request(req); @@ -288,7 +291,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int *resid) { return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, - timeout, retries, flags, 0, resid); + NULL, timeout, retries, flags, 0, resid); } EXPORT_SYMBOL(scsi_execute); @@ -297,21 +300,9 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid, u64 flags, req_flags_t rq_flags) { - char *sense = NULL; - int result; - - if (sshdr) { - sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO); - if (!sense) - return DRIVER_ERROR << 24; - } - result = __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, - sense, timeout, retries, flags, rq_flags, resid); - if (sshdr) - scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr); - - kfree(sense); - return result; + return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, + NULL, sshdr, timeout, retries, flags, rq_flags, + resid); } EXPORT_SYMBOL(scsi_execute_req_flags); From a33fc7a0482a40068c022aefcefd50f9f0f44f87 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 16:42:38 +0800 Subject: [PATCH 0174/1911] drm/i915/gvt: enter failsafe mode when guest requires more resources Windows guest will notify GVT-g to request more resources through g2v interface, when its resources are not enough. This patch is to handle this case and let vgpu enter failsafe mode to avoid too many error messages.
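As a rough stand-alone illustration of that approach (plain C with invented names, not GVT-g code), a failsafe flag is latched on the first bad request and every later one is dropped quietly instead of producing another error message:

#include <stdbool.h>
#include <stdio.h>

static bool failsafe;	/* latched once, never cleared in this sketch */

static void enter_failsafe(const char *reason)
{
	if (failsafe)
		return;			/* already in failsafe, stay quiet */
	failsafe = true;
	fprintf(stderr, "entering failsafe mode: %s\n", reason);
}

static void handle_guest_request(bool resources_ok)
{
	if (!resources_ok) {
		enter_failsafe("guest requested more resources than available");
		return;			/* drop the request, no per-event error */
	}
	/* normal request handling would go here */
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		handle_guest_request(false);	/* only one message is printed */
	return 0;
}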
Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 48ef0771d772c8..a41b322d8957dd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -453,6 +453,7 @@ struct intel_gvt_ops { enum { GVT_FAILSAFE_UNSUPPORTED_GUEST, + GVT_FAILSAFE_INSUFFICIENT_RESOURCE, }; #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index fd7e789a72c317..e1a3826451123f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) case GVT_FAILSAFE_UNSUPPORTED_GUEST: pr_err("Detected your guest driver doesn't support GVT-g.\n"); break; + case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: + pr_err("Graphics resource is not enough for the guest\n"); default: break; } @@ -1106,6 +1108,9 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, case _vgtif_reg(execlist_context_descriptor_lo): case _vgtif_reg(execlist_context_descriptor_hi): break; + case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]): + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); + break; default: gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); From 9272f73f79bd780502134f227fa52fd280ecda17 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Fri, 17 Feb 2017 19:29:52 +0800 Subject: [PATCH 0175/1911] drm/i915/gvt: add a NULL pointer check to avoid kernel panic Due to the request replay, context switch interrupt may come after gvt free the workload thus can cause a kernel NULL pointer kernel panic. This patch will add a simple check to avoid this for a short term. From long term, gvt workload lifecycle doesn't match with i915 request and need to find a proper way to manage this. v4: simplify the NULL pointer check. v5: add unlikely to optimize. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6b6d0efdd1aee..e355a82ccabd64 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -139,6 +139,9 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; + if (unlikely(!workload)) + return NOTIFY_OK; + switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: intel_gvt_load_render_mmio(workload->vgpu, From 593e59b4b941910e4f7ba87d3c02d63e38e960d0 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Mon, 20 Feb 2017 15:51:13 +0800 Subject: [PATCH 0176/1911] drm/i915/gvt: fix unhandled mmio warnings some registers were missing or treated as BDW only. 
This patch is to fix it to avoid unhandled mmio warnings v2: update commit message according to zhenyu's comment Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1a3826451123f..e1abece853d4ad 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1519,6 +1519,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); @@ -2390,9 +2392,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW); + MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); MMIO_D(BDW_EDP_PSR_BASE, D_BDW); @@ -2406,7 +2408,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW); + MMIO_D(0xfdc, D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); @@ -2423,6 +2425,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(0x22040, D_BDW_PLUS); + MMIO_D(0x44484, D_BDW_PLUS); + MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(0x83a4, D_BDW); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); @@ -2668,6 +2674,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); + MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); return 0; } From d8e9b2b9097c117880dc22933239d05199c60b96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Feb 2017 14:58:25 +0100 Subject: [PATCH 0177/1911] drm/i915/gvt: Fix superfluous newline in GVT_DISPLAY_READY env var send_display_ready_uevent() sends two environment variables, and the first one GVT_DISPLAY_READY is set including a new line at the end of the string; that is obviously superfluous and wrong -- at least, it *looks* so when you only read the code. However, it doesn't appear in the actual output by a (supposedly unexpected) trick. The code uses snprintf() and truncates the string in size 20 bytes. This makes the string as GVT_DISPLAY_READY=0 or ...=1 including the trailing NUL-letter. That is, the '\n' found in the format string is always cut off as a result. Although the code gives the correct result, it is confusing. This patch addresses it, just removing the superfluous '\n' from the format string for avoiding further confusion. If the argument "ready" were not a bool, the size 20 should be corrected as well. But it's a bool, so we can leave the magic number 20 as is for now.
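The truncation described above is easy to verify in isolation with plain C (nothing i915-specific is assumed here):

#include <stdio.h>

int main(void)
{
	char buf[20];
	int n;

	/* "GVT_DISPLAY_READY=1\n" needs 21 bytes including the NUL, but
	 * only 19 characters plus the NUL fit in a 20-byte buffer, so the
	 * trailing '\n' is exactly what snprintf() cuts off. */
	n = snprintf(buf, sizeof(buf), "GVT_DISPLAY_READY=%d\n", 1);

	printf("return value : %d\n", n);	/* 20: the untruncated length */
	printf("stored string: \"%s\"\n", buf);	/* no newline at the end */
	printf("truncated    : %s\n",
	       (size_t)n >= sizeof(buf) ? "yes" : "no");
	return 0;
}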
FWIW, the bug was spotted by a new GCC7 warning: drivers/gpu/drm/i915/gvt/handlers.c: In function 'pvinfo_mmio_write': drivers/gpu/drm/i915/gvt/handlers.c:1042:34: error: 'snprintf' output truncated before the last format character [-Werror=format-truncation=] snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gvt/handlers.c:1042:2: note: 'snprintf' output 21 bytes into a destination of size 20 snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 04d348ae3f0a ("drm/i915/gvt: vGPU display virtualization") Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1025903 Reported-by: Richard Biener Cc: Signed-off-by: Takashi Iwai Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1abece853d4ad..a1880424ad3267 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1069,7 +1069,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) char vmid_str[20]; char display_ready_str[20]; - snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); + snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready); env[0] = display_ready_str; snprintf(vmid_str, 20, "VMID=%d", vgpu->id); From 4c4b22abb3d405c5c194b02255eecc1a9eff42cf Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 09:39:00 +0800 Subject: [PATCH 0178/1911] drm/i915/gvt: add more registers to context save/restore list the value of those registers should be applied to hardware on context restoring Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 2b3a642284b6da..73f052a4f4244d 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, @@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, From e6cedfea6b8dcb205f75ea632570f52d2ffd1251 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 10:38:53 +0800 Subject: [PATCH 0179/1911] drm/i915/gvt: force-nopriv register handling add a whitelist to check the content of force-nonpriv registers v3: per He Min's comment, modify in_whitelist()'s return type to bool, and use negative 
value as the return value for failure for force_nonpriv_write(). v2: 1. split a big patch into two smaller ones per zhenyu's comment. this patch is the mmio handling part for force-nopriv registers 2. per zhenyu's comment, combine all non-priv registers into a single MMIO_DFH entry Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 74 +++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a1880424ad3267..af0c0d1ec9a8a3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -398,6 +398,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } +/* ascendingly sorted */ +static i915_reg_t force_nonpriv_white_list[] = { + GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) + GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + GEN8_CS_CHICKEN1,//_MMIO(0x2580) + _MMIO(0x2690), + _MMIO(0x2694), + _MMIO(0x2698), + _MMIO(0x4de0), + _MMIO(0x4de4), + _MMIO(0x4dfc), + GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010) + _MMIO(0x7014), + HDC_CHICKEN0,//_MMIO(0x7300) + GEN8_HDC_CHICKEN1,//_MMIO(0x7304) + _MMIO(0x7700), + _MMIO(0x7704), + _MMIO(0x7708), + _MMIO(0x770c), + _MMIO(0xb110), + GEN8_L3SQCREG4,//_MMIO(0xb118) + _MMIO(0xe100), + _MMIO(0xe18c), + _MMIO(0xe48c), + _MMIO(0xe5f4), +}; + +/* a simple bsearch */ +static inline bool in_whitelist(unsigned int reg) +{ + int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list); + i915_reg_t *array = force_nonpriv_white_list; + + while (left < right) { + int mid = (left + right)/2; + + if (reg > array[mid].reg) + left = mid + 1; + else if (reg < array[mid].reg) + right = mid; + else + return true; + } + return false; +} + +static int force_nonpriv_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 reg_nonpriv = *(u32 *)p_data; + int ret = -EINVAL; + + if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, offset, bytes); + return ret; + } + + if (in_whitelist(reg_nonpriv)) { + ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", + vgpu->id, reg_nonpriv); + } + return ret; +} + static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2420,10 +2488,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0xb10c, D_BDW); MMIO_D(0xb110, D_BDW); - MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + NULL, force_nonpriv_write); MMIO_D(0x22040, D_BDW_PLUS); MMIO_D(0x44484, D_BDW_PLUS); From 187447a106fc9caca45f10413845678d3666556c Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Tue, 21 Feb 2017 21:58:14 +0800 Subject: [PATCH 0180/1911] drm/i915/gvt: add cmd_access to GEN7_HALF_SLICE_CHICKEN1 Linux guest is using this MMIO in lri command. Add cmd_access flag for this mmio in gvt to avoid error log. 
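For reference, the ascending-whitelist check added by the force-nonpriv patch above is a textbook binary search over a sorted table; a stand-alone sketch of the same lookup (the offsets below are placeholders, not the driver's real register list):

#include <stdbool.h>
#include <stdio.h>

/* Ascendingly sorted table of allowed register offsets (illustrative). */
static const unsigned int whitelist[] = {
	0x20ec, 0x2248, 0x2580, 0x2690, 0x7010, 0xb118, 0xe5f4,
};

static bool in_whitelist(unsigned int reg)
{
	int left = 0, right = sizeof(whitelist) / sizeof(whitelist[0]);

	while (left < right) {
		int mid = (left + right) / 2;

		if (reg > whitelist[mid])
			left = mid + 1;
		else if (reg < whitelist[mid])
			right = mid;
		else
			return true;
	}
	return false;
}

int main(void)
{
	printf("0x2580 allowed: %d\n", in_whitelist(0x2580));	/* 1 */
	printf("0x1234 allowed: %d\n", in_whitelist(0x1234));	/* 0 */
	return 0;
}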
v2: change the mmio address to its macro name Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index af0c0d1ec9a8a3..bfe12ddb02107d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1607,7 +1607,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x243c, D_ALL); MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); From ec162aa84c9057e196f3a844c0a8d569f5095e6c Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 14:00:37 +0800 Subject: [PATCH 0181/1911] drm/i915/gvt: set default value to 0 for unhandled mmio regs for a handled mmio reg, its default value is read from hardware, while for an unhandled mmio regs, its default value would be random if not explicitly set to 0 Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 1cb29b2d7dc638..933a7c211a1c29 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -80,7 +80,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) int ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; - firmware = vmalloc(size); + firmware = vzalloc(size); if (!firmware) return -ENOMEM; From bab059304314e127cf4a5330c6bd5a71fd27c022 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:07:31 +0800 Subject: [PATCH 0182/1911] drm/i915/gvt: decrease priority of output msg for untracked mmio When untracked mmio is visited, too many log info will be printed out, it may confuse the user, but most of the time, it is not the urgent case, so use gvt_dbg_mmio() instead. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 99abb01fa9eb61..60b698cb836592 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -298,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); if (!mmio && !vgpu->mmio.disable_warn_untrack) - gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n", + gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n", vgpu->id, offset, bytes, *(u32 *)p_data); if (!intel_gvt_mmio_is_unalign(gvt, offset)) { From da9cc8de22aa6bd6ed51c406432d599ab520a6e3 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 21 Feb 2017 15:52:56 +0800 Subject: [PATCH 0183/1911] drm/i915/gvt: clear the vGPU reset logic Releasing shadow PPGTT pages is not enough when vGPU reset, the guest page table tracking data should has same life-cycle with all the shadow PPGTT pages; Otherwise there is no chance to re-shadow the PPGTT pages without free the guest page table tracking data. This patch clear the PPGTT reset logic and make the vGPU reset in working order. v2: refactor some logic to avoid code duplicated. 
v3: remove useless macro and add comments from Christophe. v4: keep reset logic in reset function. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 35 ++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 28c92346db0e4e..b5c833287d39f6 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2015,6 +2015,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } +static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, list); + if (mm->type == type) { + vgpu->gvt->gtt.mm_free_page_table(mm); + list_del(&mm->list); + list_del(&mm->lru_list); + kfree(mm); + } + } +} + /** * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2027,19 +2043,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - struct list_head *pos, *n; - struct intel_vgpu_mm *mm; - ppgtt_free_all_shadow_page(vgpu); release_scratch_page_tree(vgpu); - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2322,6 +2330,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr) int i; ppgtt_free_all_shadow_page(vgpu); + + /* Shadow pages are only created when there is no page + * table tracking data, so remove page tracking data after + * removing the shadow pages. + */ + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + if (!dmlr) return; From d8a355be0b2b5613f7b34aee1394369d45d50586 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Wed, 22 Feb 2017 11:03:24 +0800 Subject: [PATCH 0184/1911] drm/i915/gvt: refine pcode write emulation In GVT-g we always emulate as pcode read/write success and ready for access anytime, since we don't touch real physical registers here. Add 'SKL_PCODE_CDCLK_CONTROL' write emulation, without it will cause skl_set_cdclk fail in guest. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bfe12ddb02107d..f89b183488e98e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1315,6 +1315,9 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, else *data0 = 0x61514b3d; break; + case SKL_PCODE_CDCLK_CONTROL: + *data0 = SKL_CDCLK_READY_FOR_CHANGE; + break; case 0x5: *data0 |= 0x1; break; @@ -1322,8 +1325,13 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n", vgpu->id, value, *data0); - - value &= ~(1 << 31); + /** + * PCODE_READY clear means ready for pcode read/write, + * PCODE_ERROR_MASK clear means no error happened. In GVT-g we + * always emulate as pcode read/write success and ready for access + * anytime, since we don't touch real physical registers here. 
+ */ + value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK); return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } From 7c28135c77414327523b89bfc3f13096e095f5ac Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:54:56 +0800 Subject: [PATCH 0185/1911] drm/i915/gvt: remove unnecessary error msg from gtt write The guest VM may initialize the whole GTT table during boot up, so the warning msg in emulate_gtt_mmio_write is not necessary, it is the expected behavior and it may confuse the user if error msg is printed out, so remove the msg from emulate_gtt_mmio_write(), Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b5c833287d39f6..6a5ff23ded907c 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, gma = g_gtt_index << GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ - if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma), - "vgpu%d: found oob ggtt write, offset %x\n", - vgpu->id, off)) { + if (!vgpu_gmadr_is_valid(vgpu, gma)) return 0; - } ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); From 191020b670f84f5e2edfaa906c3801df20485610 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Feb 2017 14:46:23 +0800 Subject: [PATCH 0186/1911] drm/i915/gvt: adjust to fixed vGPU types Previous vGPU type create tried to determine vGPU type name e.g _1, _2 based on the number of mdev devices can be created, but different type might have very different resource size depending on physical device. We need to split type name vs. actual mdev resource and create fixed vGPU type with determined size for consistence. With this we'd like to fix vGPU types for _1, _2, _4 and _8 now, each type has fixed defined resource size. Available mdev instances that could be created is determined by physical resource, and user should query for that before creating. 
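A simplified stand-alone model of that sizing rule (all numbers below are invented; the real table lives in vgpu.c): each fixed type exposes however many instances the scarcest remaining resource allows, so the count shrinks as vGPUs are created.

#include <stdio.h>

#define MB(x)	((unsigned long long)(x) * 1024 * 1024)

struct vgpu_type {
	const char *suffix;
	unsigned long long low_mm;	/* low GM per instance */
	unsigned long long high_mm;	/* high GM per instance */
	unsigned long long fence;	/* fence registers per instance */
};

/* Fixed type table, loosely modelled on the patch (values illustrative). */
static const struct vgpu_type types[] = {
	{ "8", MB(64),  MB(512),  4 },
	{ "4", MB(128), MB(512),  4 },
	{ "2", MB(256), MB(1024), 4 },
	{ "1", MB(512), MB(2048), 4 },
};

static unsigned long long min3(unsigned long long a, unsigned long long b,
			       unsigned long long c)
{
	unsigned long long m = a < b ? a : b;

	return m < c ? m : c;
}

int main(void)
{
	/* Pretend this much of the physical budget is still free (made up). */
	unsigned long long low_avail = MB(512), high_avail = MB(4096);
	unsigned long long fence_avail = 16;

	for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++)
		printf("GVTg_V4_%s: %llu available instance(s)\n",
		       types[i].suffix,
		       min3(low_avail / types[i].low_mm,
			    high_avail / types[i].high_mm,
			    fence_avail / types[i].fence));
	return 0;
}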
Cc: Kevin Tian Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/vgpu.c | 58 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index a41b322d8957dd..faff044c643425 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -212,7 +212,6 @@ struct intel_gvt_opregion { #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; - unsigned int max_instance; unsigned int avail_instance; unsigned int low_gm_size; unsigned int high_gm_size; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index dcfcce1dc00e34..c27c13c3cc4836 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,6 +64,19 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +static struct { + unsigned int low_mm; + unsigned int high_mm; + unsigned int fence; + char *name; +} vgpu_types[] = { +/* Fixed vGPU type table */ + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, +}; + /** * intel_gvt_init_vgpu_types - initialize vGPU type list * @gvt : GVT device @@ -78,9 +91,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) unsigned int min_low; /* vGPU type name is defined as GVTg_Vx_y which contains - * physical GPU generation type and 'y' means maximum vGPU - * instances user can create on one physical GPU for this - * type. + * physical GPU generation type (e.g V4 as BDW server, V5 as + * SKL server). * * Depend on physical SKU resource, might see vGPU types like * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. 
We can create @@ -92,7 +104,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = 4; + num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), GFP_KERNEL); @@ -101,25 +113,24 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) min_low = MB_TO_BYTES(32); for (i = 0; i < num_types; ++i) { - if (low_avail / min_low == 0) + if (low_avail / vgpu_types[i].low_mm == 0) break; - gvt->types[i].low_gm_size = min_low; - gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U)); - gvt->types[i].fence = 4; - gvt->types[i].max_instance = min(low_avail / min_low, - high_avail / gvt->types[i].high_gm_size); - gvt->types[i].avail_instance = gvt->types[i].max_instance; + + gvt->types[i].low_gm_size = vgpu_types[i].low_mm; + gvt->types[i].high_gm_size = vgpu_types[i].high_mm; + gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, + high_avail / vgpu_types[i].high_mm); if (IS_GEN8(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V4_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V4_%s", + vgpu_types[i].name); else if (IS_GEN9(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V5_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V5_%s", + vgpu_types[i].name); - min_low <<= 1; - gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); @@ -138,7 +149,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) { int i; unsigned int low_gm_avail, high_gm_avail, fence_avail; - unsigned int low_gm_min, high_gm_min, fence_min, total_min; + unsigned int low_gm_min, high_gm_min, fence_min; /* Need to depend on maxium hw resource size but keep on * static config for now. @@ -154,12 +165,11 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; fence_min = fence_avail / gvt->types[i].fence; - total_min = min(min(low_gm_min, high_gm_min), fence_min); - gvt->types[i].avail_instance = min(gvt->types[i].max_instance, - total_min); + gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min), + fence_min); - gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); } From 1f8728b7adc4c2b351cda886a6916087595c9125 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:23 +0100 Subject: [PATCH 0187/1911] PM / Domains: Move genpd_power_off() above genpd_power_on() Following changes in genpd_power_on() makes it invoke genpd_power_off(). To enable these changes and avoiding to declare genpd_power_off(), let's move its implementation above genpd_power_on(). In this way, following changes should become easier to review. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 162 ++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3a75fb1b4126f0..3dc44f21606783 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -273,6 +273,87 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) queue_work(pm_wq, &genpd->power_off_work); } +/** + * genpd_power_off - Remove power from a given PM domain. + * @genpd: PM domain to power down. + * @is_async: PM domain is powered down from a scheduled work + * + * If all of the @genpd's devices have been suspended and all of its subdomains + * have been powered down, remove power from @genpd. + */ +static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) +{ + struct pm_domain_data *pdd; + struct gpd_link *link; + unsigned int not_suspended = 0; + + /* + * Do not try to power off the domain in the following situations: + * (1) The domain is already in the "power off" state. + * (2) System suspend is in progress. + */ + if (genpd->status == GPD_STATE_POWER_OFF + || genpd->prepared_count > 0) + return 0; + + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; + + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + enum pm_qos_flags_status stat; + + stat = dev_pm_qos_flags(pdd->dev, + PM_QOS_FLAG_NO_POWER_OFF + | PM_QOS_FLAG_REMOTE_WAKEUP); + if (stat > PM_QOS_FLAGS_NONE) + return -EBUSY; + + /* + * Do not allow PM domain to be powered off, when an IRQ safe + * device is part of a non-IRQ safe domain. + */ + if (!pm_runtime_suspended(pdd->dev) || + irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) + not_suspended++; + } + + if (not_suspended > 1 || (not_suspended == 1 && is_async)) + return -EBUSY; + + if (genpd->gov && genpd->gov->power_down_ok) { + if (!genpd->gov->power_down_ok(&genpd->domain)) + return -EAGAIN; + } + + if (genpd->power_off) { + int ret; + + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; + + /* + * If sd_count > 0 at this point, one of the subdomains hasn't + * managed to call genpd_power_on() for the master yet after + * incrementing it. In that case genpd_power_on() will wait + * for us to drop the lock, so we can call .power_off() and let + * the genpd_power_on() restore power for us (this shouldn't + * happen very often). + */ + ret = _genpd_power_off(genpd, true); + if (ret) + return ret; + } + + genpd->status = GPD_STATE_POWER_OFF; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + genpd_sd_counter_dec(link->master); + genpd_queue_power_off_work(link->master); + } + + return 0; +} + /** * genpd_power_on - Restore power to a given PM domain and its masters. * @genpd: PM domain to power up. @@ -367,87 +448,6 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -/** - * genpd_power_off - Remove power from a given PM domain. - * @genpd: PM domain to power down. - * @is_async: PM domain is powered down from a scheduled work - * - * If all of the @genpd's devices have been suspended and all of its subdomains - * have been powered down, remove power from @genpd. - */ -static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) -{ - struct pm_domain_data *pdd; - struct gpd_link *link; - unsigned int not_suspended = 0; - - /* - * Do not try to power off the domain in the following situations: - * (1) The domain is already in the "power off" state. - * (2) System suspend is in progress. 
- */ - if (genpd->status == GPD_STATE_POWER_OFF - || genpd->prepared_count > 0) - return 0; - - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - enum pm_qos_flags_status stat; - - stat = dev_pm_qos_flags(pdd->dev, - PM_QOS_FLAG_NO_POWER_OFF - | PM_QOS_FLAG_REMOTE_WAKEUP); - if (stat > PM_QOS_FLAGS_NONE) - return -EBUSY; - - /* - * Do not allow PM domain to be powered off, when an IRQ safe - * device is part of a non-IRQ safe domain. - */ - if (!pm_runtime_suspended(pdd->dev) || - irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) - not_suspended++; - } - - if (not_suspended > 1 || (not_suspended == 1 && is_async)) - return -EBUSY; - - if (genpd->gov && genpd->gov->power_down_ok) { - if (!genpd->gov->power_down_ok(&genpd->domain)) - return -EAGAIN; - } - - if (genpd->power_off) { - int ret; - - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - /* - * If sd_count > 0 at this point, one of the subdomains hasn't - * managed to call genpd_power_on() for the master yet after - * incrementing it. In that case genpd_power_on() will wait - * for us to drop the lock, so we can call .power_off() and let - * the genpd_power_on() restore power for us (this shouldn't - * happen very often). - */ - ret = _genpd_power_off(genpd, true); - if (ret) - return ret; - } - - genpd->status = GPD_STATE_POWER_OFF; - - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); - } - - return 0; -} - /** * genpd_power_off_work_fn - Power off PM domain whose subdomain count is 0. * @work: Work structure used for scheduling the execution of this function. From 3c64649d1cf9f32fd16491e69ccc16385e0ef421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:24 +0100 Subject: [PATCH 0188/1911] PM / Domains: Rename is_async to one_dev_on for genpd_power_off() The parameter name is_async, for genpd_power_off() gives a poor description of its purpose. To clarify, let's rename it to one_dev_on and update the documentation of it in the function header. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3dc44f21606783..179bb269a58fb3 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -276,12 +276,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) /** * genpd_power_off - Remove power from a given PM domain. * @genpd: PM domain to power down. - * @is_async: PM domain is powered down from a scheduled work + * @one_dev_on: If invoked from genpd's ->runtime_suspend|resume() callback, the + * RPM status of the releated device is in an intermediate state, not yet turned + * into RPM_SUSPENDED. This means genpd_power_off() must allow one device to not + * be RPM_SUSPENDED, while it tries to power off the PM domain. * * If all of the @genpd's devices have been suspended and all of its subdomains * have been powered down, remove power from @genpd. 
*/ -static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) +static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) { struct pm_domain_data *pdd; struct gpd_link *link; @@ -317,7 +320,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) not_suspended++; } - if (not_suspended > 1 || (not_suspended == 1 && is_async)) + if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on)) return -EBUSY; if (genpd->gov && genpd->gov->power_down_ok) { @@ -459,7 +462,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, false); genpd_unlock(genpd); } @@ -578,7 +581,7 @@ static int genpd_runtime_suspend(struct device *dev) return 0; genpd_lock(genpd); - genpd_power_off(genpd, false); + genpd_power_off(genpd, true); genpd_unlock(genpd); return 0; @@ -658,7 +661,7 @@ static int genpd_runtime_resume(struct device *dev) if (!pm_runtime_is_irq_safe(dev) || (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); - genpd_power_off(genpd, 0); + genpd_power_off(genpd, true); genpd_unlock(genpd); } From 2da835452a0875866ca55d23126c77b9372f0015 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:25 +0100 Subject: [PATCH 0189/1911] PM / Domains: Power off masters immediately in the power off sequence Once a subdomain is powered off, genpd queues a power off work for each of the subdomain's corresponding masters, thus postponing the masters to be powered off to a later point. When genpd used intermediate power off states, which was removed in commit ba2bbfbf6307 ("PM / Domains: Remove intermediate states from the power off sequence"), this behaviour made sense, but now it simply doesn't. Genpd can easily try to power off the masters in the same context as the subdomain, of course by acquiring/releasing the lock. Then, let's convert to this behaviour, as it avoids unnecessary works from being queued. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 179bb269a58fb3..e697dec9d25bf5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -284,7 +284,8 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) * If all of the @genpd's devices have been suspended and all of its subdomains * have been powered down, remove power from @genpd. 
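As an aside, the one_dev_on rule described above boils down to a small predicate: every device in the domain must already be runtime suspended, except that the caller may vouch for exactly one device whose RPM status is still in flight because the power-off is driven from that device's own ->runtime_suspend|resume() path. A standalone sketch of that decision follows (illustrative userspace code, not part of the patches; the function name is made up):

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the gate genpd_power_off() applies before cutting power. */
static bool may_power_off(unsigned int not_suspended, bool one_dev_on)
{
	/* Mirrors: not_suspended > 1 || (not_suspended == 1 && !one_dev_on) */
	if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on))
		return false;	/* the real function returns -EBUSY here */
	return true;
}

int main(void)
{
	printf("%d\n", may_power_off(0, false)); /* 1: everything suspended */
	printf("%d\n", may_power_off(1, true));  /* 1: only the device now suspending */
	printf("%d\n", may_power_off(1, false)); /* 0: some other device still active */
	printf("%d\n", may_power_off(2, true));  /* 0: more than one device active */
	return 0;
}
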
*/ -static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) +static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, + unsigned int depth) { struct pm_domain_data *pdd; struct gpd_link *link; @@ -351,7 +352,9 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); + genpd_lock_nested(link->master, depth + 1); + genpd_power_off(link->master, false, depth + 1); + genpd_unlock(link->master); } return 0; @@ -405,7 +408,9 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); + genpd_lock_nested(link->master, depth + 1); + genpd_power_off(link->master, false, depth + 1); + genpd_unlock(link->master); } return ret; @@ -462,7 +467,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); genpd_lock(genpd); - genpd_power_off(genpd, false); + genpd_power_off(genpd, false, 0); genpd_unlock(genpd); } @@ -581,7 +586,7 @@ static int genpd_runtime_suspend(struct device *dev) return 0; genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, true, 0); genpd_unlock(genpd); return 0; @@ -661,7 +666,7 @@ static int genpd_runtime_resume(struct device *dev) if (!pm_runtime_is_irq_safe(dev) || (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, true, 0); genpd_unlock(genpd); } From 76aaf87b4cdc7f7115a32e4fda88310d42ce7fde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Feb 2017 16:02:36 +0100 Subject: [PATCH 0190/1911] scsi: merge __scsi_execute into scsi_execute All but one caller want the decoded sense header, so offer the existing __scsi_execute helper as the public scsi_execute API to simply the callers. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 12 +++----- drivers/scsi/cxlflash/superpipe.c | 8 ++---- drivers/scsi/cxlflash/vlun.c | 4 +-- drivers/scsi/scsi_lib.c | 48 ++++++++++++++----------------- drivers/scsi/scsi_transport_spi.c | 24 +++++++--------- drivers/scsi/sr_ioctl.c | 19 ++---------- include/scsi/scsi_device.h | 5 ++-- 7 files changed, 46 insertions(+), 74 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 12d3a66600a3f7..1ac70744ae7b4b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -600,6 +600,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) u8 args[4], *argbuf = NULL, *sensebuf = NULL; int argsize = 0; enum dma_data_direction data_dir; + struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) @@ -648,7 +649,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize, - sensebuf, (10*HZ), 5, 0, NULL); + sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; @@ -657,9 +658,6 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. 
*/ if (cmd_result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, - &sshdr); if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; @@ -707,6 +705,7 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) int rc = 0; u8 scsi_cmd[MAX_COMMAND_SIZE]; u8 args[7], *sensebuf = NULL; + struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) @@ -734,7 +733,7 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, DMA_NONE, NULL, 0, - sensebuf, (10*HZ), 5, 0, NULL); + sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; @@ -743,9 +742,6 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. */ if (cmd_result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, - &sshdr); if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c index 90869cee2b20b2..5b812ed87f224e 100644 --- a/drivers/scsi/cxlflash/superpipe.c +++ b/drivers/scsi/cxlflash/superpipe.c @@ -305,6 +305,7 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) struct cxlflash_cfg *cfg = shost_priv(sdev->host); struct device *dev = &cfg->dev->dev; struct glun_info *gli = lli->parent; + struct scsi_sense_hdr sshdr; u8 *cmd_buf = NULL; u8 *scsi_cmd = NULL; u8 *sense_buf = NULL; @@ -332,7 +333,8 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) /* Drop the ioctl read semahpore across lengthy call */ up_read(&cfg->ioctl_rwsem); result = scsi_execute(sdev, scsi_cmd, DMA_FROM_DEVICE, cmd_buf, - CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 0, NULL); + CMD_BUFSIZE, sense_buf, &sshdr, to, CMD_RETRIES, + 0, 0, NULL); down_read(&cfg->ioctl_rwsem); rc = check_state(cfg); if (rc) { @@ -345,10 +347,6 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) if (driver_byte(result) == DRIVER_SENSE) { result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ if (result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - - scsi_normalize_sense(sense_buf, SCSI_SENSE_BUFFERSIZE, - &sshdr); switch (sshdr.sense_key) { case NO_SENSE: case RECOVERED_ERROR: diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c index 8fcc804dbef9d9..7aa06ef229fd79 100644 --- a/drivers/scsi/cxlflash/vlun.c +++ b/drivers/scsi/cxlflash/vlun.c @@ -453,8 +453,8 @@ static int write_same16(struct scsi_device *sdev, /* Drop the ioctl read semahpore across lengthy call */ up_read(&cfg->ioctl_rwsem); result = scsi_execute(sdev, scsi_cmd, DMA_TO_DEVICE, cmd_buf, - CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, - 0, NULL); + CMD_BUFSIZE, sense_buf, NULL, to, + CMD_RETRIES, 0, 0, NULL); down_read(&cfg->ioctl_rwsem); rc = check_state(cfg); if (rc) { diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5f661486cf6ea2..b0cb3e0713f308 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -213,7 +213,26 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) 
__scsi_queue_insert(cmd, reason, 1); } -static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + +/** + * scsi_execute - insert request and wait for the result + * @sdev: scsi device + * @cmd: scsi command + * @data_direction: data direction + * @buffer: data buffer + * @bufflen: len of buffer + * @sense: optional sense buffer + * @sshdr: optional decoded sense header + * @timeout: request timeout in seconds + * @retries: number of times to retry request + * @flags: flags for ->cmd_flags + * @rq_flags: flags for ->rq_flags + * @resid: optional residual length + * + * returns the req->errors value which is the scsi_cmnd result + * field. + */ +int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, struct scsi_sense_hdr *sshdr, int timeout, int retries, u64 flags, req_flags_t rq_flags, @@ -268,31 +287,6 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, return ret; } - -/** - * scsi_execute - insert request and wait for the result - * @sdev: scsi device - * @cmd: scsi command - * @data_direction: data direction - * @buffer: data buffer - * @bufflen: len of buffer - * @sense: optional sense buffer - * @timeout: request timeout in seconds - * @retries: number of times to retry request - * @flags: or into request flags; - * @resid: optional residual length - * - * returns the req->errors value which is the scsi_cmnd result - * field. - */ -int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, u64 flags, - int *resid) -{ - return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, - NULL, timeout, retries, flags, 0, resid); -} EXPORT_SYMBOL(scsi_execute); int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, @@ -300,7 +294,7 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid, u64 flags, req_flags_t rq_flags) { - return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, + return scsi_execute(sdev, cmd, data_direction, buffer, bufflen, NULL, sshdr, timeout, retries, flags, rq_flags, resid); } diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 319868f3f67430..d0219e36080c3b 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -123,25 +123,21 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, { int i, result; unsigned char sense[SCSI_SENSE_BUFFERSIZE]; + struct scsi_sense_hdr sshdr_tmp; + + if (!sshdr) + sshdr = &sshdr_tmp; for(i = 0; i < DV_RETRIES; i++) { - result = scsi_execute(sdev, cmd, dir, buffer, bufflen, - sense, DV_TIMEOUT, /* retries */ 1, + result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, + sshdr, DV_TIMEOUT, /* retries */ 1, REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER, - NULL); - if (driver_byte(result) & DRIVER_SENSE) { - struct scsi_sense_hdr sshdr_tmp; - if (!sshdr) - sshdr = &sshdr_tmp; - - if (scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, - sshdr) - && sshdr->sense_key == UNIT_ATTENTION) - continue; - } - break; + 0, NULL); + if (!(driver_byte(result) & DRIVER_SENSE) || + sshdr->sense_key != UNIT_ATTENTION) + break; } return result; } diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index dfffdf63e44c92..4610c8c5693fd4 100644 --- 
a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -187,30 +187,19 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; - struct request_sense *sense = cgc->sense; SDev = cd->device; - if (!sense) { - sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL); - if (!sense) { - err = -ENOMEM; - goto out; - } - } - retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; goto out; } - memset(sense, 0, sizeof(*sense)); result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, (char *)sense, - cgc->timeout, IOCTL_RETRIES, 0, NULL); - - scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr); + cgc->buffer, cgc->buflen, + (unsigned char *)cgc->sense, &sshdr, + cgc->timeout, IOCTL_RETRIES, 0, 0, NULL); /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { @@ -261,8 +250,6 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) /* Wake up a process waiting for device */ out: - if (!cgc->sense) - kfree(sense); cgc->stat = err; return err; } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index be41c76ddd489b..cb4c8c889da03f 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -410,8 +410,9 @@ extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, - u64 flags, int *resid); + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, u64 flags, + req_flags_t rq_flags, int *resid); extern int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, From fcbfffe2c5cbec0c1721b2261c316b961ad50208 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Feb 2017 16:02:37 +0100 Subject: [PATCH 0191/1911] scsi: remove scsi_execute_req_flags And switch all callers to use scsi_execute instead. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 16 ++++++---------- drivers/scsi/device_handler/scsi_dh_emc.c | 7 +++---- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 10 ++++------ drivers/scsi/device_handler/scsi_dh_rdac.c | 7 +++---- drivers/scsi/scsi_lib.c | 11 ----------- drivers/scsi/sd.c | 9 ++++----- drivers/scsi/ufs/ufshcd.c | 10 +++++----- include/scsi/scsi_device.h | 8 ++------ 8 files changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index d704752b63329f..48e200102221c5 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -151,11 +151,9 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, cdb[1] = MI_REPORT_TARGET_PGS; put_unaligned_be32(bufflen, &cdb[6]); - return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, - buff, bufflen, sshdr, - ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, - req_flags, 0); + return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); } /* @@ -185,11 +183,9 @@ static int submit_stpg(struct scsi_device *sdev, int group_id, cdb[1] = MO_SET_TARGET_PGS; put_unaligned_be32(stpg_len, &cdb[6]); - return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - stpg_data, stpg_len, - sshdr, ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, - req_flags, 0); + return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); } static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index f1578832ec7ad9..8654e940e1a809 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -276,10 +276,9 @@ static int send_trespass_cmd(struct scsi_device *sdev, BUG_ON((len > CLARIION_BUFFER_SIZE)); memcpy(csdev->buffer, page22, len); - err = scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - csdev->buffer, len, &sshdr, - CLARIION_TIMEOUT * HZ, CLARIION_RETRIES, - NULL, req_flags, 0); + err = scsi_execute(sdev, cdb, DMA_TO_DEVICE, csdev->buffer, len, NULL, + &sshdr, CLARIION_TIMEOUT * HZ, CLARIION_RETRIES, + req_flags, 0, NULL); if (err) { if (scsi_sense_valid(&sshdr)) res = trespass_endio(sdev, &sshdr); diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index be43c940636df6..62d314e07d1125 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -100,9 +100,8 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) REQ_FAILFAST_DRIVER; retry: - res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, - HP_SW_TIMEOUT, HP_SW_RETRIES, - NULL, req_flags, 0); + res = scsi_execute(sdev, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, req_flags, 0, NULL); if (res) { if (scsi_sense_valid(&sshdr)) ret = tur_done(sdev, h, &sshdr); @@ -139,9 +138,8 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) REQ_FAILFAST_DRIVER; retry: - res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, - HP_SW_TIMEOUT, HP_SW_RETRIES, - NULL, req_flags, 0); + res = scsi_execute(sdev, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, req_flags, 0, NULL); if (res) { if 
(!scsi_sense_valid(&sshdr)) { sdev_printk(KERN_WARNING, sdev, diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index b64eaae8533d99..3cbab8710e5813 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -555,10 +555,9 @@ static void send_mode_select(struct work_struct *work) (char *) h->ctlr->array_name, h->ctlr->index, (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying"); - if (scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - &h->ctlr->mode_select, data_size, &sshdr, - RDAC_TIMEOUT * HZ, - RDAC_RETRIES, NULL, req_flags, 0)) { + if (scsi_execute(sdev, cdb, DMA_TO_DEVICE, &h->ctlr->mode_select, + data_size, NULL, &sshdr, RDAC_TIMEOUT * HZ, + RDAC_RETRIES, req_flags, 0, NULL)) { err = mode_select_handle_sense(sdev, &sshdr); if (err == SCSI_DH_RETRY && retry_cnt--) goto retry; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b0cb3e0713f308..f5e45a25248505 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -289,17 +289,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, } EXPORT_SYMBOL(scsi_execute); -int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - struct scsi_sense_hdr *sshdr, int timeout, int retries, - int *resid, u64 flags, req_flags_t rq_flags) -{ - return scsi_execute(sdev, cmd, data_direction, buffer, bufflen, - NULL, sshdr, timeout, retries, flags, rq_flags, - resid); -} -EXPORT_SYMBOL(scsi_execute_req_flags); - /* * Function: scsi_init_cmd_errh() * diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index be535d4215c403..c7839f6c35ccc4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1508,9 +1508,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp) * Leave the rest of the command zero to indicate * flush everything. */ - res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, - &sshdr, timeout, SD_MAX_RETRIES, - NULL, 0, RQF_PM); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res == 0) break; } @@ -3296,8 +3295,8 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) if (!scsi_device_online(sdp)) return -ENODEV; - res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - SD_TIMEOUT, SD_MAX_RETRIES, NULL, 0, RQF_PM); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + SD_TIMEOUT, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (driver_byte(res) & DRIVER_SENSE) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 8b721f431dd0df..dc6efbd1be8ef3 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6915,9 +6915,9 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp) goto out; } - ret = scsi_execute_req_flags(sdp, cmd, DMA_FROM_DEVICE, buffer, - UFSHCD_REQ_SENSE_SIZE, NULL, - msecs_to_jiffies(1000), 3, NULL, 0, RQF_PM); + ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer, + UFSHCD_REQ_SENSE_SIZE, NULL, NULL, + msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL); if (ret) pr_err("%s: failed with err %d\n", __func__, ret); @@ -6982,8 +6982,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. 
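For orientation, this is roughly what a converted caller looks like after the merge: pass an optional struct scsi_sense_hdr and let scsi_execute() decode the sense data, instead of open-coding scsi_normalize_sense(). This is a hedged sketch only (a kernel-context fragment, not standalone runnable); tur_example() and its timeout/retry values are invented for illustration and are not taken from the patches above:

#include <scsi/scsi.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_device.h>

static int tur_example(struct scsi_device *sdev)
{
	unsigned char cmd[6] = { TEST_UNIT_READY };
	struct scsi_sense_hdr sshdr;
	int result;

	result = scsi_execute(sdev, cmd, DMA_NONE, NULL, 0,
			      NULL,		/* no raw sense buffer needed */
			      &sshdr,		/* decoded sense header */
			      30 * HZ, 3,	/* timeout, retries (made up) */
			      0, 0,		/* flags, rq_flags */
			      NULL);		/* resid */

	/* Only trust sshdr when the midlayer reports sense data. */
	if (driver_byte(result) == DRIVER_SENSE &&
	    sshdr.sense_key == UNIT_ATTENTION)
		return -EAGAIN;	/* caller may retry */

	return result;
}
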
*/ - ret = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - START_STOP_TIMEOUT, 0, NULL, 0, RQF_PM); + ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); if (ret) { sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index cb4c8c889da03f..6f22b39f1b0c3b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -413,17 +413,13 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, unsigned char *sense, struct scsi_sense_hdr *sshdr, int timeout, int retries, u64 flags, req_flags_t rq_flags, int *resid); -extern int scsi_execute_req_flags(struct scsi_device *sdev, - const unsigned char *cmd, int data_direction, void *buffer, - unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, - int retries, int *resid, u64 flags, req_flags_t rq_flags); static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute_req_flags(sdev, cmd, data_direction, buffer, - bufflen, sshdr, timeout, retries, resid, 0, 0); + return scsi_execute(sdev, cmd, data_direction, buffer, + bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); From 1afca6b5f31616d330fb64a0c87060640a75ff6a Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Thu, 23 Feb 2017 07:01:03 -0800 Subject: [PATCH 0192/1911] scsi: qedf: fixup compilation warning about atomic_t usage Based on an original patch by Hannes Reinecke. The driver didn't follow the atomic_t vs refcount_t change, and anyway one should be using kref_read() instead of accessing the counter inside an kref. Fixes: 61d8658b4a435e ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.) Cc: Hannes Reinecke Cc: Nilesh Javali Signed-off-by: Dupuis, Chad Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedf/qedf_els.c | 6 +++--- drivers/scsi/qedf/qedf_io.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index 78f1c252b649d9..59f3e5c73a139b 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -183,7 +183,7 @@ static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg) rrq_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "rrq_compl: orig io = %p," " orig xid = 0x%x, rrq_xid = 0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, rrq_req->xid, refcount); @@ -474,7 +474,7 @@ static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg) srr_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, srr_req->xid, refcount); @@ -758,7 +758,7 @@ static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg) rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, rec_req->xid, refcount); diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 486c045ac8bb25..ee0dcf9d3aba78 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -998,7 +998,7 @@ static void qedf_trace_io(struct qedf_rport *fcport, struct qedf_ioreq *io_req, io_log->sg_count = scsi_sg_count(sc_cmd); io_log->result = sc_cmd->result; io_log->jiffies = jiffies; - io_log->refcount = atomic_read(&io_req->refcount.refcount); + io_log->refcount = kref_read(&io_req->refcount); if (direction == QEDF_IO_TRACE_REQ) { /* For requests we only care abot the submission CPU */ @@ -1340,7 +1340,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, /* Good I/O completion */ sc_cmd->result = DID_OK << 16; } else { - refcount = atomic_read(&io_req->refcount.refcount); + refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d xid=0x%0x op=0x%02x " "lba=%02x%02x%02x%02x cdb_status=%d " @@ -1425,7 +1425,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, qedf_unmap_sg_list(qedf, io_req); sc_cmd->result = result << 16; - refcount = atomic_read(&io_req->refcount.refcount); + refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " "allowed=%d retries=%d refcount=%d.\n", @@ -1556,7 +1556,7 @@ static void qedf_flush_els_req(struct qedf_ctx *qedf, { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Flushing ELS request xid=0x%x refcount=%d.\n", els_req->xid, - atomic_read(&els_req->refcount.refcount)); + kref_read(&els_req->refcount)); /* * Need to distinguish this from a timeout when calling the From 8cc311167c22f9365304b2b20225df2d881c8843 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 20 Feb 2017 19:57:57 +0100 Subject: 
[PATCH 0193/1911] PM / OPP: fix off-by-one bug in dev_pm_opp_get_max_volt_latency loop Reading array at given index before checking if index is valid results in illegal memory access. The bug was detected using KASAN framework. Signed-off-by: Andrzej Hajda Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 91ec3232d63004..dae61720b31402 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -231,7 +231,8 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) * The caller needs to ensure that opp_table (and hence the regulator) * isn't freed, while we are executing this routine. */ - for (i = 0; reg = regulators[i], i < count; i++) { + for (i = 0; i < count; i++) { + reg = regulators[i]; ret = regulator_set_voltage_time(reg, uV[i].min, uV[i].max); if (ret > 0) latency_ns += ret * 1000; From 1d55abc0e98a0bf35f3af80665aac564e3b30572 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Feb 2017 11:31:41 +0100 Subject: [PATCH 0194/1911] scsi: mpt3sas: switch to pci_alloc_irq_vectors Cleanup the MSI-X handling allowing us to use the PCI-layer provided vector allocation. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 105 +++++++++++++--------------- drivers/scsi/mpt3sas/mpt3sas_base.h | 2 - 2 files changed, 48 insertions(+), 59 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index a3fe1fb55c17c3..5b7aec5d575a39 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1148,7 +1148,7 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc) /* TMs are on msix_index == 0 */ if (reply_q->msix_index == 0) continue; - synchronize_irq(reply_q->vector); + synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index)); } } @@ -1837,11 +1837,8 @@ _base_free_irq(struct MPT3SAS_ADAPTER *ioc) list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) { list_del(&reply_q->list); - if (smp_affinity_enable) { - irq_set_affinity_hint(reply_q->vector, NULL); - free_cpumask_var(reply_q->affinity_hint); - } - free_irq(reply_q->vector, reply_q); + free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index), + reply_q); kfree(reply_q); } } @@ -1850,13 +1847,13 @@ _base_free_irq(struct MPT3SAS_ADAPTER *ioc) * _base_request_irq - request irq * @ioc: per adapter object * @index: msix index into vector table - * @vector: irq vector * * Inserting respective reply_queue into the list. 
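A small self-contained illustration of the bug class fixed in the dev_pm_opp_get_max_volt_latency patch above (plain userspace C, not kernel code): with the comma operator in the loop condition, the array element is read before the index is bounds-checked, so the final iteration touches one slot past the intended range.

#include <stdio.h>

int main(void)
{
	/* Slot 3 stands in for the out-of-range memory KASAN complained about. */
	int values[4] = { 10, 20, 30, -1 };
	int count = 3, i, v;

	/* Buggy shape: "v = values[i]" runs before "i < count" is tested,
	 * so values[3] is read on the iteration that terminates the loop. */
	for (i = 0; v = values[i], i < count; i++)
		printf("buggy loop read %d\n", v);

	/* Fixed shape: check the index first, then read the element. */
	for (i = 0; i < count; i++) {
		v = values[i];
		printf("fixed loop read %d\n", v);
	}
	return 0;
}
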
*/ static int -_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) +_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index) { + struct pci_dev *pdev = ioc->pdev; struct adapter_reply_queue *reply_q; int r; @@ -1868,14 +1865,6 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) } reply_q->ioc = ioc; reply_q->msix_index = index; - reply_q->vector = vector; - - if (smp_affinity_enable) { - if (!zalloc_cpumask_var(&reply_q->affinity_hint, GFP_KERNEL)) { - kfree(reply_q); - return -ENOMEM; - } - } atomic_set(&reply_q->busy, 0); if (ioc->msix_enable) @@ -1884,12 +1873,11 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) else snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d", ioc->driver_name, ioc->id); - r = request_irq(vector, _base_interrupt, IRQF_SHARED, reply_q->name, - reply_q); + r = request_irq(pci_irq_vector(pdev, index), _base_interrupt, + IRQF_SHARED, reply_q->name, reply_q); if (r) { pr_err(MPT3SAS_FMT "unable to allocate interrupt %d!\n", - reply_q->name, vector); - free_cpumask_var(reply_q->affinity_hint); + reply_q->name, pci_irq_vector(pdev, index)); kfree(reply_q); return -EBUSY; } @@ -1925,6 +1913,21 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) if (!nr_msix) return; + if (smp_affinity_enable) { + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { + const cpumask_t *mask = pci_irq_get_affinity(ioc->pdev, + reply_q->msix_index); + if (!mask) { + pr_warn(MPT3SAS_FMT "no affinity for msi %x\n", + ioc->name, reply_q->msix_index); + continue; + } + + for_each_cpu(cpu, mask) + ioc->cpu_msix_table[cpu] = reply_q->msix_index; + } + return; + } cpu = cpumask_first(cpu_online_mask); list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { @@ -1938,18 +1941,9 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) group++; for (i = 0 ; i < group ; i++) { - ioc->cpu_msix_table[cpu] = index; - if (smp_affinity_enable) - cpumask_or(reply_q->affinity_hint, - reply_q->affinity_hint, get_cpu_mask(cpu)); + ioc->cpu_msix_table[cpu] = reply_q->msix_index; cpu = cpumask_next(cpu, cpu_online_mask); } - if (smp_affinity_enable) - if (irq_set_affinity_hint(reply_q->vector, - reply_q->affinity_hint)) - dinitprintk(ioc, pr_info(MPT3SAS_FMT - "Err setting affinity hint to irq vector %d\n", - ioc->name, reply_q->vector)); index++; } } @@ -1976,10 +1970,10 @@ _base_disable_msix(struct MPT3SAS_ADAPTER *ioc) static int _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) { - struct msix_entry *entries, *a; int r; int i, local_max_msix_vectors; u8 try_msix = 0; + unsigned int irq_flags = PCI_IRQ_MSIX; if (msix_disable == -1 || msix_disable == 0) try_msix = 1; @@ -1991,7 +1985,7 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) goto try_ioapic; ioc->reply_queue_count = min_t(int, ioc->cpu_count, - ioc->msix_vector_count); + ioc->msix_vector_count); printk(MPT3SAS_FMT "MSI-X vectors supported: %d, no of cores" ": %d, max_msix_vectors: %d\n", ioc->name, ioc->msix_vector_count, @@ -2002,56 +1996,51 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) else local_max_msix_vectors = max_msix_vectors; - if (local_max_msix_vectors > 0) { + if (local_max_msix_vectors > 0) ioc->reply_queue_count = min_t(int, local_max_msix_vectors, ioc->reply_queue_count); - ioc->msix_vector_count = ioc->reply_queue_count; - } else if (local_max_msix_vectors == 0) + else if (local_max_msix_vectors == 0) goto try_ioapic; if (ioc->msix_vector_count < ioc->cpu_count) smp_affinity_enable = 0; - entries = kcalloc(ioc->reply_queue_count, sizeof(struct msix_entry), - 
GFP_KERNEL); - if (!entries) { - dfailprintk(ioc, pr_info(MPT3SAS_FMT - "kcalloc failed @ at %s:%d/%s() !!!\n", - ioc->name, __FILE__, __LINE__, __func__)); - goto try_ioapic; - } + if (smp_affinity_enable) + irq_flags |= PCI_IRQ_AFFINITY; - for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) - a->entry = i; - - r = pci_enable_msix_exact(ioc->pdev, entries, ioc->reply_queue_count); - if (r) { + r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count, + irq_flags); + if (r < 0) { dfailprintk(ioc, pr_info(MPT3SAS_FMT - "pci_enable_msix_exact failed (r=%d) !!!\n", + "pci_alloc_irq_vectors failed (r=%d) !!!\n", ioc->name, r)); - kfree(entries); goto try_ioapic; } ioc->msix_enable = 1; - for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) { - r = _base_request_irq(ioc, i, a->vector); + ioc->reply_queue_count = r; + for (i = 0; i < ioc->reply_queue_count; i++) { + r = _base_request_irq(ioc, i); if (r) { _base_free_irq(ioc); _base_disable_msix(ioc); - kfree(entries); goto try_ioapic; } } - kfree(entries); return 0; /* failback to io_apic interrupt routing */ try_ioapic: ioc->reply_queue_count = 1; - r = _base_request_irq(ioc, 0, ioc->pdev->irq); + r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY); + if (r < 0) { + dfailprintk(ioc, pr_info(MPT3SAS_FMT + "pci_alloc_irq_vector(legacy) failed (r=%d) !!!\n", + ioc->name, r)); + } else + r = _base_request_irq(ioc, 0); return r; } @@ -2222,7 +2211,8 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) list_for_each_entry(reply_q, &ioc->reply_queue_list, list) pr_info(MPT3SAS_FMT "%s: IRQ %d\n", reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : - "IO-APIC enabled"), reply_q->vector); + "IO-APIC enabled"), + pci_irq_vector(ioc->pdev, reply_q->msix_index)); pr_info(MPT3SAS_FMT "iomem(0x%016llx), mapped(0x%p), size(%d)\n", ioc->name, (unsigned long long)chip_phys, ioc->chip, memap_sz); @@ -5357,7 +5347,8 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) sizeof(resource_size_t *), GFP_KERNEL); if (!ioc->reply_post_host_index) { dfailprintk(ioc, pr_info(MPT3SAS_FMT "allocation " - "for cpu_msix_table failed!!!\n", ioc->name)); + "for reply_post_host_index failed!!!\n", + ioc->name)); r = -ENOMEM; goto out_free_resources; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 4ab634fc27df92..7fe7e6ed595b79 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -731,12 +731,10 @@ struct _event_ack_list { struct adapter_reply_queue { struct MPT3SAS_ADAPTER *ioc; u8 msix_index; - unsigned int vector; u32 reply_post_host_index; Mpi2ReplyDescriptorsUnion_t *reply_post_free; char name[MPT_NAME_LENGTH]; atomic_t busy; - cpumask_var_t affinity_hint; struct list_head list; }; From 17b4eaf475684bd12aeecf358a7ea6e3bec871a5 Mon Sep 17 00:00:00 2001 From: Tang Yuantian Date: Tue, 21 Feb 2017 14:51:24 +0800 Subject: [PATCH 0195/1911] cpufreq: qoriq: clean up unused code This snip code is not needed anymore since its user get_hard_smp_processor_id() has been removed. Signed-off-by: Tang Yuantian Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index a6fefac8afe49a..bfec1bcd3835f3 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -23,10 +23,6 @@ #include #include -#if !defined(CONFIG_ARM) -#include /* for get_hard_smp_processor_id() in UP configs */ -#endif - /** * struct cpu_data * @pclk: the parent clock of cpu From a56e574067c20d01d8fc74863fa187dd66da7b94 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Wed, 22 Feb 2017 07:23:13 -0800 Subject: [PATCH 0196/1911] scsi: aacraid: Fixed expander hotplug for SMART family Current driver Hotplug processing code skips over Enclosure channel, therefore any addition/removal of expander enclosure is not processed. Additionally device addition code relies on older device type, which prevents the hotplug of adapter expanders. Fixed by removing code that skips over Enclosure channels and using the latest device type for addition or removal or enclosure expanders. Fixes: 6223a39fe6fbbeef (scsi: aacraid: Added support for hotplug) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a2ea70d8a13ac6..1994c7445b54fd 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1908,9 +1908,6 @@ static void aac_resolve_luns(struct aac_dev *dev) for (bus = 0; bus < AAC_MAX_BUSES; bus++) { for (target = 0; target < AAC_MAX_TARGETS; target++) { - if (aac_phys_to_logical(bus) == ENCLOSURE_CHANNEL) - continue; - if (bus == CONTAINER_CHANNEL) channel = CONTAINER_CHANNEL; else @@ -1922,7 +1919,7 @@ static void aac_resolve_luns(struct aac_dev *dev) sdev = scsi_device_lookup(dev->scsi_host_ptr, channel, target, 0); - if (!sdev && devtype) + if (!sdev && new_devtype) scsi_add_device(dev->scsi_host_ptr, channel, target, 0); else if (sdev && new_devtype != devtype) From 35bfa99e81f45079728f6b2ac553dabb0bc62c7d Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 21 Feb 2017 21:41:53 +0100 Subject: [PATCH 0197/1911] PM / runtime: Fix some typos Signed-off-by: Christophe Jaillet Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 4870980e967e01..64546eb9a16a11 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -100,7 +100,7 @@ knows what to do to handle the device). * If the suspend callback returns an error code different from -EBUSY and -EAGAIN, the PM core regards this as a fatal error and will refuse to run the helper functions described in Section 4 for the device until its status - is directly set to either'active', or 'suspended' (the PM core provides + is directly set to either 'active', or 'suspended' (the PM core provides special helper functions for this purpose). In particular, if the driver requires remote wakeup capability (i.e. 
hardware @@ -217,7 +217,7 @@ defined in include/linux/pm.h: one to complete spinlock_t lock; - - lock used for synchronisation + - lock used for synchronization atomic_t usage_count; - the usage counter of the device @@ -565,7 +565,7 @@ appropriate to ensure that the device is not put back to sleep during the probe. This can happen with systems such as the network device layer. It may be desirable to suspend the device once ->probe() has finished. -Therefore the driver core uses the asyncronous pm_request_idle() to submit a +Therefore the driver core uses the asynchronous pm_request_idle() to submit a request to execute the subsystem-level idle callback for the device at that time. A driver that makes use of the runtime autosuspend feature, may want to update the last busy mark before returning from ->probe(). From d08d1b27fe2a7f6923952613f5fab56ae47a6f5b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2017 13:58:52 +0530 Subject: [PATCH 0198/1911] PM / QoS: Remove global notifiers They were never used in the kernel, so get rid of them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.txt | 13 +----- drivers/base/power/qos.c | 50 ++---------------------- include/linux/pm_qos.h | 8 ---- 3 files changed, 5 insertions(+), 66 deletions(-) diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 129f7c0e148398..21d2d48f87a254 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -163,8 +163,7 @@ of flags and remove sysfs attributes pm_qos_no_power_off and pm_qos_remote_wakeu under the device's power directory. Notification mechanisms: -The per-device PM QoS framework has 2 different and distinct notification trees: -a per-device notification tree and a global notification tree. +The per-device PM QoS framework has a per-device notification tree. int dev_pm_qos_add_notifier(device, notifier): Adds a notification callback function for the device. @@ -174,16 +173,6 @@ is changed (for resume latency device PM QoS only). int dev_pm_qos_remove_notifier(device, notifier): Removes the notification callback function for the device. -int dev_pm_qos_add_global_notifier(notifier): -Adds a notification callback function in the global notification tree of the -framework. -The callback is called when the aggregated value for any device is changed -(for resume latency device PM QoS only). - -int dev_pm_qos_remove_global_notifier(notifier): -Removes the notification callback function from the global notification tree -of the framework. - Active state latency tolerance diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index d888d9869b6a52..271bec73185ee5 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -17,12 +17,9 @@ * * This QoS design is best effort based. Dependents register their QoS needs. * Watchers register to keep track of the current QoS needs of the system. - * Watchers can register different types of notification callbacks: - * . a per-device notification callback using the dev_pm_qos_*_notifier API. - * The notification chain data is stored in the per-device constraint - * data struct. - * . a system-wide notification callback using the dev_pm_qos_*_global_notifier - * API. The notification chain data is stored in a static variable. + * Watchers can register a per-device notification callback using the + * dev_pm_qos_*_notifier API. 
The notification chain data is stored in the + * per-device constraint data struct. * * Note about the per-device constraint data struct allocation: * . The per-device constraints data struct ptr is tored into the device @@ -49,8 +46,6 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); -static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); - /** * __dev_pm_qos_flags - Check PM QoS flags for a given device. * @dev: Device to check the PM QoS flags for. @@ -135,8 +130,7 @@ s32 dev_pm_qos_read_value(struct device *dev) * @value: Value to assign to the QoS request. * * Internal function to update the constraints list using the PM QoS core - * code and if needed call the per-device and the global notification - * callbacks + * code and if needed call the per-device callbacks. */ static int apply_constraint(struct dev_pm_qos_request *req, enum pm_qos_req_action action, s32 value) @@ -148,12 +142,6 @@ static int apply_constraint(struct dev_pm_qos_request *req, case DEV_PM_QOS_RESUME_LATENCY: ret = pm_qos_update_target(&qos->resume_latency, &req->data.pnode, action, value); - if (ret) { - value = pm_qos_read_value(&qos->resume_latency); - blocking_notifier_call_chain(&dev_pm_notifiers, - (unsigned long)value, - req); - } break; case DEV_PM_QOS_LATENCY_TOLERANCE: ret = pm_qos_update_target(&qos->latency_tolerance, @@ -535,36 +523,6 @@ int dev_pm_qos_remove_notifier(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); -/** - * dev_pm_qos_add_global_notifier - sets notification entry for changes to - * target value of the PM QoS constraints for any device - * - * @notifier: notifier block managed by caller. - * - * Will register the notifier into a notification chain that gets called - * upon changes to the target value for any device. - */ -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier) -{ - return blocking_notifier_chain_register(&dev_pm_notifiers, notifier); -} -EXPORT_SYMBOL_GPL(dev_pm_qos_add_global_notifier); - -/** - * dev_pm_qos_remove_global_notifier - deletes notification for changes to - * target value of PM QoS constraints for any device - * - * @notifier: notifier block to be removed. - * - * Will remove the notifier from the notification chain that gets called - * upon changes to the target value for any device. - */ -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier) -{ - return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier); -} -EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); - /** * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. * @dev: Device whose ancestor to add the request for. 
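For context, the per-device notification path that remains after this removal is the dev_pm_qos_add_notifier()/dev_pm_qos_remove_notifier() pair described above. A hedged sketch of how a driver might hook it (kernel-context fragment; the my_* names are invented for the example):

#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>

/* Called whenever the aggregated resume-latency constraint of the device
 * this block was registered on changes; "val" is the new target value. */
static int my_qos_notify(struct notifier_block *nb, unsigned long val,
			 void *data)
{
	pr_info("resume latency target is now %lu us\n", val);
	return NOTIFY_OK;
}

static struct notifier_block my_qos_nb = {
	.notifier_call = my_qos_notify,
};

static int my_probe_hook(struct device *dev)
{
	return dev_pm_qos_add_notifier(dev, &my_qos_nb);
}

static void my_remove_hook(struct device *dev)
{
	dev_pm_qos_remove_notifier(dev, &my_qos_nb);
}
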
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d4d34791e4635f..3e2547d6e207c3 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -199,12 +197,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } -static inline int dev_pm_qos_add_global_notifier( - struct notifier_block *notifier) - { return 0; } -static inline int dev_pm_qos_remove_global_notifier( - struct notifier_block *notifier) - { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; From 669a311b6ecc4eeb367a269bdd5f4e143caddb9c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 10:57:23 +0000 Subject: [PATCH 0199/1911] scsi: qla2xxx: fix spelling mistake: "seperator" -> "separator" Trivial fix to spelling mistake in pr_err message Signed-off-by: Colin Ian King Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3084983c128720..fbbe5abd6ddb68 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1738,7 +1738,7 @@ static struct se_wwn *tcm_qla2xxx_npiv_make_lport( p = strchr(tmp, '@'); if (!p) { - pr_err("Unable to locate NPIV '@' seperator\n"); + pr_err("Unable to locate NPIV '@' separator\n"); return ERR_PTR(-EINVAL); } *p++ = '\0'; From ebe73647fb3895b75544cecdb6766a517fbf03d6 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Thu, 23 Feb 2017 08:57:20 -0600 Subject: [PATCH 0200/1911] scsi: cciss: correct check map error. Remove device driver failed to check map error messages Reported-by: Johnny Bieren Tested-by: Johnny Bieren Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. 
Petersen --- drivers/block/cciss.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 27d613795653bd..8e1a4554951c0d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -348,7 +348,7 @@ static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c) pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE); } -static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, +static int cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, SGDescriptor_struct *chain_block, int len) { SGDescriptor_struct *chain_sg; @@ -359,8 +359,16 @@ static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, chain_sg->Len = len; temp64.val = pci_map_single(h->pdev, chain_block, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping chain block for DMA\n", + __func__); + return -1; + } chain_sg->Addr.lower = temp64.val32.lower; chain_sg->Addr.upper = temp64.val32.upper; + + return 0; } #include "cciss_scsi.c" /* For SCSI tape support */ @@ -3369,15 +3377,31 @@ static void do_cciss_request(struct request_queue *q) temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]), tmp_sg[i].offset, tmp_sg[i].length, dir); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping page for DMA\n", __func__); + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } curr_sg[sg_index].Addr.lower = temp64.val32.lower; curr_sg[sg_index].Addr.upper = temp64.val32.upper; curr_sg[sg_index].Ext = 0; /* we are not chaining */ ++sg_index; } - if (chained) - cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], + if (chained) { + if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * - sizeof(SGDescriptor_struct)); + sizeof(SGDescriptor_struct))) { + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } + } /* track how many SG entries we are using */ if (seg > h->maxSG) From 2559a1ef688f933835912c731bed2254146a9b04 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Feb 2017 09:08:02 +1100 Subject: [PATCH 0201/1911] scsi: mac_scsi: Fix MAC_SCSI=m option when SCSI=m The mac_scsi driver still gets disabled when SCSI=m. This should have been fixed back when I enabled the tristate but I didn't see the bug. Fixes: 6e9ae6d560e1 ("[PATCH] mac_scsi: Add module option to Kconfig") Signed-off-by: Finn Thain Signed-off-by: Martin K. 
Petersen --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8aa9bd34123e64..230043c1c90ffc 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1480,7 +1480,7 @@ config ATARI_SCSI config MAC_SCSI tristate "Macintosh NCR5380 SCSI" - depends on MAC && SCSI=y + depends on MAC && SCSI select SCSI_SPI_ATTRS help This is the NCR 5380 SCSI controller included on most of the 68030 From 4dd9920d991745c4a16f53a8f615f706fbe4b3f7 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Thu, 5 Jan 2017 16:24:55 +0800 Subject: [PATCH 0202/1911] Btrfs: send, fix failure to rename top level inode due to name collision Under certain situations, an incremental send operation can fail due to a premature attempt to create a new top level inode (a direct child of the subvolume/snapshot root) whose name collides with another inode that was removed from the send snapshot. Consider the following example scenario. Parent snapshot: . (ino 256, gen 8) |---- a1/ (ino 257, gen 9) |---- a2/ (ino 258, gen 9) Send snapshot: . (ino 256, gen 3) |---- a2/ (ino 257, gen 7) In this scenario, when receiving the incremental send stream, the btrfs receive command fails like this (ran in verbose mode, -vv argument): rmdir a1 mkfile o257-7-0 rename o257-7-0 -> a2 ERROR: rename o257-7-0 -> a2 failed: Is a directory What happens when computing the incremental send stream is: 1) An operation to remove the directory with inode number 257 and generation 9 is issued. 2) An operation to create the inode with number 257 and generation 7 is issued. This creates the inode with an orphanized name of "o257-7-0". 3) An operation rename the new inode 257 to its final name, "a2", is issued. This is incorrect because inode 258, which has the same name and it's a child of the same parent (root inode 256), was not yet processed and therefore no rmdir operation for it was yet issued. The rename operation is issued because we fail to detect that the name of the new inode 257 collides with inode 258, because their parent, a subvolume/snapshot root (inode 256) has a different generation in both snapshots. So fix this by ignoring the generation value of a parent directory that matches a root inode (number 256) when we are checking if the name of the inode currently being processed collides with the name of some other inode that was not yet processed. We can achieve this scenario of different inodes with the same number but different generation values either by mounting a filesystem with the inode cache option (-o inode_cache) or by creating and sending snapshots across different filesystems, like in the following example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/a1 $ mkdir /mnt/a2 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/a2 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs receive /mnt -f /tmp/1.snap # Take note that once the filesystem is created, its current # generation has value 7 so the inode from the second snapshot has # a generation value of 7. 
And after receiving the first snapshot
# the filesystem is at a generation value of 10, because the call to
# create the second snapshot bumps the generation to 8 (the snapshot
# creation ioctl does a transaction commit), the receive command calls
# the snapshot creation ioctl to create the first snapshot, which bumps
# the filesystem's generation to 9, and finally when the receive
# operation finishes it calls an ioctl to transition the first snapshot
# (snap1) from RW mode to RO mode, which does another transaction commit
# and bumps the filesystem's generation to 10.

$ rm -f /tmp/1.snap
$ btrfs send /mnt/snap1 -f /tmp/1.snap
$ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap
$ umount /mnt
$ mkfs.btrfs -f /dev/sdd
$ mount /dev/sdd /mnt
$ btrfs receive /mnt /tmp/1.snap
# Receive of snapshot snap2 used to fail.
$ btrfs receive /mnt /tmp/2.snap

Signed-off-by: Robbie Ko
Reviewed-by: Filipe Manana
[Rewrote changelog to be more precise and clear]
Signed-off-by: Filipe Manana
---
 fs/btrfs/send.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d145ce80462021..2ae32c4f7dd7e5 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1681,6 +1681,9 @@ static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
 {
 	int ret;
 
+	if (ino == BTRFS_FIRST_FREE_OBJECTID)
+		return 1;
+
 	ret = get_cur_inode_state(sctx, ino, gen);
 	if (ret < 0)
 		goto out;
@@ -1866,7 +1869,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
 	 * not deleted and then re-created, if it was then we have no overwrite
 	 * and we can just unlink this entry.
 	 */
-	if (sctx->parent_root) {
+	if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
 		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
 				NULL, NULL, NULL);
 		if (ret < 0 && ret != -ENOENT)

From fe9c798dbf4c78322549068255f611e4038a5b28 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Wed, 11 Jan 2017 02:15:39 +0000
Subject: [PATCH 0203/1911] Btrfs: incremental send, do not delay rename when
 parent inode is new

When we are checking if we need to delay the rename operation for an
inode, we are not checking if a parent inode that exists in the send and
parent snapshots is really the same inode or not, that is, we are not
comparing the generation number of the parent inode in the send and
parent snapshots. Not only does this result in unnecessarily delaying a
rename operation, it can also later make us generate an incorrect name
for a new inode in the send snapshot that has the same number as another
inode in the parent snapshot but a different generation.

Here follows an example where this happens.

Parent snapshot:

 .                          (ino 256, gen 3)
 |--- dir258/               (ino 258, gen 7)
 |       |--- dir257/       (ino 257, gen 7)
 |
 |--- dir259/               (ino 259, gen 7)

Send snapshot:

 .                          (ino 256, gen 3)
 |--- file258               (ino 258, gen 10)
 |
 |--- new_dir259/           (ino 259, gen 10)
         |--- dir257/       (ino 257, gen 7)

The following steps happen when computing the incremental send stream:

1) When processing inode 257, its new parent is created using its orphan
   name (o257-21-0), and the rename operation for inode 257 is delayed
   because its new parent (inode 259) was not yet processed - this
   decision to delay the rename operation does not make much sense
   because the inode 259 in the send snapshot is a new inode, it's not
   the same as inode 259 in the parent snapshot.

2) When processing inode 258 we end up delaying its rmdir operation,
   because inode 257 was not yet renamed (moved away from the directory
   inode 258 represents).
We also create the new inode 258 using its orphan name "o258-10-0", then rename it to its final name of "file258" and then issue a truncate operation for it. However this truncate operation contains an incorrect name, which corresponds to the orphan name and not to the final name, which makes the receiver fail. This happens because when we attempt to compute the inode's current name we verify that there's another inode with the same number (258) that has its rmdir operation pending and because of that we generate an orphan name for the new inode 258 (we do this in the function get_cur_path()). Fix this by not delaying the rename operation of an inode if it has parents with the same number but different generations in both snapshots. The following steps reproduce this example scenario. $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/dir257 $ mkdir /mnt/dir258 $ mkdir /mnt/dir259 $ mv /mnt/dir257 /mnt/dir258/dir257 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ mv /mnt/dir258/dir257 /mnt/dir257 $ rmdir /mnt/dir258 $ rmdir /mnt/dir259 # Remount the filesystem so that the next created inodes will have the # numbers 258 and 259. This is because when a filesystem is mounted, # btrfs sets the subvolume's inode counter to a value corresponding to # the highest inode number in the subvolume plus 1. This inode counter # is used to assign a unique number to each new inode and it's # incremented by 1 after every inode creation. # Note: we unmount and then mount instead of doing a mount with # "-o remount" because otherwise the inode counter remains at value 260. $ umount /mnt $ mount /dev/sdb /mnt $ touch /mnt/file258 $ mkdir /mnt/new_dir259 $ mv /mnt/dir257 /mnt/new_dir259/dir257 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive /mnt -f /tmp/1.snap $ btrfs receive /mnt -f /tmp/2.snap -vv receiving snapshot mysnap2 uuid=e059b6d1-7f55-f140-8d7c-9a3039d23c97, ctransid=10 parent_uuid=77e98cb6-8762-814f-9e05-e8ba877fc0b0, parent_ctransid=7 utimes mkdir o259-10-0 rename dir258 -> o258-7-0 utimes mkfile o258-10-0 rename o258-10-0 -> file258 utimes truncate o258-10-0 size=0 ERROR: truncate o258-10-0 failed: No such file or directory Reported-by: Robbie Ko Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2ae32c4f7dd7e5..2742324514e4fd 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3559,6 +3559,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, { int ret = 0; u64 ino = parent_ref->dir; + u64 ino_gen = parent_ref->dir_gen; u64 parent_ino_before, parent_ino_after; struct fs_path *path_before = NULL; struct fs_path *path_after = NULL; @@ -3579,6 +3580,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, * at get_cur_path()). 
*/ while (ino > BTRFS_FIRST_FREE_OBJECTID) { + u64 parent_ino_after_gen; + if (is_waiting_for_move(sctx, ino)) { /* * If the current inode is an ancestor of ino in the @@ -3601,7 +3604,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, fs_path_reset(path_after); ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, - NULL, path_after); + &parent_ino_after_gen, path_after); if (ret < 0) goto out; ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, @@ -3618,10 +3621,20 @@ static int wait_for_parent_move(struct send_ctx *sctx, if (ino > sctx->cur_ino && (parent_ino_before != parent_ino_after || len1 != len2 || memcmp(path_before->start, path_after->start, len1))) { - ret = 1; - break; + u64 parent_ino_gen; + + ret = get_inode_info(sctx->parent_root, ino, NULL, + &parent_ino_gen, NULL, NULL, NULL, + NULL); + if (ret < 0) + goto out; + if (ino_gen == parent_ino_gen) { + ret = 1; + break; + } } ino = parent_ino_after; + ino_gen = parent_ino_after_gen; } out: From 0191410158840d6176de3267daa4604ad6a773fb Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Thu, 5 Jan 2017 16:24:58 +0800 Subject: [PATCH 0204/1911] Btrfs: incremental send, do not issue invalid rmdir operations When both the parent and send snapshots have a directory inode with the same number but different generations (therefore they are different inodes) and both have an entry with the same name, an incremental send stream will contain an invalid rmdir operation that refers to the orphanized name of the inode from the parent snapshot. The following example scenario shows how this happens. Parent snapshot: . |---- d259_old/ (ino 259, gen 9) | |---- d1/ (ino 258, gen 9) | |---- f (ino 257, gen 9) Send snapshot: . |---- d258/ (ino 258, gen 7) |---- d259/ (ino 259, gen 7) |---- d1/ (ino 257, gen 7) When the kernel is processing inode 258 it notices that in both snapshots there is an inode numbered 259 that is a parent of an inode 258. However it ignores the fact that the inodes numbered 259 have different generations in both snapshots, which means they are effectively different inodes. Then it checks that both inodes 259 have a dentry named "d1" and because of that it issues an rmdir operation with the orphanized name of the inode 258 from the parent snapshot. This happens at send.c:process_recorded_refs(), which calls send.c:did_overwrite_first_ref() that returns true and because of that later on at process_recorded_refs() such an rmdir operation is issued because the inode being currently processed (258) is a directory and it was deleted in the send snapshot (and replaced with another inode that has the same number and is a directory too). Fix this issue by comparing the generations of parent directory inodes that have the same number and make send.c:did_overwrite_first_ref() return false when the generations are different. The following steps reproduce the problem. $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/f $ mkdir /mnt/d1 $ mkdir /mnt/d259_old $ mv /mnt/d1 /mnt/d259_old/d1 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/d1 $ mkdir /mnt/dir258 $ mkdir /mnt/dir259 $ mv /mnt/d1 /mnt/dir259/d1 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs receive /mnt/ -f /tmp/1.snap # Take note that once the filesystem is created, its current # generation has value 7 so the inodes from the second snapshot all have # a generation value of 7. 
And after receiving the first snapshot # the filesystem is at a generation value of 10, because the call to # create the second snapshot bumps the generation to 8 (the snapshot # creation ioctl does a transaction commit), the receive command calls # the snapshot creation ioctl to create the first snapshot, which bumps # the filesystem's generation to 9, and finally when the receive # operation finishes it calls an ioctl to transition the first snapshot # (snap1) from RW mode to RO mode, which does another transaction commit # and bumps the filesystem's generation to 10. This means all the inodes # in the first snapshot (snap1) have a generation value of 9. $ rm -f /tmp/1.snap $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdd $ mount /dev/sdd /mnt $ btrfs receive /mnt -f /tmp/1.snap $ btrfs receive -vv /mnt -f /tmp/2.snap receiving snapshot mysnap2 uuid=9c03962f-f620-0047-9f98-32e5a87116d9, ctransid=7 parent_uuid=d17a6e3f-14e5-df4f-be39-a7951a5399aa, parent_ctransid=9 utimes unlink f mkdir o257-7-0 mkdir o259-7-0 rename o257-7-0 -> o259-7-0/d1 chown o259-7-0/d1 - uid=0, gid=0 chmod o259-7-0/d1 - mode=0755 utimes o259-7-0/d1 rmdir o258-9-0 ERROR: rmdir o258-9-0 failed: No such file or directory Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Rewrote changelog to be more precise and clear] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2742324514e4fd..712922ea64d278 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1937,6 +1937,19 @@ static int did_overwrite_ref(struct send_ctx *sctx, if (ret <= 0) goto out; + if (dir != BTRFS_FIRST_FREE_OBJECTID) { + ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + if (gen != dir_gen) + goto out; + } + /* check if the ref was overwritten by another ref */ ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, &ow_inode, &other_type); From 6f546216e9f9e95d6783547ce6113eb13e2daa54 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 28 Jan 2017 01:47:56 +0000 Subject: [PATCH 0205/1911] Btrfs: bulk delete checksum items in the same leaf Very often we have the checksums for an extent spread in multiple items in the checksums tree, and currently the algorithm to delete them starts by looking for them one by one and then deleting them one by one, which is not optimal since each deletion involves shifting all the other items in the leaf and when the leaf reaches some low threshold, to move items off the leaf into its left and right neighbor leafs. Also, after each item deletion we release our search path and start a new search for other checksums items. So optimize this by deleting in bulk all the items in the same leaf that contain checksums for the extent being freed. 
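To make the batching idea concrete, here is a minimal sketch in plain C; it models the leaf as a sorted in-memory array and uses illustrative names (csum_item, csum_delete_batch), not the real btrfs leaf/path API.

#include <stddef.h>

/* Hypothetical stand-in for a checksum item keyed by its byte offset. */
struct csum_item {
	unsigned long long offset;
};

/*
 * Given the slot whose item starts inside the freed range, walk towards
 * lower slots while the preceding items also start at or after bytenr,
 * then report the whole run so it can be deleted with a single call
 * (this mirrors the "while (slot >= 0)" walk added to btrfs_del_csums()).
 */
static size_t csum_delete_batch(const struct csum_item *items, size_t hit_slot,
				unsigned long long bytenr, size_t *first_slot)
{
	size_t slot = hit_slot;

	while (slot > 0 && items[slot - 1].offset >= bytenr)
		slot--;

	*first_slot = slot;
	return hit_slot - slot + 1;	/* number of items for one deletion */
}

A caller would then remove items[*first_slot .. *first_slot + count - 1] in one operation, just as the patch switches from btrfs_del_item() to a single btrfs_del_items() call per leaf.
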
Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/file-item.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f7b9a92ad56d17..e35df48b438387 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -643,7 +643,33 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, /* delete the entire item, it is inside our range */ if (key.offset >= bytenr && csum_end <= end_byte) { - ret = btrfs_del_item(trans, root, path); + int del_nr = 1; + + /* + * Check how many csum items preceding this one in this + * leaf correspond to our range and then delete them all + * at once. + */ + if (key.offset > bytenr && path->slots[0] > 0) { + int slot = path->slots[0] - 1; + + while (slot >= 0) { + struct btrfs_key pk; + + btrfs_item_key_to_cpu(leaf, &pk, slot); + if (pk.offset < bytenr || + pk.type != BTRFS_EXTENT_CSUM_KEY || + pk.objectid != + BTRFS_EXTENT_CSUM_OBJECTID) + break; + path->slots[0] = slot; + del_nr++; + key.offset = pk.offset; + slot--; + } + } + ret = btrfs_del_items(trans, root, path, + path->slots[0], del_nr); if (ret) goto out; if (key.offset == bytenr) From 91e1f56a8b3c94cb5ac9ce12b806134dc33c1eeb Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 7 Oct 2016 10:01:29 +0800 Subject: [PATCH 0206/1911] Btrfs: fix leak of subvolume writers counter When falling back from a nocow write to a regular cow write, we were leaking the subvolume writers counter in 2 situations, preventing snapshot creation from ever completing in the future, as it waits for that counter to go down to zero before the snapshot creation starts. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Improved changelog and subject] Signed-off-by: Filipe Manana --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c38391e948d97e..4efe9d82944c1b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1331,10 +1331,16 @@ static noinline int run_delalloc_nocow(struct inode *inode, * either valid or do not exist. */ if (csum_exist_in_range(fs_info, disk_bytenr, - num_bytes)) + num_bytes)) { + if (!nolock) + btrfs_end_write_no_snapshoting(root); goto out_check; - if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) + } + if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) { + if (!nolock) + btrfs_end_write_no_snapshoting(root); goto out_check; + } nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + From 3168021cf9b4906f7bd9871770235f14c5a17715 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Feb 2017 14:58:02 +0000 Subject: [PATCH 0207/1911] Btrfs: do not create explicit holes when replaying log tree if NO_HOLES enabled We log holes explicitly by using file extent items, however when replaying a log tree, if a logged file extent item corresponds to a hole and the NO_HOLES feature is enabled we do not need to copy the file extent item into the fs/subvolume tree, as the absence of such file extent items is the purpose of the NO_HOLES feature. So skip the copying of file extent items representing holes when the NO_HOLES feature is enabled. 
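As a hedged illustration of the new check (plain C with a made-up logged_extent struct, not the real tree-log types): a hole is logged as a file extent item whose disk byte number is zero, so with the NO_HOLES incompat flag set the replay code can skip copying it and jump straight to the inode update.

#include <stdbool.h>

/* Hypothetical, simplified view of a logged file extent item. */
struct logged_extent {
	unsigned long long disk_bytenr;	/* 0 means the item describes a hole */
};

/*
 * Decide whether a logged extent must be copied into the fs/subvolume
 * tree during log replay. With NO_HOLES, holes are represented by the
 * absence of file extent items, so an item with a zero disk_bytenr can
 * be skipped and only the inode itself still needs to be updated.
 */
static bool extent_needs_replay(const struct logged_extent *ext,
				bool no_holes_enabled)
{
	return !(ext->disk_bytenr == 0 && no_holes_enabled);
}
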
Signed-off-by: Filipe Manana --- fs/btrfs/tree-log.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3806853cde08d8..3cc972330ab34a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -673,6 +673,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, unsigned long dest_offset; struct btrfs_key ins; + if (btrfs_file_extent_disk_bytenr(eb, item) == 0 && + btrfs_fs_incompat(fs_info, NO_HOLES)) + goto update_inode; + ret = btrfs_insert_empty_item(trans, root, path, key, sizeof(*item)); if (ret) @@ -825,6 +829,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } inode_add_bytes(inode, nbytes); +update_inode: ret = btrfs_update_inode(trans, root, inode); out: if (inode) From 5cdd7db6c5c9d33dc66ecdd81aa8020276c7d140 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Feb 2017 22:39:50 +0000 Subject: [PATCH 0208/1911] Btrfs: fix assertion failure when freeing block groups at close_ctree() At close_ctree() we free the block groups and then only after we wait for any running worker kthreads to finish and shutdown the workqueues. This behaviour is racy and it triggers an assertion failure when freeing block groups because while we are doing it we can have for example a block group caching kthread running, and in that case the block group's reference count can still be greater than 1 by the time we assert its reference count is 1, leading to an assertion failure: [19041.198004] assertion failed: atomic_read(&block_group->count) == 1, file: fs/btrfs/extent-tree.c, line: 9799 [19041.200584] ------------[ cut here ]------------ [19041.201692] kernel BUG at fs/btrfs/ctree.h:3418! [19041.202830] invalid opcode: 0000 [#1] PREEMPT SMP [19041.203929] Modules linked in: btrfs xor raid6_pq dm_flakey dm_mod crc32c_generic ppdev sg psmouse acpi_cpufreq pcspkr parport_pc evdev tpm_tis parport tpm_tis_core i2c_piix4 i2c_core tpm serio_raw processor button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [19041.208082] CPU: 6 PID: 29051 Comm: umount Not tainted 4.9.0-rc7-btrfs-next-36+ #1 [19041.208082] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [19041.208082] task: ffff88015f028980 task.stack: ffffc9000ad34000 [19041.208082] RIP: 0010:[] [] assfail.constprop.41+0x1c/0x1e [btrfs] [19041.208082] RSP: 0018:ffffc9000ad37d60 EFLAGS: 00010286 [19041.208082] RAX: 0000000000000061 RBX: ffff88015ecb4000 RCX: 0000000000000001 [19041.208082] RDX: ffff88023f392fb8 RSI: ffffffff817ef7ba RDI: 00000000ffffffff [19041.208082] RBP: ffffc9000ad37d60 R08: 0000000000000001 R09: 0000000000000000 [19041.208082] R10: ffffc9000ad37cb0 R11: ffffffff82f2b66d R12: ffff88023431d170 [19041.208082] R13: ffff88015ecb40c0 R14: ffff88023431d000 R15: ffff88015ecb4100 [19041.208082] FS: 00007f44f3d42840(0000) GS:ffff88023f380000(0000) knlGS:0000000000000000 [19041.208082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [19041.208082] CR2: 00007f65d623b000 CR3: 00000002166f2000 CR4: 00000000000006e0 [19041.208082] Stack: [19041.208082] ffffc9000ad37d98 ffffffffa035989f ffff88015ecb4000 ffff88015ecb5630 [19041.208082] ffff88014f6be000 0000000000000000 00007ffcf0ba6a10 ffffc9000ad37df8 [19041.208082] ffffffffa0368cd4 ffff88014e9658e0 ffffc9000ad37e08 ffffffff811a634d [19041.208082] Call Trace: [19041.208082] [] 
btrfs_free_block_groups+0x17f/0x392 [btrfs] [19041.208082] [] close_ctree+0x1c5/0x2e1 [btrfs] [19041.208082] [] ? evict_inodes+0x132/0x141 [19041.208082] [] btrfs_put_super+0x15/0x17 [btrfs] [19041.208082] [] generic_shutdown_super+0x6a/0xeb [19041.208082] [] kill_anon_super+0x12/0x1c [19041.208082] [] btrfs_kill_super+0x16/0x21 [btrfs] [19041.208082] [] deactivate_locked_super+0x3b/0x68 [19041.208082] [] deactivate_super+0x36/0x39 [19041.208082] [] cleanup_mnt+0x58/0x76 [19041.208082] [] __cleanup_mnt+0x12/0x14 [19041.208082] [] task_work_run+0x6f/0x95 [19041.208082] [] prepare_exit_to_usermode+0xa3/0xc1 [19041.208082] [] syscall_return_slowpath+0x16e/0x1d2 [19041.208082] [] entry_SYSCALL_64_fastpath+0xab/0xad [19041.208082] Code: c7 ae a0 3e a0 48 89 e5 e8 4e 74 d4 e0 0f 0b 55 89 f1 48 c7 c2 0b a4 3e a0 48 89 fe 48 c7 c7 a4 a6 3e a0 48 89 e5 e8 30 74 d4 e0 <0f> 0b 55 31 d2 48 89 e5 e8 d5 b9 f7 ff 5d c3 48 63 f6 55 31 c9 [19041.208082] RIP [] assfail.constprop.41+0x1c/0x1e [btrfs] [19041.208082] RSP [19041.279264] ---[ end trace 23330586f16f064d ]--- This started happening as of kernel 4.8, since commit f3bca8028bd9 ("Btrfs: add ASSERT for block group's memory leak") introduced these assertions. So fix this by freeing the block groups only after waiting for all worker kthreads to complete and shutdown the workqueues. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent-tree.c | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b06f557c176ea..d0e61d58b12190 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3261,7 +3261,6 @@ int open_ctree(struct super_block *sb, fail_block_groups: btrfs_put_block_group_cache(fs_info); - btrfs_free_block_groups(fs_info); fail_tree_roots: free_root_pointers(fs_info, 1); @@ -3269,6 +3268,7 @@ int open_ctree(struct super_block *sb, fail_sb_buffer: btrfs_stop_all_workers(fs_info); + btrfs_free_block_groups(fs_info); fail_alloc: fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -3977,8 +3977,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_put_block_group_cache(fs_info); - btrfs_free_block_groups(fs_info); - /* * we must make sure there is not any read request to * submit after we stopping all workers. @@ -3986,6 +3984,8 @@ void close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + btrfs_free_block_groups(fs_info); + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b966335543c..438c7312de3381 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9740,6 +9740,11 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) } } +/* + * Must be called only after stopping all workers, since we could have block + * group caching kthreads running, and therefore they could race with us if we + * freed the block groups before stopping them. + */ int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; @@ -9779,9 +9784,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); - if (block_group->cached == BTRFS_CACHE_STARTED) - wait_block_group_cache_done(block_group); - /* * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. 
@@ -9791,6 +9793,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) free_excluded_extents(info, block_group); btrfs_remove_free_space_cache(block_group); + ASSERT(block_group->cached != BTRFS_CACHE_STARTED); ASSERT(list_empty(&block_group->dirty_list)); ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); From a9b9477db2937934e469db800317ec3ef7e81b51 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 4 Feb 2017 17:12:00 +0000 Subject: [PATCH 0209/1911] Btrfs: fix use-after-free due to wrong order of destroying work queues Before we destroy all work queues (and wait for their tasks to complete) we were destroying the work queues used for metadata I/O operations, which can result in a use-after-free problem because most tasks from all work queues do metadata I/O operations. For example, the tasks from the caching workers work queue (fs_info->caching_workers), which is destroyed only after the work queue used for metadata reads (fs_info->endio_meta_workers) is destroyed, do metadata reads, which result in attempts to queue tasks into the later work queue, triggering a use-after-free with a trace like the following: [23114.613543] general protection fault: 0000 [#1] PREEMPT SMP [23114.614442] Modules linked in: dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c btrfs xor raid6_pq dm_flakey dm_mod crc32c_generic acpi_cpufreq tpm_tis tpm_tis_core tpm ppdev parport_pc parport i2c_piix4 processor sg evdev i2c_core psmouse pcspkr serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio e1000 scsi_mod floppy [last unloaded: scsi_debug] [23114.616932] CPU: 9 PID: 4537 Comm: kworker/u32:8 Not tainted 4.9.0-rc7-btrfs-next-36+ #1 [23114.616932] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [23114.616932] Workqueue: btrfs-cache btrfs_cache_helper [btrfs] [23114.616932] task: ffff880221d45780 task.stack: ffffc9000bc50000 [23114.616932] RIP: 0010:[] [] btrfs_queue_work+0x2c/0x190 [btrfs] [23114.616932] RSP: 0018:ffff88023f443d60 EFLAGS: 00010246 [23114.616932] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000000000102 [23114.616932] RDX: ffffffffa0419000 RSI: ffff88011df534f0 RDI: ffff880101f01c00 [23114.616932] RBP: ffff88023f443d80 R08: 00000000000f7000 R09: 000000000000ffff [23114.616932] R10: ffff88023f443d48 R11: 0000000000001000 R12: ffff88011df534f0 [23114.616932] R13: ffff880135963868 R14: 0000000000001000 R15: 0000000000001000 [23114.616932] FS: 0000000000000000(0000) GS:ffff88023f440000(0000) knlGS:0000000000000000 [23114.616932] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [23114.616932] CR2: 00007f0fb9f8e520 CR3: 0000000001a0b000 CR4: 00000000000006e0 [23114.616932] Stack: [23114.616932] ffff880101f01c00 ffff88011df534f0 ffff880135963868 0000000000001000 [23114.616932] ffff88023f443da0 ffffffffa03470af ffff880149b37200 ffff880135963868 [23114.616932] ffff88023f443db8 ffffffff8125293c ffff880149b37200 ffff88023f443de0 [23114.616932] Call Trace: [23114.616932] [23114.616932] [] end_workqueue_bio+0xd5/0xda [btrfs] [23114.616932] [] bio_endio+0x54/0x57 [23114.616932] [] btrfs_end_bio+0xf7/0x106 [btrfs] [23114.616932] [] bio_endio+0x54/0x57 [23114.616932] [] blk_update_request+0x21a/0x30f [23114.616932] [] scsi_end_request+0x31/0x182 [scsi_mod] [23114.616932] [] scsi_io_completion+0x1ce/0x4c8 [scsi_mod] [23114.616932] [] scsi_finish_command+0x104/0x10d [scsi_mod] 
[23114.616932] [] scsi_softirq_done+0x101/0x10a [scsi_mod] [23114.616932] [] blk_done_softirq+0x82/0x8d [23114.616932] [] __do_softirq+0x1ab/0x412 [23114.616932] [] irq_exit+0x49/0x99 [23114.616932] [] smp_call_function_single_interrupt+0x24/0x26 [23114.616932] [] call_function_single_interrupt+0x89/0x90 [23114.616932] [23114.616932] [] ? scsi_request_fn+0x13a/0x2a1 [scsi_mod] [23114.616932] [] ? _raw_spin_unlock_irq+0x2c/0x4a [23114.616932] [] ? _raw_spin_unlock_irq+0x32/0x4a [23114.616932] [] ? _raw_spin_unlock_irq+0x2c/0x4a [23114.616932] [] scsi_request_fn+0x13a/0x2a1 [scsi_mod] [23114.616932] [] __blk_run_queue_uncond+0x22/0x2b [23114.616932] [] __blk_run_queue+0x19/0x1b [23114.616932] [] blk_queue_bio+0x268/0x282 [23114.616932] [] generic_make_request+0xbd/0x160 [23114.616932] [] submit_bio+0x100/0x11d [23114.616932] [] ? __this_cpu_preempt_check+0x13/0x15 [23114.616932] [] ? __percpu_counter_add+0x8e/0xa7 [23114.616932] [] btrfsic_submit_bio+0x1a/0x1d [btrfs] [23114.616932] [] btrfs_map_bio+0x1f4/0x26d [btrfs] [23114.616932] [] btree_submit_bio_hook+0x74/0xbf [btrfs] [23114.616932] [] ? btrfs_wq_submit_bio+0x160/0x160 [btrfs] [23114.616932] [] submit_one_bio+0x6b/0x89 [btrfs] [23114.616932] [] read_extent_buffer_pages+0x170/0x1ec [btrfs] [23114.616932] [] ? free_root_pointers+0x64/0x64 [btrfs] [23114.616932] [] readahead_tree_block+0x3f/0x4c [btrfs] [23114.616932] [] read_block_for_search.isra.20+0x1ce/0x23d [btrfs] [23114.616932] [] btrfs_search_slot+0x65f/0x774 [btrfs] [23114.616932] [] ? free_extent_buffer+0x73/0x7e [btrfs] [23114.616932] [] btrfs_next_old_leaf+0xa1/0x33c [btrfs] [23114.616932] [] btrfs_next_leaf+0x10/0x12 [btrfs] [23114.616932] [] caching_thread+0x22d/0x416 [btrfs] [23114.616932] [] btrfs_scrubparity_helper+0x187/0x3b6 [btrfs] [23114.616932] [] btrfs_cache_helper+0xe/0x10 [btrfs] [23114.616932] [] process_one_work+0x273/0x4e4 [23114.616932] [] worker_thread+0x1eb/0x2ca [23114.616932] [] ? rescuer_thread+0x2b6/0x2b6 [23114.616932] [] kthread+0xd5/0xdd [23114.616932] [] ? __kthread_unpark+0x5a/0x5a [23114.616932] [] ret_from_fork+0x27/0x40 [23114.616932] Code: 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 89 f4 48 8b 46 70 a8 04 74 09 48 8b 5f 08 48 85 db 75 03 48 8b 1f 49 89 5c 24 68 <83> 7b 64 ff 74 04 f0 ff 43 58 49 83 7c 24 08 00 74 2c 4c 8d 6b [23114.616932] RIP [] btrfs_queue_work+0x2c/0x190 [btrfs] [23114.616932] RSP [23114.689493] ---[ end trace 6e48b6bc707ca34b ]--- [23114.690166] Kernel panic - not syncing: Fatal exception in interrupt [23114.691283] Kernel Offset: disabled [23114.691918] ---[ end Kernel panic - not syncing: Fatal exception in interrupt The following diagram shows the sequence of operations that lead to the use-after-free problem from the above trace: CPU 1 CPU 2 CPU 3 caching_thread() close_ctree() btrfs_stop_all_workers() btrfs_destroy_workqueue( fs_info->endio_meta_workers) btrfs_search_slot() read_block_for_search() readahead_tree_block() read_extent_buffer_pages() submit_one_bio() btree_submit_bio_hook() btrfs_bio_wq_end_io() --> sets the bio's bi_end_io callback to end_workqueue_bio() --> bio is submitted bio completes and its bi_end_io callback is invoked --> end_workqueue_bio() --> attempts to queue a task on fs_info->endio_meta_workers btrfs_destroy_workqueue( fs_info->caching_workers) So fix this by destroying the queues used for metadata I/O tasks only after destroying all the other queues. 
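The ordering rule behind the fix can be stated with a tiny self-contained sketch (ordinary userspace C with toy objects, not the btrfs workqueue API): a queue that still-running workers may submit into has to outlive those workers, so it is destroyed last.

#include <stdio.h>
#include <stdlib.h>

/* Toy stand-in for a work queue: just a named allocation. */
struct toy_wq {
	const char *name;
};

static struct toy_wq *toy_wq_create(const char *name)
{
	struct toy_wq *q = malloc(sizeof(*q));

	if (q)
		q->name = name;
	return q;
}

static void toy_wq_destroy(struct toy_wq *q)
{
	if (!q)
		return;
	printf("destroying %s\n", q->name);
	free(q);
}

int main(void)
{
	/* caching work submits metadata reads that complete on endio_meta */
	struct toy_wq *caching = toy_wq_create("caching_workers");
	struct toy_wq *endio_meta = toy_wq_create("endio_meta_workers");

	/* 1) tear down every producer of metadata I/O first ... */
	toy_wq_destroy(caching);
	/* 2) ... and only then the queues that such I/O would still be queued on */
	toy_wq_destroy(endio_meta);
	return 0;
}
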
Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/disk-io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0e61d58b12190..32a9ec11888d7b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2205,11 +2205,9 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_destroy_workqueue(fs_info->endio_workers); - btrfs_destroy_workqueue(fs_info->endio_meta_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); btrfs_destroy_workqueue(fs_info->endio_repair_workers); btrfs_destroy_workqueue(fs_info->rmw_workers); - btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->submit_workers); @@ -2219,6 +2217,13 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); btrfs_destroy_workqueue(fs_info->extent_workers); + /* + * Now that all other work queues are destroyed, we can safely destroy + * the queues used for metadata I/O, since tasks from those other work + * queues can do metadata I/O operations. + */ + btrfs_destroy_workqueue(fs_info->endio_meta_workers); + btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); } static void free_root_extent_buffers(struct btrfs_root *root) From 82bfb2e7b645c8f228dc3b6d3b27b0b10125ca4f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Feb 2017 17:56:32 +0000 Subject: [PATCH 0210/1911] Btrfs: incremental send, fix unnecessary hole writes for sparse files When using the NO_HOLES feature, during an incremental send we often issue write operations for holes when we should not, because that range is already a hole in the destination snapshot. While that does not change the contents of the file at the receiver, it avoids preservation of file holes, leading to wasted disk space and extra IO during send/receive. A couple examples where the holes are not preserved follows. $ mkfs.btrfs -O no-holes -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xaa 0 4K" /mnt/foo $ xfs_io -f -c "pwrite -S 0xaa 0 4K" -c "pwrite -S 0xbb 1028K 4K" /mnt/bar $ btrfs subvolume snapshot -r /mnt /mnt/snap1 # Now add one new extent to our first test file, increasing its size and # leaving a 1Mb hole between the first extent and this new extent. $ xfs_io -c "pwrite -S 0xbb 1028K 4K" /mnt/foo # Now overwrite the last extent of our second test file. $ xfs_io -c "pwrite -S 0xcc 1028K 4K" /mnt/bar $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo /mnt/snap2/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 25088..25095 8 0x2000 1: [8..2055]: hole 2048 2: [2056..2063]: 24576..24583 8 0x2001 $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar /mnt/snap2/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 25096..25103 8 0x2000 1: [8..2055]: hole 2048 2: [2056..2063]: 24584..24591 8 0x2001 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt # It's not relevant to enable no-holes in the new filesystem. 
$ mkfs.btrfs -O no-holes -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive /mnt -f /tmp/1.snap $ btrfs receive /mnt -f /tmp/2.snap $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo /mnt/snap2/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 24576..24583 8 0x2000 1: [8..2063]: 25624..27679 2056 0x1 $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar /mnt/snap2/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 24584..24591 8 0x2000 1: [8..2063]: 27680..29735 2056 0x1 The holes do not exist in the second filesystem and they were replaced with extents filled with the byte 0x00, making each file take 1032Kb of space instead of 8Kb. So fix this by not issuing the write operations consisting of buffers filled with the byte 0x00 when the destination snapshot already has a hole for the respective range. A test case for fstests will follow soon. Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 712922ea64d278..456c8901489b6c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5306,6 +5306,81 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) return ret; } +static int range_is_hole_in_parent(struct send_ctx *sctx, + const u64 start, + const u64 end) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = sctx->parent_root; + u64 search_start = start; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = search_start; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0 && path->slots[0] > 0) + path->slots[0]--; + + while (search_start < end) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u64 extent_end; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid < sctx->cur_ino || + key.type < BTRFS_EXTENT_DATA_KEY) + goto next; + if (key.objectid > sctx->cur_ino || + key.type > BTRFS_EXTENT_DATA_KEY || + key.offset >= end) + break; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) { + u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); + + extent_end = ALIGN(key.offset + size, + root->fs_info->sectorsize); + } else { + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, fi); + } + if (extent_end <= start) + goto next; + if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { + search_start = extent_end; + goto next; + } + ret = 0; + goto out; +next: + path->slots[0]++; + } + ret = 1; +out: + btrfs_free_path(path); + return ret; +} + static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { @@ -5350,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, return ret; } - if (sctx->cur_inode_last_extent < key->offset) - ret = send_hole(sctx, key->offset); + if (sctx->cur_inode_last_extent < key->offset) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + key->offset); + if (ret < 0) + return ret; + else if (ret == 0) + ret = send_hole(sctx, key->offset); + else + ret = 0; + } sctx->cur_inode_last_extent = extent_end; return ret; } From 
76b42abbf7488121c4f9f1ea5941123306e25d99 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Feb 2017 16:56:01 +0000 Subject: [PATCH 0211/1911] Btrfs: fix data loss after truncate when using the no-holes feature If we have a file with an implicit hole (NO_HOLES feature enabled) that has an extent following the hole, delayed writes against regions of the file behind the hole happened before but were not yet flushed and then we truncate the file to a smaller size that lies inside the hole, we end up persisting a wrong disk_i_size value for our inode that leads to data loss after umounting and mounting again the filesystem or after the inode is evicted and loaded again. This happens because at inode.c:btrfs_truncate_inode_items() we end up setting last_size to the offset of the extent that we deleted and that followed the hole. We then pass that value to btrfs_ordered_update_i_size() which updates the inode's disk_i_size to a value smaller then the offset of the buffered (delayed) writes. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0x01 0K 32K" /mnt/foo $ xfs_io -d -c "pwrite -S 0x02 -b 32K 64K 32K" /mnt/foo $ xfs_io -c "truncate 60K" /mnt/foo --> inode's disk_i_size updated to 0 $ md5sum /mnt/foo 3c5ca3c3ab42f4b04d7e7eb0b0d4d806 /mnt/foo $ umount /dev/sdb $ mount /dev/sdb /mnt $ md5sum /mnt/foo d41d8cd98f00b204e9800998ecf8427e /mnt/foo --> Empty file, all data lost! Cc: # 3.14+ Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents") Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4efe9d82944c1b..70df4519242406 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4418,19 +4418,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) { - /* - * With NO_HOLES mode, for the following mapping - * - * [0-4k][hole][8k-12k] - * - * if truncating isize down to 6k, it ends up - * isize being 8k. - */ - if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) - last_size = new_size; + if (item_end < new_size) break; - } if (found_key.offset >= new_size) del_item = 1; else @@ -4613,8 +4602,12 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); } error: - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + ASSERT(last_size >= new_size); + if (!err && last_size > new_size) + last_size = new_size; btrfs_ordered_update_i_size(inode, last_size, NULL); + } btrfs_free_path(path); From 263d3995c93c6020576f6c93506412a0b9d1e932 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Feb 2017 18:43:57 +0000 Subject: [PATCH 0212/1911] Btrfs: try harder to migrate items to left sibling before splitting a leaf Before attempting to split a leaf we try to migrate items from the leaf to its right and left siblings. We start by trying to move items into the rigth sibling and, if the new item is meant to be inserted at the end of our leaf, we try to free from our leaf an amount of bytes equal to the number of bytes used by the new item, by setting the variable space_needed to the byte size of that new item. 
However if we fail to move enough items to the right sibling due to lack of space in that sibling, we then try to move items into the left sibling, and in that case we try to free an amount equal to the size of the new item from our leaf, when we need only to free an amount corresponding to the size of the new item minus the current free space of our leaf. So make sure that before we try to move items to the left sibling we do set the variable space_needed with a value corresponding to the new item's size minus the leaf's current free space. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/ctree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1192bc7d2ee782..1fb60ee77b4a50 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4159,6 +4159,9 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, /* try to push all the items before our slot into the next leaf */ slot = path->slots[0]; + space_needed = data_size; + if (slot > 0) + space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); if (ret < 0) return ret; @@ -4214,6 +4217,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (wret < 0) return wret; if (wret) { + space_needed = data_size; + if (slot > 0) + space_needed -= btrfs_leaf_free_space(fs_info, + l); wret = push_leaf_left(trans, root, path, space_needed, space_needed, 0, (u32)-1); if (wret < 0) From bca5609fcc401b0055a3c01ecc33c5f6fb67af7b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:20 +0800 Subject: [PATCH 0213/1911] drm/i915/gvt: Add more edid definition support We'll need to apply different resolution for vgpu types, so this adds more EDID types definition. v2: fix typo for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 112 +++++++++++++++++++---------- drivers/gpu/drm/i915/gvt/display.h | 6 ++ 2 files changed, 80 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8fde880c3993..d346e43656f783 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) return 0; } +static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { + { +/* EDID with 1024x768 as its resolution */ + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* Standard Timings. 
All invalid */ + 0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, + 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, + /* 18 Byte Data Blocks 1: invalid */ + 0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0xef, + }, + { /* EDID with 1920x1200 as its resolution */ -static unsigned char virtual_dp_monitor_edid[] = { - /*Header*/ - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - /* Vendor & Product Identification */ - 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, - /* Version & Revision */ - 0x01, 0x04, - /* Basic Display Parameters & Features */ - 0xa5, 0x34, 0x20, 0x78, 0x23, - /* Color Characteristics */ - 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, - /* Established Timings: maximum resolution is 1024x768 */ - 0x21, 0x08, 0x00, - /* - * Standard Timings. - * below new resolutions can be supported: - * 1920x1080, 1280x720, 1280x960, 1280x1024, - * 1440x900, 1600x1200, 1680x1050 - */ - 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, - 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, - /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ - 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, - 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, - /* 18 Byte Data Blocks 2: invalid */ - 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - /* 18 Byte Data Blocks 3: invalid */ - 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, - 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, - /* 18 Byte Data Blocks 4: invalid */ - 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, - 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, - /* Extension Block Count */ - 0x00, - /* Checksum */ - 0x45, + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* + * Standard Timings. 
+ * below new resolutions can be supported: + * 1920x1080, 1280x720, 1280x960, 1280x1024, + * 1440x900, 1600x1200, 1680x1050 + */ + 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, + 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, + /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ + 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0x45, + }, }; #define DPCD_HEADER_SIZE 0xb @@ -189,7 +225,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid, + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], EDID_SIZE); port->edid->data_valid = true; diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 8b234ea961f67b..84f160bb7bc2ab 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -154,6 +154,12 @@ struct intel_vgpu_port { int type; }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); From d1a513be1f0a25f094e1577d059b9aebaa279bb2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:21 +0800 Subject: [PATCH 0214/1911] drm/i915/gvt: add resolution definition for vGPU type This assigns resolution definition for each vGPU type. For smaller resource type we should limit max resolution, so e.g limit to 1024x768 for 64M type, others are still default to 1920x1200. 
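A hedged sketch of the mapping this patch introduces (plain C with toy_* names, not the real vgpu_types[]/intel_vgpu_type structures): each fixed type row carries an EDID id, the 64MB type gets the 1024x768 block while the larger types keep 1920x1200, and the id is range-checked before it is used to index the EDID array.

#include <stddef.h>

enum toy_edid { TOY_EDID_1024_768, TOY_EDID_1920_1200, TOY_EDID_NUM };

/* Illustrative fixed type table: low graphics memory (MB) -> EDID id. */
struct toy_vgpu_type {
	unsigned int low_gm_mb;
	enum toy_edid edid;
};

static const struct toy_vgpu_type toy_types[] = {
	{  64, TOY_EDID_1024_768  },	/* smallest type is capped at 1024x768 */
	{ 128, TOY_EDID_1920_1200 },
	{ 256, TOY_EDID_1920_1200 },
	{ 512, TOY_EDID_1920_1200 },
};

/*
 * Pick the EDID id for a type, rejecting out-of-range values the same
 * way setup_virtual_dp_monitor() rejects resolution >= GVT_EDID_NUM.
 */
static int toy_pick_edid(size_t type_idx, enum toy_edid *edid)
{
	if (type_idx >= sizeof(toy_types) / sizeof(toy_types[0]))
		return -1;
	if (toy_types[type_idx].edid >= TOY_EDID_NUM)
		return -1;
	*edid = toy_types[type_idx].edid;
	return 0;
}
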
v2: Fix for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 15 ++++++++++----- drivers/gpu/drm/i915/gvt/display.h | 14 +++++++++++++- drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- drivers/gpu/drm/i915/gvt/vgpu.c | 18 +++++++++++------- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index d346e43656f783..43e02e0383754a 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -211,10 +211,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) } static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, - int type) + int type, unsigned int resolution) { struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + if (WARN_ON(resolution >= GVT_EDID_NUM)) + return -EINVAL; + port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL); if (!port->edid) return -ENOMEM; @@ -225,7 +228,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution], EDID_SIZE); port->edid->data_valid = true; @@ -358,16 +361,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) * Zero on success, negative error code if failed. * */ -int intel_vgpu_init_display(struct intel_vgpu *vgpu) +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; intel_vgpu_init_i2c_edid(vgpu); if (IS_SKYLAKE(dev_priv)) - return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D); + return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, + resolution); else - return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B); + return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B, + resolution); } /** diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 84f160bb7bc2ab..d73de22102e2b7 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -160,10 +160,22 @@ enum intel_vgpu_edid { GVT_EDID_NUM, }; +static inline char *vgpu_edid_str(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return "1024x768"; + case GVT_EDID_1920_1200: + return "1920x1200"; + default: + return ""; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); -int intel_vgpu_init_display(struct intel_vgpu *vgpu); +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index faff044c643425..23791920ced1ee 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -216,6 +216,7 @@ struct intel_vgpu_type { unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + enum intel_vgpu_edid resolution; }; struct intel_gvt { @@ -318,6 +319,7 @@ struct intel_vgpu_creation_params { __u64 low_gm_sz; /* in MB */ __u64 high_gm_sz; /* in MB */ __u64 fence_sz; + __u64 resolution; __s32 primary; __u64 vgpu_id; }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 10c3a4b95a9229..182914c22ac553 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ 
b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence); + "fence: %d\nresolution: %s\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution)); } static MDEV_TYPE_ATTR_RO(available_instance); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c27c13c3cc4836..41cfa5ccae84ce 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -68,13 +68,14 @@ static struct { unsigned int low_mm; unsigned int high_mm; unsigned int fence; + enum intel_vgpu_edid edid; char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, }; /** @@ -119,6 +120,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) gvt->types[i].low_gm_size = vgpu_types[i].low_mm; gvt->types[i].high_gm_size = vgpu_types[i].high_mm; gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].resolution = vgpu_types[i].edid; gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, high_avail / vgpu_types[i].high_mm); @@ -129,11 +131,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) sprintf(gvt->types[i].name, "GVTg_V5_%s", vgpu_types[i].name); - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, - gvt->types[i].high_gm_size, gvt->types[i].fence); + gvt->types[i].high_gm_size, gvt->types[i].fence, + vgpu_edid_str(gvt->types[i].resolution)); } gvt->num_types = i; @@ -258,7 +261,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu); + ret = intel_vgpu_init_display(vgpu, param->resolution); if (ret) goto out_clean_gtt; @@ -322,6 +325,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.resolution = type->resolution; /* XXX current param based on MB */ param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); From 8bcd7c188bc479bea866d286a7dc0af9734c3c64 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 24 Feb 2017 17:07:38 +0800 Subject: [PATCH 0215/1911] drm/i915/gvt: fix pcode mailbox write emulation of BDW Add pcode mailbox write emulation in gvt for BDW, reuse emulation code of Skylake. V2: refine comments, remove duplication defination of 0x138124, add IS_SKYLAKE() check for Skylake only pcode commands. 
Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 33 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f89b183488e98e..ebbe74072dc3bf 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1304,21 +1304,24 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); switch (cmd) { - case 0x6: - /** - * "Read memory latency" command on gen9. - * Below memory latency values are read - * from skylake platform. - */ - if (!*data0) - *data0 = 0x1e1a1100; - else - *data0 = 0x61514b3d; + case GEN9_PCODE_READ_MEM_LATENCY: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + /** + * "Read memory latency" command on gen9. + * Below memory latency values are read + * from skylake platform. + */ + if (!*data0) + *data0 = 0x1e1a1100; + else + *data0 = 0x61514b3d; + } break; case SKL_PCODE_CDCLK_CONTROL: - *data0 = SKL_CDCLK_READY_FOR_CHANGE; + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; - case 0x5: + case GEN6_PCODE_READ_RC6VIDS: *data0 |= 0x1; break; } @@ -2202,7 +2205,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL); + MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2281,7 +2284,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2453,6 +2455,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); @@ -2544,7 +2548,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); - MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); From 41bfab35b3d0e0a85ed0c12d7bafd7484e96ca78 Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Fri, 24 Feb 2017 16:03:28 +0800 Subject: [PATCH 0216/1911] drm/i915/gvt: add some new MMIOs to cmd_access white list Guests now access some MMIOs (0x215c, RING_INSTPM) through commands, but these registers are not originally in gvt's command-access white list. This causes huge amounts of error logging in gvt. This patch adds these MMIOs to the white list. V2. change the commit message content. V3. remove duplicate definition of 0x20c0. V4. refine code style. 
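The white-list check itself is simple and can be sketched in a few lines of plain C (hypothetical toy_* names, not the gvt MMIO handler tables): a register may be touched from a submitted command only if its table entry carries the command-access flag, which is what adding F_CMD_ACCESS for 0x215c and RING_INSTPM accomplishes.

#include <stdbool.h>
#include <stddef.h>

#define TOY_F_CMD_ACCESS 0x1	/* illustrative flag, in the spirit of F_CMD_ACCESS */

struct toy_mmio_entry {
	unsigned int offset;
	unsigned int flags;
};

/* Illustrative table only; the real one is built by the MMIO_DFH() macros. */
static const struct toy_mmio_entry toy_mmio_table[] = {
	{ 0x215c, TOY_F_CMD_ACCESS },
};

/* A submitted command may touch an MMIO only if it is listed with the flag. */
static bool toy_cmd_may_access(unsigned int offset)
{
	size_t i;

	for (i = 0; i < sizeof(toy_mmio_table) / sizeof(toy_mmio_table[0]); i++) {
		if (toy_mmio_table[i].offset == offset)
			return (toy_mmio_table[i].flags & TOY_F_CMD_ACCESS) != 0;
	}
	return false;	/* unlisted registers are what triggered the error logs */
}
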
Signed-off-by: Pei Zhang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ebbe74072dc3bf..57b4d538f37022 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1589,7 +1589,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, @@ -2284,7 +2285,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); + MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); From c0464062bfea9cd2ef6643d93429eafe8f6c2a4a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 25 Feb 2017 02:08:12 -0800 Subject: [PATCH 0217/1911] PCI: dwc: Fix crashes seen due to missing assignments Fix the following crash, seen in dwc/pci-imx6. Unable to handle kernel NULL pointer dereference at virtual address 00000070 pgd = c0004000 [00000070] *pgd=00000000 Internal error: Oops: 805 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-09686-g9e31489 #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) task: cb850000 task.stack: cb84e000 PC is at imx6_pcie_probe+0x2f4/0x414 ... While at it, fix the same problem in various drivers instead of waiting for individual crash reports. The change in the imx6 driver was tested with qemu. The changes in other drivers are based on code inspection and have been compile tested only. 
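A minimal sketch of the pattern being fixed (plain C with hypothetical toy_* types, not the dwc driver structures): probe() sets up both the core object and the driver-private wrapper, and the crashes come from never storing the core pointer in the wrapper before later code dereferences it.

struct toy_core {		/* stand-in for struct dw_pcie */
	void *dev;
};

struct toy_priv {		/* stand-in for a driver-private struct */
	struct toy_core *pci;	/* was left NULL by the buggy probe functions */
};

static int toy_probe(struct toy_priv *priv, struct toy_core *core, void *dev)
{
	core->dev = dev;
	priv->pci = core;	/* the one-line assignment each driver was missing */
	return 0;
}

/* Later code only gets the private struct and reaches the core through it. */
static void *toy_get_dev(const struct toy_priv *priv)
{
	return priv->pci->dev;	/* NULL pointer dereference without the fix */
}
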
Fixes: 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into host-only and core structures") Signed-off-by: Guenter Roeck Signed-off-by: Vivek Gautam # designware-plat Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I --- drivers/pci/dwc/pci-exynos.c | 1 + drivers/pci/dwc/pci-imx6.c | 1 + drivers/pci/dwc/pci-keystone.c | 2 ++ drivers/pci/dwc/pci-layerscape.c | 2 ++ drivers/pci/dwc/pcie-armada8k.c | 2 ++ drivers/pci/dwc/pcie-artpec6.c | 2 ++ drivers/pci/dwc/pcie-designware-plat.c | 2 ++ drivers/pci/dwc/pcie-hisi.c | 2 ++ drivers/pci/dwc/pcie-qcom.c | 2 ++ drivers/pci/dwc/pcie-spear13xx.c | 2 ++ 10 files changed, 18 insertions(+) diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 001c91a945aa64..993b650ef2759c 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -668,6 +668,7 @@ static int __init exynos_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + ep->pci = pci; ep->ops = (const struct exynos_pcie_ops *) of_device_get_match_data(dev); diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index 3ab6761db9e8ad..801e46cd266d79 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -605,6 +605,7 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + imx6_pcie->pci = pci; imx6_pcie->variant = (enum imx6_pcie_variants)of_device_get_match_data(dev); diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 8dc66409182da0..fcc9723bad6e01 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -401,6 +401,8 @@ static int __init ks_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + ks_pcie->pci = pci; + /* initialize SerDes Phy if present */ phy = devm_phy_get(dev, "pcie-phy"); if (PTR_ERR_OR_ZERO(phy) == -EPROBE_DEFER) diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c index 175c09e3a93261..c32e392a0ae6f8 100644 --- a/drivers/pci/dwc/pci-layerscape.c +++ b/drivers/pci/dwc/pci-layerscape.c @@ -280,6 +280,8 @@ static int __init ls_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = pcie->drvdata->dw_pcie_ops; + pcie->pci = pci; + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); pci->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-armada8k.c b/drivers/pci/dwc/pcie-armada8k.c index 66bac6fbfa9f1c..f110e3b24a26dc 100644 --- a/drivers/pci/dwc/pcie-armada8k.c +++ b/drivers/pci/dwc/pcie-armada8k.c @@ -220,6 +220,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + pcie->pci = pci; + pcie->clk = devm_clk_get(dev, NULL); if (IS_ERR(pcie->clk)) return PTR_ERR(pcie->clk); diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c index 59ecc9e664362a..fcd3ef84588355 100644 --- a/drivers/pci/dwc/pcie-artpec6.c +++ b/drivers/pci/dwc/pcie-artpec6.c @@ -253,6 +253,8 @@ static int artpec6_pcie_probe(struct platform_device *pdev) pci->dev = dev; + artpec6_pcie->pci = pci; + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); pci->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c index 65250f63515cf0..b6c832ba39dd69 100644 --- a/drivers/pci/dwc/pcie-designware-plat.c +++ 
b/drivers/pci/dwc/pcie-designware-plat.c @@ -104,6 +104,8 @@ static int dw_plat_pcie_probe(struct platform_device *pdev) pci->dev = dev; + dw_plat_pcie->pci = pci; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); pci->dbi_base = devm_ioremap_resource(dev, res); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c index e3e4fedd9f68d6..fd66a3199db77d 100644 --- a/drivers/pci/dwc/pcie-hisi.c +++ b/drivers/pci/dwc/pcie-hisi.c @@ -284,6 +284,8 @@ static int hisi_pcie_probe(struct platform_device *pdev) driver = dev->driver; + hisi_pcie->pci = pci; + hisi_pcie->soc_ops = of_device_get_match_data(dev); hisi_pcie->subctrl = diff --git a/drivers/pci/dwc/pcie-qcom.c b/drivers/pci/dwc/pcie-qcom.c index e36abe0d9d6f03..67eb7f5926ddd2 100644 --- a/drivers/pci/dwc/pcie-qcom.c +++ b/drivers/pci/dwc/pcie-qcom.c @@ -686,6 +686,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) pci->ops = &dw_pcie_ops; pp = &pci->pp; + pcie->pci = pci; + pcie->ops = (struct qcom_pcie_ops *)of_device_get_match_data(dev); pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_LOW); diff --git a/drivers/pci/dwc/pcie-spear13xx.c b/drivers/pci/dwc/pcie-spear13xx.c index 348f9c5e0433e2..eaa4ea8e2ea4ea 100644 --- a/drivers/pci/dwc/pcie-spear13xx.c +++ b/drivers/pci/dwc/pcie-spear13xx.c @@ -247,6 +247,8 @@ static int spear13xx_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + spear13xx_pcie->pci = pci; + spear13xx_pcie->phy = devm_phy_get(dev, "pcie-phy"); if (IS_ERR(spear13xx_pcie->phy)) { ret = PTR_ERR(spear13xx_pcie->phy); From ca1c39ef76376b67303d01f94fe98bb68bb3861a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 07:34:00 +0100 Subject: [PATCH 0218/1911] iio: adc: xilinx: Fix error handling Reorder error handling labels in order to match the way resources have been allocated. Signed-off-by: Christophe JAILLET Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 0a6beb3d99cbc7..56cf5907a5f010 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1208,7 +1208,7 @@ static int xadc_probe(struct platform_device *pdev) ret = xadc->ops->setup(pdev, indio_dev, irq); if (ret) - goto err_free_samplerate_trigger; + goto err_clk_disable_unprepare; ret = request_irq(irq, xadc->ops->interrupt_handler, 0, dev_name(&pdev->dev), indio_dev); @@ -1268,6 +1268,8 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(irq, indio_dev); +err_clk_disable_unprepare: + clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_trigger_free(xadc->samplerate_trigger); @@ -1277,8 +1279,6 @@ static int xadc_probe(struct platform_device *pdev) err_triggered_buffer_cleanup: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_triggered_buffer_cleanup(indio_dev); -err_clk_disable_unprepare: - clk_disable_unprepare(xadc->clk); err_device_free: kfree(indio_dev->channels); From 99cde44edfa06b5f910c1704cb15d36310a56545 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 17 Jan 2017 13:35:03 +0530 Subject: [PATCH 0219/1911] parisc: eisa: Remove coding style errors This patch removes coding style errors in the eisa driver. 
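The changes are whitespace-only. The dominant rule being applied, shown here with an eisa_out8() call taken from the hunks below, is "space after comma, no trailing whitespace":

    eisa_out8(0x62,0x20);   /* before: checkpatch warns about the missing space */
    eisa_out8(0x62, 0x20);  /* after */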
Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/eisa.c | 104 +++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 103095bbe8c09b..59edee911f7deb 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -14,16 +14,16 @@ * Wax ASIC also includes a PS/2 and RS-232 controller, but those are * dealt with elsewhere; this file is concerned only with the EISA portions * of Wax. - * - * + * + * * HINT: * ----- * To allow an ISA card to work properly in the EISA slot you need to - * set an edge trigger level. This may be done on the palo command line - * by adding the kernel parameter "eisa_irq_edge=n,n2,[...]]", with + * set an edge trigger level. This may be done on the palo command line + * by adding the kernel parameter "eisa_irq_edge=n,n2,[...]]", with * n and n2 as the irq levels you want to use. - * - * Example: "eisa_irq_edge=10,11" allows ISA cards to operate at + * + * Example: "eisa_irq_edge=10,11" allows ISA cards to operate at * irq levels 10 and 11. */ @@ -46,9 +46,9 @@ #include #if 0 -#define EISA_DBG(msg, arg... ) printk(KERN_DEBUG "eisa: " msg , ## arg ) +#define EISA_DBG(msg, arg...) printk(KERN_DEBUG "eisa: " msg, ## arg) #else -#define EISA_DBG(msg, arg... ) +#define EISA_DBG(msg, arg...) #endif #define SNAKES_EEPROM_BASE_ADDR 0xF0810400 @@ -108,7 +108,7 @@ void eisa_out8(unsigned char data, unsigned short port) void eisa_out16(unsigned short data, unsigned short port) { - if (EISA_bus) + if (EISA_bus) gsc_writew(cpu_to_le16(data), eisa_permute(port)); } @@ -135,9 +135,9 @@ static int master_mask; static int slave_mask; /* the trig level can be set with the - * eisa_irq_edge=n,n,n commandline parameter - * We should really read this from the EEPROM - * in the furure. + * eisa_irq_edge=n,n,n commandline parameter + * We should really read this from the EEPROM + * in the furure. 
*/ /* irq 13,8,2,1,0 must be edge */ static unsigned int eisa_irq_level __read_mostly; /* default to edge triggered */ @@ -170,7 +170,7 @@ static void eisa_unmask_irq(struct irq_data *d) unsigned int irq = d->irq; unsigned long flags; EISA_DBG("enable irq %d\n", irq); - + spin_lock_irqsave(&eisa_irq_lock, flags); if (irq & 8) { slave_mask &= ~(1 << (irq&7)); @@ -194,7 +194,7 @@ static irqreturn_t eisa_irq(int wax_irq, void *intr_dev) { int irq = gsc_readb(0xfc01f000); /* EISA supports 16 irqs */ unsigned long flags; - + spin_lock_irqsave(&eisa_irq_lock, flags); /* read IRR command */ eisa_out8(0x0a, 0x20); @@ -202,31 +202,31 @@ static irqreturn_t eisa_irq(int wax_irq, void *intr_dev) EISA_DBG("irq IAR %02x 8259-1 irr %02x 8259-2 irr %02x\n", irq, eisa_in8(0x20), eisa_in8(0xa0)); - + /* read ISR command */ eisa_out8(0x0a, 0x20); eisa_out8(0x0a, 0xa0); EISA_DBG("irq 8259-1 isr %02x imr %02x 8259-2 isr %02x imr %02x\n", eisa_in8(0x20), eisa_in8(0x21), eisa_in8(0xa0), eisa_in8(0xa1)); - + irq &= 0xf; - + /* mask irq and write eoi */ if (irq & 8) { slave_mask |= (1 << (irq&7)); eisa_out8(slave_mask, 0xa1); eisa_out8(0x60 | (irq&7),0xa0);/* 'Specific EOI' to slave */ - eisa_out8(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ - + eisa_out8(0x62, 0x20); /* 'Specific EOI' to master-IRQ2 */ + } else { master_mask |= (1 << (irq&7)); eisa_out8(master_mask, 0x21); - eisa_out8(0x60|irq,0x20); /* 'Specific EOI' to master */ + eisa_out8(0x60|irq, 0x20); /* 'Specific EOI' to master */ } spin_unlock_irqrestore(&eisa_irq_lock, flags); generic_handle_irq(irq); - + spin_lock_irqsave(&eisa_irq_lock, flags); /* unmask */ if (irq & 8) { @@ -254,44 +254,44 @@ static struct irqaction irq2_action = { static void init_eisa_pic(void) { unsigned long flags; - + spin_lock_irqsave(&eisa_irq_lock, flags); eisa_out8(0xff, 0x21); /* mask during init */ eisa_out8(0xff, 0xa1); /* mask during init */ - + /* master pic */ - eisa_out8(0x11,0x20); /* ICW1 */ - eisa_out8(0x00,0x21); /* ICW2 */ - eisa_out8(0x04,0x21); /* ICW3 */ - eisa_out8(0x01,0x21); /* ICW4 */ - eisa_out8(0x40,0x20); /* OCW2 */ - + eisa_out8(0x11, 0x20); /* ICW1 */ + eisa_out8(0x00, 0x21); /* ICW2 */ + eisa_out8(0x04, 0x21); /* ICW3 */ + eisa_out8(0x01, 0x21); /* ICW4 */ + eisa_out8(0x40, 0x20); /* OCW2 */ + /* slave pic */ - eisa_out8(0x11,0xa0); /* ICW1 */ - eisa_out8(0x08,0xa1); /* ICW2 */ - eisa_out8(0x02,0xa1); /* ICW3 */ - eisa_out8(0x01,0xa1); /* ICW4 */ - eisa_out8(0x40,0xa0); /* OCW2 */ - + eisa_out8(0x11, 0xa0); /* ICW1 */ + eisa_out8(0x08, 0xa1); /* ICW2 */ + eisa_out8(0x02, 0xa1); /* ICW3 */ + eisa_out8(0x01, 0xa1); /* ICW4 */ + eisa_out8(0x40, 0xa0); /* OCW2 */ + udelay(100); - - slave_mask = 0xff; - master_mask = 0xfb; + + slave_mask = 0xff; + master_mask = 0xfb; eisa_out8(slave_mask, 0xa1); /* OCW1 */ eisa_out8(master_mask, 0x21); /* OCW1 */ - + /* setup trig level */ EISA_DBG("EISA edge/level %04x\n", eisa_irq_level); - + eisa_out8(eisa_irq_level&0xff, 0x4d0); /* Set all irq's to edge */ - eisa_out8((eisa_irq_level >> 8) & 0xff, 0x4d1); - + eisa_out8((eisa_irq_level >> 8) & 0xff, 0x4d1); + EISA_DBG("pic0 mask %02x\n", eisa_in8(0x21)); EISA_DBG("pic1 mask %02x\n", eisa_in8(0xa1)); EISA_DBG("pic0 edge/level %02x\n", eisa_in8(0x4d0)); EISA_DBG("pic1 edge/level %02x\n", eisa_in8(0x4d1)); - + spin_unlock_irqrestore(&eisa_irq_lock, flags); } @@ -305,7 +305,7 @@ static int __init eisa_probe(struct parisc_device *dev) char *name = is_mongoose(dev) ? 
"Mongoose" : "Wax"; - printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", + printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", name, (unsigned long)dev->hpa.start); eisa_dev.hba.dev = dev; @@ -336,14 +336,14 @@ static int __init eisa_probe(struct parisc_device *dev) printk(KERN_ERR "EISA: request_irq failed!\n"); return result; } - + /* Reserve IRQ2 */ setup_irq(2, &irq2_action); for (i = 0; i < 16; i++) { irq_set_chip_and_handler(i, &eisa_interrupt_type, handle_simple_irq); } - + EISA_bus = 1; if (dev->num_addrs) { @@ -375,7 +375,7 @@ static int __init eisa_probe(struct parisc_device *dev) return -1; } } - + return 0; } @@ -404,7 +404,7 @@ void eisa_make_irq_level(int num) { if (eisa_irq_configured& (1< 15 || val < 0) { printk(KERN_ERR "eisa: EISA irq value are 0-15\n"); continue; } - if (val == 2) { + if (val == 2) { val = 9; } eisa_make_irq_edge(val); /* clear the corresponding bit */ EISA_DBG("setting IRQ %d to edge-triggered mode\n", val); - + if ((cur = strchr(cur, ','))) { cur++; } else { From af640d17ecee1f82a949b580cc4a3e8f29c377b9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 18 Jan 2017 13:41:31 +0530 Subject: [PATCH 0220/1911] parisc: eisa: Fix resource leaks in error paths Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/eisa.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 59edee911f7deb..7e2f6d5a6aaf39 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -334,7 +334,7 @@ static int __init eisa_probe(struct parisc_device *dev) result = request_irq(dev->irq, eisa_irq, IRQF_SHARED, "EISA", &eisa_dev); if (result) { printk(KERN_ERR "EISA: request_irq failed!\n"); - return result; + goto error_release; } /* Reserve IRQ2 */ @@ -358,6 +358,11 @@ static int __init eisa_probe(struct parisc_device *dev) } } eisa_eeprom_addr = ioremap_nocache(eisa_dev.eeprom_addr, HPEE_MAX_LENGTH); + if (!eisa_eeprom_addr) { + result = -ENOMEM; + printk(KERN_ERR "EISA: ioremap_nocache failed!\n"); + goto error_free_irq; + } result = eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); init_eisa_pic(); @@ -372,11 +377,20 @@ static int __init eisa_probe(struct parisc_device *dev) eisa_dev.root.dma_mask = 0xffffffff; /* wild guess */ if (eisa_root_register (&eisa_dev.root)) { printk(KERN_ERR "EISA: Failed to register EISA root\n"); - return -1; + result = -ENOMEM; + goto error_iounmap; } } return 0; + +error_iounmap: + iounmap(eisa_eeprom_addr); +error_free_irq: + free_irq(dev->irq, &eisa_dev); +error_release: + release_resource(&eisa_dev.hba.io_space); + return result; } static const struct parisc_device_id eisa_tbl[] = { From 09b871ffd4d8ddc005a9480fb69ff1897caaeb1f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 29 Jan 2017 18:19:33 +0100 Subject: [PATCH 0221/1911] parisc: Define access_ok() as macro Define access_ok() as macro instead of static function. This fixes build warnings in code where the second parameter is given as unsigned long. Signed-off-by: Helge Deller --- arch/parisc/include/asm/uaccess.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 9a2aee1b90fcca..fb4382c28259b3 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,11 +32,7 @@ * that put_user is the same as __put_user, etc. 
*/ -static inline long access_ok(int type, const void __user * addr, - unsigned long size) -{ - return 1; -} +#define access_ok(type, uaddr, size) (1) #define put_user __put_user #define get_user __get_user From e28f701b21ee4b4eb716e4717fa2aa78c28a8523 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 2 Feb 2017 18:52:34 +0530 Subject: [PATCH 0222/1911] parisc: ccio-dma: Handle return NULL error from ioremap_nocache Fix error paths and return -ENOMEM instead of '1'. Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/ccio-dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 553ef8a5d58868..b0558e2451b568 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1539,7 +1539,7 @@ static int __init ccio_probe(struct parisc_device *dev) ioc = kzalloc(sizeof(struct ioc), GFP_KERNEL); if (ioc == NULL) { printk(KERN_ERR MODULE_NAME ": memory allocation failure\n"); - return 1; + return -ENOMEM; } ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn"; @@ -1554,6 +1554,10 @@ static int __init ccio_probe(struct parisc_device *dev) ioc->hw_path = dev->hw_path; ioc->ioc_regs = ioremap_nocache(dev->hpa.start, 4096); + if (!ioc->ioc_regs) { + kfree(ioc); + return -ENOMEM; + } ccio_ioc_init(ioc); ccio_init_resources(ioc); hppa_dma_ops = &ccio_ops; From 8351badf349b22b47c9696f54656db65d7834e42 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Feb 2017 10:20:35 +0300 Subject: [PATCH 0223/1911] parisc: fix a printk We want to do a pr_cont() here and not a pr_warn(). Fixes: b391667eb45a ("parisc: Report trap type as human readable string") Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 1a0b4f63f0e90f..c3f8d34a11cf2f 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -238,8 +238,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, vma ? ',':'\n'); if (vma) - pr_warn(KERN_CONT " vm_start = 0x%08lx, vm_end = 0x%08lx\n", - vma->vm_start, vma->vm_end); + pr_cont(" vm_start = 0x%08lx, vm_end = 0x%08lx\n", + vma->vm_start, vma->vm_end); show_regs(regs); } From ef470a60e10eb12635d7b84c9502cea3028d44e8 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 20 Feb 2017 14:02:46 -0500 Subject: [PATCH 0224/1911] parisc: Remove flush_user_dcache_range and flush_user_icache_range The functions flush_user_dcache_range() and flush_user_icache_range() are only used by the parisc signal handling code. This code only needs to flush a couple of lines, so the threshold check is unnecessary overhead. 
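Concretely, the signal code switches from the threshold-checking wrappers to the underlying asm routines. A sketch of the call-site change, with names taken from the hunks that follow:

    start = (unsigned long) &frame->tramp[0];
    end = (unsigned long) &frame->tramp[TRAMP_SIZE];
    /* was: flush_user_dcache_range(start, end) / flush_user_icache_range(start, end),
     * which first compared (end - start) against parisc_cache_flush_threshold --
     * pointless here, since TRAMP_SIZE covers only a few cache lines */
    flush_user_dcache_range_asm(start, end);
    flush_user_icache_range_asm(start, end);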
Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 2 -- arch/parisc/kernel/cache.c | 18 ------------------ arch/parisc/kernel/signal.c | 13 +++++++------ 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 7bd69bd43a0185..19c9c3c5f267ea 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -27,8 +27,6 @@ void flush_user_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_page_asm(void *); void flush_kernel_icache_page(void *); -void flush_user_dcache_range(unsigned long, unsigned long); -void flush_user_icache_range(unsigned long, unsigned long); /* Cache flush operations */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf2c..20b013cbd29762 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -573,24 +573,6 @@ void flush_cache_mm(struct mm_struct *mm) } } -void -flush_user_dcache_range(unsigned long start, unsigned long end) -{ - if ((end - start) < parisc_cache_flush_threshold) - flush_user_dcache_range_asm(start,end); - else - flush_data_cache(); -} - -void -flush_user_icache_range(unsigned long start, unsigned long end) -{ - if ((end - start) < parisc_cache_flush_threshold) - flush_user_icache_range_asm(start,end); - else - flush_instruction_cache(); -} - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index e58925ac64d105..f6aaca27ac4e89 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -232,6 +232,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, struct rt_sigframe __user *frame; unsigned long rp, usp; unsigned long haddr, sigframe_size; + unsigned long start, end; int err = 0; #ifdef CONFIG_64BIT struct compat_rt_sigframe __user * compat_frame; @@ -299,10 +300,10 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } #endif - flush_user_dcache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[TRAMP_SIZE]); - flush_user_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[TRAMP_SIZE]); + start = (unsigned long) &frame->tramp[0]; + end = (unsigned long) &frame->tramp[TRAMP_SIZE]; + flush_user_dcache_range_asm(start, end); + flush_user_icache_range_asm(start, end); /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP @@ -548,8 +549,8 @@ insert_restart_trampoline(struct pt_regs *regs) WARN_ON(err); /* flush data/instruction cache for new insns */ - flush_user_dcache_range(start, end); - flush_user_icache_range(start, end); + flush_user_dcache_range_asm(start, end); + flush_user_icache_range_asm(start, end); regs->gr[31] = regs->gr[30] + 8; return; From 678a3bd1b3de6d2ebf604e7d708bc8150bb667e9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 9 Feb 2017 22:22:13 -0500 Subject: [PATCH 0225/1911] tools/power turbostat: fix bugs in --add option When --add was used more than once, overflowed buffers caused some counters to be stored on top of others, corrupting the results. Simplify the code by simply reserving space for up to 16 added counters per each cpu, core, package. Per-cpu added counters were being printed only per-core. 
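The shape of the fix, condensed from the diff that follows (simplified; the real patch keeps the increment and the bounds check as separate statements and repeats the pattern for core and package scope):

    #define MAX_ADDED_COUNTERS 16

    struct thread_data {
        /* ... built-in counters ... */
        unsigned long long counter[MAX_ADDED_COUNTERS]; /* was counter[1] plus ad-hoc byte accounting */
    };

    /* add_counter() now counts how many counters were added per scope
     * and refuses the seventeenth: */
    sys.added_thread_counters++;
    if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
        fprintf(stderr, "exceeded max %d added thread counters\n",
                MAX_ADDED_COUNTERS);
        exit(-1);
    }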
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 88 ++++++++++++++++----------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f13f61b065c699..c7fadf0faa4be9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -154,6 +154,7 @@ char *progname; cpu_set_t *cpu_present_set, *cpu_affinity_set; size_t cpu_present_setsize, cpu_affinity_setsize; +#define MAX_ADDED_COUNTERS 16 struct thread_data { unsigned long long tsc; @@ -166,7 +167,7 @@ struct thread_data { unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -175,7 +176,7 @@ struct core_data { unsigned long long c7; unsigned int core_temp_c; unsigned int core_id; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -200,7 +201,7 @@ struct pkg_data { unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -228,9 +229,9 @@ struct msr_counter { }; struct sys_counters { - unsigned int thread_counter_bytes; - unsigned int core_counter_bytes; - unsigned int package_counter_bytes; + unsigned int added_thread_counters; + unsigned int added_core_counters; + unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; @@ -374,12 +375,6 @@ void print_header(void) if (do_nhm_cstates) outp += sprintf(outp, "\tCPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\tCPU%%c3"); - if (do_nhm_cstates) - outp += sprintf(outp, "\tCPU%%c6"); - if (do_snb_cstates) - outp += sprintf(outp, "\tCPU%%c7"); for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -392,6 +387,14 @@ void print_header(void) } } + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "\tCPU%%c3"); + if (do_nhm_cstates) + outp += sprintf(outp, "\tCPU%%c6"); + if (do_snb_cstates) + outp += sprintf(outp, "\tCPU%%c7"); + + if (do_dts) outp += sprintf(outp, "\tCoreTmp"); @@ -635,20 +638,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_smi) outp += sprintf(outp, "\t%d", t->smi_count); + /* C1 */ if (do_nhm_cstates) outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); - /* print per-core data only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - goto done; - - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); - if (do_nhm_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); - if (do_snb_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); - + /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -656,12 +650,23 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", t->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", t->counter[i]); + outp += sprintf(outp, "\t%lld", 
t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); } } + /* print per-core data only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); + if (do_nhm_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); + if (do_snb_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + if (do_dts) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -673,7 +678,7 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", c->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", c->counter[i]); + outp += sprintf(outp, "\t%lld", c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); } @@ -770,7 +775,7 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", p->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", p->counter[i]); + outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); } @@ -1036,7 +1041,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; - for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -3662,7 +3666,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data int i; *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * - topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes); + topo.num_packages, sizeof(struct thread_data)); if (*t == NULL) goto error; @@ -3671,14 +3675,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data (*t)[i].cpu_id = -1; *c = calloc(topo.num_cores_per_pkg * topo.num_packages, - sizeof(struct core_data) + sys.core_counter_bytes); + sizeof(struct core_data)); if (*c == NULL) goto error; for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) (*c)[i].core_id = -1; - *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes); + *p = calloc(topo.num_packages, sizeof(struct pkg_data)); if (*p == NULL) goto error; @@ -3901,24 +3905,36 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, switch (scope) { case SCOPE_CPU: - sys.thread_counter_bytes += 64; msrp->next = sys.tp; sys.tp = msrp; - sys.thread_counter_bytes += sizeof(unsigned long long); + sys.added_thread_counters++; + if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added thread counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_CORE: - sys.core_counter_bytes += 64; msrp->next = sys.cp; sys.cp = msrp; - sys.core_counter_bytes += sizeof(unsigned long long); + sys.added_core_counters++; + if (sys.added_core_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added core counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_PACKAGE: - sys.package_counter_bytes += 64; msrp->next = sys.pp; sys.pp = msrp; - sys.package_counter_bytes += sizeof(unsigned long long); + sys.added_package_counters++; + if (sys.added_package_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added package counters\n", + 
MAX_ADDED_COUNTERS); + exit(-1); + } break; } From 812db3f77b9a3f6ed59baf7a0d5c3fd8ec8ef86a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:25:41 -0500 Subject: [PATCH 0226/1911] tools/power turbostat: Add --show and --hide parameters Add the "--show" and "--hide" cmdline parameters. By default, turbostat shows all columns. turbostat --hide counter_list will continue showing all columns, except for those listed. turbostat --show counter_list will show _only_ the listed columns These features work for built-in counters, and have no effect on columns added with the --add parameter. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 + tools/power/x86/turbostat/turbostat.c | 463 +++++++++++++++++++------- 2 files changed, 347 insertions(+), 120 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 03cb639b292ecc..e8fb1e02d12126 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,6 +47,10 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP +\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP +\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP \fB--Dump\fP displays the raw counter values. .PP \fB--debug\fP displays additional system configuration information. Invoking this parameter diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c7fadf0faa4be9..fff280b50af0ce 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,7 +52,6 @@ unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; -unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_pc2; @@ -72,24 +71,17 @@ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; unsigned int aperf_mperf_multiplier = 1; -int do_irq = 1; -int do_smi; double bclk; double base_hz; unsigned int has_base_hz; double tsc_tweak = 1.0; -unsigned int show_pkg; -unsigned int show_core; -unsigned int show_cpu; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; -unsigned int do_gfx_rc6_ms; unsigned long long gfx_cur_rc6_ms; -unsigned int do_gfx_mhz; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; @@ -226,6 +218,9 @@ struct msr_counter { enum counter_type type; enum counter_format format; struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) }; struct sys_counters { @@ -341,39 +336,153 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) } /* - * Example Format w/ field column widths: - * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt - * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 + * Each string in this array is compared in --show and --hide cmdline. 
+ * Thus, strings that are proper sub-sets must follow their more specific peers. */ +struct msr_counter bic[] = { + { 0x0, "Package" }, + { 0x0, "Avg_MHz" }, + { 0x0, "Bzy_MHz" }, + { 0x0, "TSC_MHz" }, + { 0x0, "IRQ" }, + { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "Busy%" }, + { 0x0, "CPU%c1" }, + { 0x0, "CPU%c3" }, + { 0x0, "CPU%c6" }, + { 0x0, "CPU%c7" }, + { 0x0, "ThreadC" }, + { 0x0, "CoreTmp" }, + { 0x0, "CoreCnt" }, + { 0x0, "PkgTmp" }, + { 0x0, "GFX%rc6" }, + { 0x0, "GFXMHz" }, + { 0x0, "Pkg%pc2" }, + { 0x0, "Pkg%pc3" }, + { 0x0, "Pkg%pc6" }, + { 0x0, "Pkg%pc7" }, + { 0x0, "PkgWatt" }, + { 0x0, "CorWatt" }, + { 0x0, "GFXWatt" }, + { 0x0, "PkgCnt" }, + { 0x0, "RAMWatt" }, + { 0x0, "PKG_%" }, + { 0x0, "RAM_%" }, + { 0x0, "Pkg_J" }, + { 0x0, "Cor_J" }, + { 0x0, "GFX_J" }, + { 0x0, "RAM_J" }, + { 0x0, "Core" }, + { 0x0, "CPU" }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_Package (1ULL << 0) +#define BIC_Avg_MHz (1ULL << 1) +#define BIC_Bzy_MHz (1ULL << 2) +#define BIC_TSC_MHz (1ULL << 3) +#define BIC_IRQ (1ULL << 4) +#define BIC_SMI (1ULL << 5) +#define BIC_Busy (1ULL << 6) +#define BIC_CPU_c1 (1ULL << 7) +#define BIC_CPU_c3 (1ULL << 8) +#define BIC_CPU_c6 (1ULL << 9) +#define BIC_CPU_c7 (1ULL << 10) +#define BIC_ThreadC (1ULL << 11) +#define BIC_CoreTmp (1ULL << 12) +#define BIC_CoreCnt (1ULL << 13) +#define BIC_PkgTmp (1ULL << 14) +#define BIC_GFX_rc6 (1ULL << 15) +#define BIC_GFXMHz (1ULL << 16) +#define BIC_Pkgpc2 (1ULL << 17) +#define BIC_Pkgpc3 (1ULL << 18) +#define BIC_Pkgpc6 (1ULL << 19) +#define BIC_Pkgpc7 (1ULL << 20) +#define BIC_PkgWatt (1ULL << 21) +#define BIC_CorWatt (1ULL << 22) +#define BIC_GFXWatt (1ULL << 23) +#define BIC_PkgCnt (1ULL << 24) +#define BIC_RAMWatt (1ULL << 27) +#define BIC_PKG__ (1ULL << 28) +#define BIC_RAM__ (1ULL << 29) +#define BIC_Pkg_J (1ULL << 30) +#define BIC_Cor_J (1ULL << 31) +#define BIC_GFX_J (1ULL << 30) +#define BIC_RAM_J (1ULL << 31) +#define BIC_Core (1ULL << 32) +#define BIC_CPU (1ULL << 33) + +unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; +unsigned long long bic_present; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) + +/* + * bic_lookup + * for all the strings in comma separate name_list, + * set the approprate bit in return value. 
+ */ +unsigned long long bic_lookup(char *name_list) +{ + int i; + unsigned long long retval = 0; + + while (name_list) { + char *comma; + + comma = strchr(name_list, ','); + + if (comma) + *comma = '\0'; + + for (i = 0; i < MAX_BIC; ++i) { + if (!strcmp(name_list, bic[i].name)) { + retval |= (1ULL << i); + break; + } + } + if (i == MAX_BIC) { + fprintf(stderr, "Invalid counter name: %s\n", name_list); + exit(-1); + } + + name_list = comma; + if (name_list) + name_list++; + + } + return retval; +} void print_header(void) { struct msr_counter *mp; - if (show_pkg) + if (DO_BIC(BIC_Package)) outp += sprintf(outp, "\tPackage"); - if (show_core) + if (DO_BIC(BIC_Core)) outp += sprintf(outp, "\tCore"); - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\tCPU"); - if (has_aperf) + if (DO_BIC(BIC_Avg_MHz)) outp += sprintf(outp, "\tAvg_MHz"); - if (has_aperf) + if (DO_BIC(BIC_Busy)) outp += sprintf(outp, "\tBusy%%"); - if (has_aperf) + if (DO_BIC(BIC_Bzy_MHz)) outp += sprintf(outp, "\tBzy_MHz"); - outp += sprintf(outp, "\tTSC_MHz"); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "\tTSC_MHz"); if (!debug) goto done; - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\tIRQ"); - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\tSMI"); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "\tCPU%%c1"); for (mp = sys.tp; mp; mp = mp->next) { @@ -387,15 +496,15 @@ void print_header(void) } } - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\tCPU%%c3"); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "\tCPU%%c6"); - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\tCPU%%c7"); - if (do_dts) + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\tCoreTmp"); for (mp = sys.cp; mp; mp = mp->next) { @@ -409,13 +518,13 @@ void print_header(void) } } - if (do_ptm) + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "\tPkgTmp"); - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) outp += sprintf(outp, "\tGFX%%rc6"); - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "\tGFXMHz"); if (do_skl_residency) { @@ -440,30 +549,30 @@ void print_header(void) } if (do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) + if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "\tPkgWatt"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) + if (DO_BIC(BIC_CorWatt)) outp += sprintf(outp, "\tCorWatt"); - if (do_rapl & RAPL_GFX) + if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "\tGFXWatt"); - if (do_rapl & RAPL_DRAM) + if (DO_BIC(BIC_RAMWatt)) outp += sprintf(outp, "\tRAMWatt"); - if (do_rapl & RAPL_PKG_PERF_STATUS) + if (DO_BIC(BIC_PKG__)) outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) + if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "\tRAM_%%"); } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) + if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "\tPkg_J"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) + if (DO_BIC(BIC_Cor_J)) outp += sprintf(outp, "\tCor_J"); - if (do_rapl & RAPL_GFX) + if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "\tGFX_J"); - if (do_rapl & RAPL_DRAM) + if (DO_BIC(BIC_RAM_J)) outp += sprintf(outp, "\tRAM_J"); - if (do_rapl & RAPL_PKG_PERF_STATUS) + if (DO_BIC(BIC_PKG__)) outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) + if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "\tRAM_%%"); } for (mp = sys.pp; mp; mp = mp->next) { @@ -497,9 +606,9 @@ int dump_counters(struct thread_data *t, 
struct core_data *c, outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -583,40 +692,37 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { - if (show_pkg) + if (DO_BIC(BIC_Package)) outp += sprintf(outp, "\t-"); - if (show_core) + if (DO_BIC(BIC_Core)) outp += sprintf(outp, "\t-"); - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\t-"); } else { - if (show_pkg) { + if (DO_BIC(BIC_Package)) { if (p) outp += sprintf(outp, "\t%d", p->package_id); else outp += sprintf(outp, "\t-"); } - if (show_core) { + if (DO_BIC(BIC_Core)) { if (c) outp += sprintf(outp, "\t%d", c->core_id); else outp += sprintf(outp, "\t-"); } - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\t%d", t->cpu_id); } - /* Avg_MHz */ - if (has_aperf) + if (DO_BIC(BIC_Avg_MHz)) outp += sprintf(outp, "\t%.0f", 1.0 / units * t->aperf / interval_float); - /* Busy% */ - if (has_aperf) + if (DO_BIC(BIC_Busy)) outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); - /* Bzy_MHz */ - if (has_aperf) { + if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else @@ -624,22 +730,22 @@ int format_counters(struct thread_data *t, struct core_data *c, 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); } - /* TSC_MHz */ - outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); if (!debug) goto done; /* IRQ */ - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\t%d", t->irq_count); /* SMI */ - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\t%d", t->smi_count); /* C1 */ - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); /* Added counters */ @@ -660,15 +766,14 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); - - if (do_dts) + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -689,11 +794,11 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; /* PkgTmp */ - if (do_ptm) + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "\t%d", p->pkg_temp_c); /* GFXrc6 */ - if (do_gfx_rc6_ms) { + if (DO_BIC(BIC_GFX_rc6)) { if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "\t**.**"); } else { @@ -703,7 +808,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } /* GFXMHz */ - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "\t%d", p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ @@ -737,37 +842,27 @@ int format_counters(struct thread_data *t, struct core_data *c, else fmt8 = "%6.0f**"; - if 
(do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, - p->energy_pkg * rapl_energy_units); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, fmt8, - p->energy_cores * rapl_energy_units); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, - p->energy_gfx * rapl_energy_units); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, - p->energy_dram * rapl_dram_energy_units); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } + if (DO_BIC(BIC_PkgWatt)) + outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); + if (DO_BIC(BIC_CorWatt)) + outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); + if (DO_BIC(BIC_GFXWatt)) + outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); + if (DO_BIC(BIC_RAMWatt)) + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); + if (DO_BIC(BIC_Cor_J)) + outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -921,7 +1016,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if (has_aperf) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -957,10 +1052,10 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } - if (do_irq) + if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; - if (do_smi) + if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -1217,7 +1312,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (has_aperf) { + if 
(DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -1273,9 +1368,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->mperf = t->mperf * aperf_mperf_multiplier; } - if (do_irq) + if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (do_smi) { + if (DO_BIC(BIC_SMI)) { if (get_msr(cpu, MSR_SMI_COUNT, &msr)) return -5; t->smi_count = msr & 0xFFFFFFFF; @@ -1296,12 +1391,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; } else if (do_knl_cstates) { @@ -1309,11 +1404,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -7; } - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; - if (do_dts) { + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); @@ -1388,16 +1483,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } - if (do_ptm) { + if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) p->gfx_rc6_ms = gfx_cur_rc6_ms; - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { @@ -2155,10 +2250,10 @@ int snapshot_proc_sysfs_files(void) if (snapshot_proc_interrupts()) return 1; - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) snapshot_gfx_rc6_ms(); - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); return 0; @@ -2794,15 +2889,39 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_GFXWatt); + } break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); break; case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ @@ -2811,17 +2930,55 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ 
case INTEL_FAM6_XEON_PHI_KNM: do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_SANDYBRIDGE_X: case INTEL_FAM6_IVYBRIDGE_X: do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ do_rapl = RAPL_PKG | RAPL_CORES; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } break; case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; default: return; @@ -3398,8 +3555,17 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); + if (has_aperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + } do_dts = eax & (1 << 0); + if (do_dts) + BIC_PRESENT(BIC_CoreTmp); do_ptm = eax & (1 << 6); + if (do_ptm) + BIC_PRESENT(BIC_PkgTmp); has_hwp = eax & (1 << 7); has_hwp_notify = eax & (1 << 8); has_hwp_activity_window = eax & (1 << 9); @@ -3497,8 +3663,21 @@ void process_cpuid() if (has_aperf) aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); - do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); + BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_TSC_MHz); + + if (probe_nhm_msrs(family, model)) { + do_nhm_platform_info = 1; + BIC_PRESENT(BIC_CPU_c1); + BIC_PRESENT(BIC_CPU_c3); + BIC_PRESENT(BIC_CPU_c6); + BIC_PRESENT(BIC_SMI); + } do_snb_cstates = has_snb_msrs(family, model); + + if (do_snb_cstates) + BIC_PRESENT(BIC_CPU_c7); + do_irtl_snb = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); do_pc3 = (pkg_cstate_limit >= PCL__3); @@ -3522,9 +3701,11 @@ void process_cpuid() if (has_skl_msrs(family, model)) calculate_tsc_tweak(); - do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); - do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); return; } @@ -3583,7 +3764,7 @@ void topology_probe() topo.max_cpu_num = 0; for_all_proc_cpus(count_cpus); if (!summary_only && topo.num_cpus > 1) - show_cpu = 1; + BIC_PRESENT(BIC_CPU); if (debug > 1) fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); @@ -3644,14 +3825,14 @@ void topology_probe() fprintf(outf, "max_core_id %d, sizing for %d cores per 
package\n", max_core_id, topo.num_cores_per_pkg); if (debug && !summary_only && topo.num_cores_per_pkg > 1) - show_core = 1; + BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (debug && !summary_only && topo.num_packages > 1) - show_pkg = 1; + BIC_PRESENT(BIC_Package); topo.num_threads_per_core = max_siblings; if (debug > 1) @@ -4045,6 +4226,40 @@ void parse_add_command(char *add_command) exit(1); } } +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest + */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; + +int shown; +/* + * parse_show_hide() - process cmdline to set default counter action + */ +void parse_show_hide(char *optarg, enum show_hide_mode new_mode) +{ + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (new_mode == SHOW_LIST) { + if (shown == 0) + bic_enabled = bic_lookup(optarg); + else + bic_enabled |= bic_lookup(optarg); + shown = 1; + + return; + } + + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg); +} + void cmdline(int argc, char **argv) { int opt; @@ -4055,10 +4270,12 @@ void cmdline(int argc, char **argv) {"debug", no_argument, 0, 'd'}, {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, + {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, + {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, {"TCC", required_argument, 0, 'T'}, {"version", no_argument, 0, 'v' }, @@ -4079,6 +4296,9 @@ void cmdline(int argc, char **argv) case 'd': debug++; break; + case 'H': + parse_show_hide(optarg, HIDE_LIST); + break; case 'h': default: help(); @@ -4109,6 +4329,9 @@ void cmdline(int argc, char **argv) case 'p': show_core_only++; break; + case 's': + parse_show_hide(optarg, SHOW_LIST); + break; case 'S': summary_only++; break; From cf4cbe5314884c3123fe4ca137e9d750b6e2b8c9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 1 Jan 2017 13:08:33 -0500 Subject: [PATCH 0227/1911] tools/power turbostat: BYT does not have MSR_MISC_PWR_MGMT and so --debug fails with: turbostat: msr 1 offset 0x1aa read failed: Input/output error It seems that baytrail, and airmont do not have this MSR. It is included in subsequent Goldmont Atom. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fff280b50af0ce..fdf0273465fab8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -70,6 +70,7 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; +unsigned int no_MSR_MISC_PWR_MGMT; unsigned int aperf_mperf_multiplier = 1; double bclk; double base_hz; @@ -330,7 +331,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) - err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); + err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); return 0; } @@ -2384,6 +2385,8 @@ void check_permissions() * MSR_PLATFORM_INFO 0x000000ce * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 * + * MSR_MISC_PWR_MGMT 0x000001aa + * * MSR_PKG_C3_RESIDENCY 0x000003f8 * MSR_PKG_C6_RESIDENCY 0x000003f9 * MSR_CORE_C3_RESIDENCY 0x000003fc @@ -2440,11 +2443,13 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) pkg_cstate_limits = skx_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + no_MSR_MISC_PWR_MGMT = 1; case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ pkg_cstate_limits = amt_pkg_cstate_limits; + no_MSR_MISC_PWR_MGMT = 1; break; case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ case INTEL_FAM6_XEON_PHI_KNM: @@ -3481,6 +3486,9 @@ void decode_misc_pwr_mgmt_msr(void) if (!do_nhm_platform_info) return; + if (no_MSR_MISC_PWR_MGMT) + return; + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", base_cpu, msr, @@ -4061,7 +4069,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.16 24 Dec 2016" + fprintf(outf, "turbostat version 4.17 1 Jan 2017" " - Len Brown \n"); } From 71616c8e936a6dd541f0627d7bf4ff09971d8ccb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 22:37:48 -0500 Subject: [PATCH 0228/1911] tools/power turbostat: decode Baytrail CC6 and MC6 demotion configuration with --debug, see: cpu0: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x00000000 (DISable-CC6-Demotion) cpu0: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x00000000 (DISable-MC6-Demotion) Note that the hardware default is to enable demotion, and Linux started clearing these registers in 3.17. 
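Since Linux 3.17 clears these registers, the DISable reading shown above is what a current kernel is expected to report. The decode itself, as added in the diff below, keys on bit 0 of each MSR:

    /* bit 0 set means demotion is enabled; turbostat prints EN/DIS accordingly */
    if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
        fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
                base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");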
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fdf0273465fab8..1b762f67e3e239 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3262,6 +3262,27 @@ int has_snb_msrs(unsigned int family, unsigned int model) return 0; } +/* + * SLV client has supporet for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_MERRIFIELD: + case INTEL_FAM6_ATOM_MOOREFIELD: + return 1; + } + return 0; +} + /* * HSW adds support for additional MSRs: * @@ -3496,6 +3517,24 @@ void decode_misc_pwr_mgmt_msr(void) msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); } +/* + * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG + * + * This MSRs are present on Silvermont processors, + * Intel Atom processor E3000 series (Baytrail), and friends. + */ +void decode_c6_demotion_policy_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); + + if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); +} void process_cpuid() { @@ -3700,6 +3739,9 @@ void process_cpuid() if (debug) decode_misc_pwr_mgmt_msr(); + if (debug && has_slv_msrs(family, model)) + decode_c6_demotion_policy_msr(); + rapl_probe(family, model); perf_limit_reasons_probe(family, model); From 8f6196c192f6393823e632bfb927ff1572369875 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 22:40:23 -0500 Subject: [PATCH 0229/1911] tools/power turbostat: Baytrail: remove debug line in quiet mode Without --debug, a debug line was printed on Baytrail: SLM BCLK: 83.3 Mhz Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1b762f67e3e239..9b35c9bb04890b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3388,7 +3388,8 @@ double slm_bclk(void) } freq = slm_freq_table[i]; - fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + if (debug) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; } From f2642888476d7faefa9695bbebb2abbaeb3685d8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:13:15 -0500 Subject: [PATCH 0230/1911] tools/power turbostat: update MSR_PKG_CST_CONFIG_CONTROL decoding AMT value 0 is unlimited, not PC0 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9b35c9bb04890b..aedfaddbad30b4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1534,7 +1534,7 @@ int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, 
PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; From 9fc5cf6e14fc1db40ea8a1c10061bb9586e78585 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:24 +0100 Subject: [PATCH 0231/1911] platform/x86: fujitsu-laptop: clearly denote backlight-related symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify naming for all backlight-related functions, structures, variables and constants by using a consistent "_bl"/"_BL" suffix/infix. Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 214 +++++++++++++------------- 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 2b218b1d13e55d..e1737b9d9d95db 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -78,10 +78,10 @@ #define FUJITSU_LCD_N_LEVELS 8 -#define ACPI_FUJITSU_CLASS "fujitsu" -#define ACPI_FUJITSU_HID "FUJ02B1" -#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" -#define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1" +#define ACPI_FUJITSU_CLASS "fujitsu" +#define ACPI_FUJITSU_BL_HID "FUJ02B1" +#define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" +#define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" #define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" #define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" #define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" @@ -136,7 +136,7 @@ #endif /* Device controlling the backlight and associated keys */ -struct fujitsu_t { +struct fujitsu_bl { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -150,7 +150,7 @@ struct fujitsu_t { unsigned int brightness_level; }; -static struct fujitsu_t *fujitsu; +static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; @@ -222,7 +222,7 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 
event); /* Fujitsu ACPI interface function */ @@ -373,10 +373,10 @@ static int set_lcd_level(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n"); return -ENODEV; @@ -398,10 +398,10 @@ static int set_lcd_level_alt(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n"); return -ENODEV; @@ -421,19 +421,19 @@ static int get_lcd_level(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL, + &state); if (ACPI_FAILURE(status)) return 0; - fujitsu->brightness_level = state & 0x0fffffff; + fujitsu_bl->brightness_level = state & 0x0fffffff; if (state & 0x80000000) - fujitsu->brightness_changed = 1; + fujitsu_bl->brightness_changed = 1; else - fujitsu->brightness_changed = 0; + fujitsu_bl->brightness_changed = 0; - return fujitsu->brightness_level; + return fujitsu_bl->brightness_level; } static int get_max_brightness(void) @@ -443,14 +443,14 @@ static int get_max_brightness(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL, + &state); if (ACPI_FAILURE(status)) return -1; - fujitsu->max_brightness = state; + fujitsu_bl->max_brightness = state; - return fujitsu->max_brightness; + return fujitsu_bl->max_brightness; } /* Backlight device stuff */ @@ -483,7 +483,7 @@ static int bl_update_status(struct backlight_device *b) return ret; } -static const struct backlight_ops fujitsubl_ops = { +static const struct backlight_ops fujitsu_bl_ops = { .get_brightness = bl_get_brightness, .update_status = bl_update_status, }; @@ -511,7 +511,7 @@ show_brightness_changed(struct device *dev, int ret; - ret = fujitsu->brightness_changed; + ret = fujitsu_bl->brightness_changed; if (ret < 0) return ret; @@ -539,7 +539,7 @@ static ssize_t store_lcd_level(struct device *dev, int level, ret; if (sscanf(buf, "%i", &level) != 1 - || (level < 0 || level >= fujitsu->max_brightness)) + || (level < 0 || level >= fujitsu_bl->max_brightness)) return -EINVAL; if (use_alt_lcd_levels) @@ -644,25 +644,25 @@ static void __init dmi_check_cb_common(const struct dmi_system_id *id) static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_s6420(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = 
KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_p8010(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_HELP; /* "Support" */ - fujitsu->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ - fujitsu->keycode4 = KEY_WWW; /* "Internet" */ + fujitsu_bl->keycode1 = KEY_HELP; /* "Support" */ + fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ + fujitsu_bl->keycode4 = KEY_WWW; /* "Internet" */ return 1; } @@ -693,7 +693,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = { /* ACPI device for LCD brightness control */ -static int acpi_fujitsu_add(struct acpi_device *device) +static int acpi_fujitsu_bl_add(struct acpi_device *device) { int state = 0; struct input_dev *input; @@ -702,22 +702,22 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu->acpi_handle = device->handle; - sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME); + fujitsu_bl->acpi_handle = device->handle; + sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu; + device->driver_data = fujitsu_bl; - fujitsu->input = input = input_allocate_device(); + fujitsu_bl->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_stop; } - snprintf(fujitsu->phys, sizeof(fujitsu->phys), + snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu->phys; + input->phys = fujitsu_bl->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -730,7 +730,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -740,7 +740,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? 
"on" : "off"); - fujitsu->dev = device; + fujitsu_bl->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -758,7 +758,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) use_alt_lcd_levels, disable_brightness_adjust); if (get_max_brightness() <= 0) - fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; + fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS; get_lcd_level(); return 0; @@ -772,38 +772,38 @@ static int acpi_fujitsu_add(struct acpi_device *device) return error; } -static int acpi_fujitsu_remove(struct acpi_device *device) +static int acpi_fujitsu_bl_remove(struct acpi_device *device) { - struct fujitsu_t *fujitsu = acpi_driver_data(device); - struct input_dev *input = fujitsu->input; + struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device); + struct input_dev *input = fujitsu_bl->input; input_unregister_device(input); - fujitsu->acpi_handle = NULL; + fujitsu_bl->acpi_handle = NULL; return 0; } /* Brightness notify */ -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; int oldb, newb; - input = fujitsu->input; + input = fujitsu_bl->input; switch (event) { case ACPI_FUJITSU_NOTIFY_CODE1: keycode = 0; - oldb = fujitsu->brightness_level; + oldb = fujitsu_bl->brightness_level; get_lcd_level(); - newb = fujitsu->brightness_level; + newb = fujitsu_bl->brightness_level; vdbg_printk(FUJLAPTOP_DBG_TRACE, "brightness button event [%i -> %i (%i)]\n", - oldb, newb, fujitsu->brightness_changed); + oldb, newb, fujitsu_bl->brightness_changed); if (oldb < newb) { if (disable_brightness_adjust != 1) { @@ -882,11 +882,11 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) input->dev.parent = &device->dev; set_bit(EV_KEY, input->evbit); - set_bit(fujitsu->keycode1, input->keybit); - set_bit(fujitsu->keycode2, input->keybit); - set_bit(fujitsu->keycode3, input->keybit); - set_bit(fujitsu->keycode4, input->keybit); - set_bit(fujitsu->keycode5, input->keybit); + set_bit(fujitsu_bl->keycode1, input->keybit); + set_bit(fujitsu_bl->keycode2, input->keybit); + set_bit(fujitsu_bl->keycode3, input->keybit); + set_bit(fujitsu_bl->keycode4, input->keybit); + set_bit(fujitsu_bl->keycode5, input->keybit); set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit); set_bit(KEY_UNKNOWN, input->keybit); @@ -937,7 +937,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) #if IS_ENABLED(CONFIG_LEDS_CLASS) if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { fujitsu_hotkey->logolamp_registered = 1; @@ -949,7 +949,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) && (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { fujitsu_hotkey->kblamps_registered = 1; @@ -966,7 +966,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) * that an RF LED is present. 
*/ if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { fujitsu_hotkey->radio_led_registered = 1; @@ -983,7 +983,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) */ if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) && (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { fujitsu_hotkey->eco_led_registered = 1; @@ -1103,19 +1103,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { switch (irb & 0x4ff) { case KEY1_CODE: - keycode = fujitsu->keycode1; + keycode = fujitsu_bl->keycode1; break; case KEY2_CODE: - keycode = fujitsu->keycode2; + keycode = fujitsu_bl->keycode2; break; case KEY3_CODE: - keycode = fujitsu->keycode3; + keycode = fujitsu_bl->keycode3; break; case KEY4_CODE: - keycode = fujitsu->keycode4; + keycode = fujitsu_bl->keycode4; break; case KEY5_CODE: - keycode = fujitsu->keycode5; + keycode = fujitsu_bl->keycode5; break; case 0: keycode = 0; @@ -1150,19 +1150,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) /* Initialization */ -static const struct acpi_device_id fujitsu_device_ids[] = { - {ACPI_FUJITSU_HID, 0}, +static const struct acpi_device_id fujitsu_bl_device_ids[] = { + {ACPI_FUJITSU_BL_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_driver = { - .name = ACPI_FUJITSU_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_bl_driver = { + .name = ACPI_FUJITSU_BL_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_device_ids, + .ids = fujitsu_bl_device_ids, .ops = { - .add = acpi_fujitsu_add, - .remove = acpi_fujitsu_remove, - .notify = acpi_fujitsu_notify, + .add = acpi_fujitsu_bl_add, + .remove = acpi_fujitsu_bl_remove, + .notify = acpi_fujitsu_bl_notify, }, }; @@ -1183,7 +1183,7 @@ static struct acpi_driver acpi_fujitsu_hotkey_driver = { }; static const struct acpi_device_id fujitsu_ids[] __used = { - {ACPI_FUJITSU_HID, 0}, + {ACPI_FUJITSU_BL_HID, 0}, {ACPI_FUJITSU_HOTKEY_HID, 0}, {"", 0} }; @@ -1196,17 +1196,17 @@ static int __init fujitsu_init(void) if (acpi_disabled) return -ENODEV; - fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL); - if (!fujitsu) + fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL); + if (!fujitsu_bl) return -ENOMEM; - fujitsu->keycode1 = KEY_PROG1; - fujitsu->keycode2 = KEY_PROG2; - fujitsu->keycode3 = KEY_PROG3; - fujitsu->keycode4 = KEY_PROG4; - fujitsu->keycode5 = KEY_RFKILL; + fujitsu_bl->keycode1 = KEY_PROG1; + fujitsu_bl->keycode2 = KEY_PROG2; + fujitsu_bl->keycode3 = KEY_PROG3; + fujitsu_bl->keycode4 = KEY_PROG4; + fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); if (result < 0) { ret = -ENODEV; goto fail_acpi; @@ -1214,18 +1214,18 @@ static int __init fujitsu_init(void) /* Register platform stuff */ - fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1); - if (!fujitsu->pf_device) { + fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1); + if (!fujitsu_bl->pf_device) { ret = -ENOMEM; goto fail_platform_driver; } - ret = platform_device_add(fujitsu->pf_device); + ret = 
platform_device_add(fujitsu_bl->pf_device); if (ret) goto fail_platform_device1; ret = - sysfs_create_group(&fujitsu->pf_device->dev.kobj, + sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); if (ret) goto fail_platform_device2; @@ -1236,19 +1236,19 @@ static int __init fujitsu_init(void) struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); - max_brightness = fujitsu->max_brightness; + max_brightness = fujitsu_bl->max_brightness; props.type = BACKLIGHT_PLATFORM; props.max_brightness = max_brightness - 1; - fujitsu->bl_device = backlight_device_register("fujitsu-laptop", - NULL, NULL, - &fujitsubl_ops, - &props); - if (IS_ERR(fujitsu->bl_device)) { - ret = PTR_ERR(fujitsu->bl_device); - fujitsu->bl_device = NULL; + fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop", + NULL, NULL, + &fujitsu_bl_ops, + &props); + if (IS_ERR(fujitsu_bl->bl_device)) { + ret = PTR_ERR(fujitsu_bl->bl_device); + fujitsu_bl->bl_device = NULL; goto fail_sysfs_group; } - fujitsu->bl_device->props.brightness = fujitsu->brightness_level; + fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } ret = platform_driver_register(&fujitsupf_driver); @@ -1272,9 +1272,9 @@ static int __init fujitsu_init(void) /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3) - fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN; + fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN; else - fujitsu->bl_device->props.power = FB_BLANK_UNBLANK; + fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK; } pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n"); @@ -1286,18 +1286,18 @@ static int __init fujitsu_init(void) fail_hotkey: platform_driver_unregister(&fujitsupf_driver); fail_backlight: - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); fail_platform_device2: - platform_device_del(fujitsu->pf_device); + platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: - platform_device_put(fujitsu->pf_device); + platform_device_put(fujitsu_bl->pf_device); fail_platform_driver: - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); fail_acpi: - kfree(fujitsu); + kfree(fujitsu_bl); return ret; } @@ -1310,16 +1310,16 @@ static void __exit fujitsu_cleanup(void) platform_driver_unregister(&fujitsupf_driver); - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); - platform_device_unregister(fujitsu->pf_device); + platform_device_unregister(fujitsu_bl->pf_device); - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); - kfree(fujitsu); + kfree(fujitsu_bl); pr_info("driver unloaded\n"); } From 6942eabc140eac54d5c0db2695eed63768209c34 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:25 +0100 Subject: [PATCH 0232/1911] platform/x86: fujitsu-laptop: replace "hotkey" with "laptop" in symbol names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Functions, structures, variables and constants whose names currently contain the "hotkey" keyword are not only responsible for handling hotkeys, but also other laptop-related features (rfkill, lid, dock, LEDs). Fix their naming by using a consistent "_laptop"/"_LAPTOP" suffix/infix. Update comments so that they reflect this change. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 158 +++++++++++++------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index e1737b9d9d95db..70124004b346cd 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -82,9 +82,9 @@ #define ACPI_FUJITSU_BL_HID "FUJ02B1" #define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" #define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" -#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" -#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" -#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_HID "FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" +#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3" #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 @@ -154,8 +154,8 @@ static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; -/* Device used to access other hotkeys on the laptop */ -struct fujitsu_hotkey_t { +/* Device used to access hotkeys and other features on the laptop */ +struct fujitsu_laptop { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -171,9 +171,9 @@ struct fujitsu_hotkey_t { int eco_led_registered; }; -static struct fujitsu_hotkey_t *fujitsu_hotkey; +static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); @@ -239,7 +239,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) unsigned long long value; acpi_handle handle = NULL; - status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle); + status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "FUNC interface is not present\n"); @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->rfkill_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x100) + if (fujitsu_laptop->rfkill_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->rfkill_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x200) + if (fujitsu_laptop->rfkill_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device 
*dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->rfkill_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x20) + if (fujitsu_laptop->rfkill_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -840,7 +840,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) /* ACPI device for hotkey handling */ -static int acpi_fujitsu_hotkey_add(struct acpi_device *device) +static int acpi_fujitsu_laptop_add(struct acpi_device *device) { int result = 0; int state = 0; @@ -851,32 +851,32 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu_hotkey->acpi_handle = device->handle; + fujitsu_laptop->acpi_handle = device->handle; sprintf(acpi_device_name(device), "%s", - ACPI_FUJITSU_HOTKEY_DEVICE_NAME); + ACPI_FUJITSU_LAPTOP_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu_hotkey; + device->driver_data = fujitsu_laptop; /* kfifo */ - spin_lock_init(&fujitsu_hotkey->fifo_lock); - error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), + spin_lock_init(&fujitsu_laptop->fifo_lock); + error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int), GFP_KERNEL); if (error) { pr_err("kfifo_alloc failed\n"); goto err_stop; } - fujitsu_hotkey->input = input = input_allocate_device(); + fujitsu_laptop->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_free_fifo; } - snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys), + snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu_hotkey->phys; + input->phys = fujitsu_laptop->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -894,7 +894,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -904,7 +904,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - fujitsu_hotkey->dev = device; + fujitsu_laptop->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -920,16 +920,16 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_hotkey->rfkill_supported = + fujitsu_laptop->rfkill_supported = call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. 
*/ - if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_hotkey->rfkill_supported = 0; + if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) + fujitsu_laptop->rfkill_supported = 0; - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ @@ -940,7 +940,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { - fujitsu_hotkey->logolamp_registered = 1; + fujitsu_laptop->logolamp_registered = 1; } else { pr_err("Could not register LED handler for logo lamp, error %i\n", result); @@ -952,7 +952,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { - fujitsu_hotkey->kblamps_registered = 1; + fujitsu_laptop->kblamps_registered = 1; } else { pr_err("Could not register LED handler for keyboard lamps, error %i\n", result); @@ -969,7 +969,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { - fujitsu_hotkey->radio_led_registered = 1; + fujitsu_laptop->radio_led_registered = 1; } else { pr_err("Could not register LED handler for radio LED, error %i\n", result); @@ -986,7 +986,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { - fujitsu_hotkey->eco_led_registered = 1; + fujitsu_laptop->eco_led_registered = 1; } else { pr_err("Could not register LED handler for eco LED, error %i\n", result); @@ -1002,47 +1002,47 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) err_free_input_dev: input_free_device(input); err_free_fifo: - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); err_stop: return error; } -static int acpi_fujitsu_hotkey_remove(struct acpi_device *device) +static int acpi_fujitsu_laptop_remove(struct acpi_device *device) { - struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device); - struct input_dev *input = fujitsu_hotkey->input; + struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device); + struct input_dev *input = fujitsu_laptop->input; #if IS_ENABLED(CONFIG_LEDS_CLASS) - if (fujitsu_hotkey->logolamp_registered) + if (fujitsu_laptop->logolamp_registered) led_classdev_unregister(&logolamp_led); - if (fujitsu_hotkey->kblamps_registered) + if (fujitsu_laptop->kblamps_registered) led_classdev_unregister(&kblamps_led); - if (fujitsu_hotkey->radio_led_registered) + if (fujitsu_laptop->radio_led_registered) led_classdev_unregister(&radio_led); - if (fujitsu_hotkey->eco_led_registered) + if (fujitsu_laptop->eco_led_registered) led_classdev_unregister(&eco_led); #endif input_unregister_device(input); - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); - fujitsu_hotkey->acpi_handle = NULL; + fujitsu_laptop->acpi_handle = NULL; return 0; } -static void acpi_fujitsu_hotkey_press(int keycode) +static void acpi_fujitsu_laptop_press(int keycode) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int status; - status = kfifo_in_locked(&fujitsu_hotkey->fifo, + status = kfifo_in_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, 
sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) { vdbg_printk(FUJLAPTOP_DBG_WARN, "Could not push keycode [0x%x]\n", keycode); @@ -1054,16 +1054,16 @@ static void acpi_fujitsu_hotkey_press(int keycode) "Push keycode into ringbuffer [%d]\n", keycode); } -static void acpi_fujitsu_hotkey_release(void) +static void acpi_fujitsu_laptop_release(void) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int keycode, status; while (true) { - status = kfifo_out_locked(&fujitsu_hotkey->fifo, + status = kfifo_out_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) return; input_report_key(input, keycode, 0); @@ -1073,14 +1073,14 @@ static void acpi_fujitsu_hotkey_release(void) } } -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; unsigned int irb = 1; int i; - input = fujitsu_hotkey->input; + input = fujitsu_laptop->input; if (event != ACPI_FUJITSU_NOTIFY_CODE1) { keycode = KEY_UNKNOWN; @@ -1093,8 +1093,8 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); i = 0; @@ -1128,16 +1128,16 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } if (keycode > 0) - acpi_fujitsu_hotkey_press(keycode); + acpi_fujitsu_laptop_press(keycode); else if (keycode == 0) - acpi_fujitsu_hotkey_release(); + acpi_fujitsu_laptop_release(); } /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is * handled in software; its state is queried using FUNC_RFKILL */ - if ((fujitsu_hotkey->rfkill_supported & BIT(26)) && + if ((fujitsu_laptop->rfkill_supported & BIT(26)) && (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); @@ -1166,25 +1166,25 @@ static struct acpi_driver acpi_fujitsu_bl_driver = { }, }; -static const struct acpi_device_id fujitsu_hotkey_device_ids[] = { - {ACPI_FUJITSU_HOTKEY_HID, 0}, +static const struct acpi_device_id fujitsu_laptop_device_ids[] = { + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_hotkey_driver = { - .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_laptop_driver = { + .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_hotkey_device_ids, + .ids = fujitsu_laptop_device_ids, .ops = { - .add = acpi_fujitsu_hotkey_add, - .remove = acpi_fujitsu_hotkey_remove, - .notify = acpi_fujitsu_hotkey_notify, + .add = acpi_fujitsu_laptop_add, + .remove = acpi_fujitsu_laptop_remove, + .notify = acpi_fujitsu_laptop_notify, }, }; static const struct acpi_device_id fujitsu_ids[] __used = { {ACPI_FUJITSU_BL_HID, 0}, - {ACPI_FUJITSU_HOTKEY_HID, 0}, + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0} }; MODULE_DEVICE_TABLE(acpi, fujitsu_ids); @@ -1255,18 +1255,18 @@ static int __init fujitsu_init(void) if (ret) goto fail_backlight; - /* Register hotkey driver */ + /* Register laptop driver */ - fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL); - if 
(!fujitsu_hotkey) { + fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL); + if (!fujitsu_laptop) { ret = -ENOMEM; - goto fail_hotkey; + goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); if (result < 0) { ret = -ENODEV; - goto fail_hotkey1; + goto fail_laptop1; } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ @@ -1281,9 +1281,9 @@ static int __init fujitsu_init(void) return 0; -fail_hotkey1: - kfree(fujitsu_hotkey); -fail_hotkey: +fail_laptop1: + kfree(fujitsu_laptop); +fail_laptop: platform_driver_unregister(&fujitsupf_driver); fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); @@ -1304,9 +1304,9 @@ static int __init fujitsu_init(void) static void __exit fujitsu_cleanup(void) { - acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver); - kfree(fujitsu_hotkey); + kfree(fujitsu_laptop); platform_driver_unregister(&fujitsupf_driver); From 1650602691f4e8ea8f6e306452a72ac9a611932a Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:26 +0100 Subject: [PATCH 0233/1911] platform/x86: fujitsu-laptop: make platform-related variables match naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace "fujitsupf" with "fujitsu_pf" in all platform-related variable names to match the module-wide naming convention. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 70124004b346cd..6cdd1b334e8a79 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -607,7 +607,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store); static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store); static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store); -static struct attribute *fujitsupf_attributes[] = { +static struct attribute *fujitsu_pf_attributes[] = { &dev_attr_brightness_changed.attr, &dev_attr_max_brightness.attr, &dev_attr_lcd_level.attr, @@ -617,11 +617,11 @@ static struct attribute *fujitsupf_attributes[] = { NULL }; -static struct attribute_group fujitsupf_attribute_group = { - .attrs = fujitsupf_attributes +static struct attribute_group fujitsu_pf_attribute_group = { + .attrs = fujitsu_pf_attributes }; -static struct platform_driver fujitsupf_driver = { +static struct platform_driver fujitsu_pf_driver = { .driver = { .name = "fujitsu-laptop", } @@ -1226,7 +1226,7 @@ static int __init fujitsu_init(void) ret = sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); if (ret) goto fail_platform_device2; @@ -1251,7 +1251,7 @@ static int __init fujitsu_init(void) fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } - ret = platform_driver_register(&fujitsupf_driver); + ret = platform_driver_register(&fujitsu_pf_driver); if (ret) goto fail_backlight; @@ -1284,12 +1284,12 @@ static int __init fujitsu_init(void) fail_laptop1: kfree(fujitsu_laptop); fail_laptop: - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); 
fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); fail_platform_device2: platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: @@ -1308,12 +1308,12 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu_laptop); - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); backlight_device_unregister(fujitsu_bl->bl_device); sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); platform_device_unregister(fujitsu_bl->pf_device); From 8ef27bd3410c4e5856bac2315ef51224926020e0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:27 +0100 Subject: [PATCH 0234/1911] platform/x86: fujitsu-laptop: rename FUNC_RFKILL to FUNC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FUNC subfunction 0x1000 is currently referred to as FUNC_RFKILL, which is misleading, because it handles more than just radio devices (also lid, dock, LEDs). Rename the FUNC_RFKILL constant to FUNC_FLAGS. Replace "rfkill" with "flags" in the names of its associated fields inside struct fujitsu_laptop. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 6cdd1b334e8a79..55d696262301b7 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -89,7 +89,7 @@ #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 /* FUNC interface - command values */ -#define FUNC_RFKILL 0x1000 +#define FUNC_FLAGS 0x1000 #define FUNC_LEDS 0x1001 #define FUNC_BUTTONS 0x1002 #define FUNC_BACKLIGHT 0x1004 @@ -163,8 +163,8 @@ struct fujitsu_laptop { struct platform_device *pf_device; struct kfifo fifo; spinlock_t fifo_lock; - int rfkill_supported; - int rfkill_state; + int flags_supported; + int flags_state; int logolamp_registered; int kblamps_registered; int radio_led_registered; @@ -300,9 +300,9 @@ static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0); } static int eco_led_set(struct led_classdev *cdev, @@ -346,7 +346,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev) { enum led_brightness brightness = LED_OFF; - if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON) + if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON) brightness = LED_FULL; return brightness; @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x100) + if (fujitsu_laptop->flags_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t 
show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x200) + if (fujitsu_laptop->flags_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x20) + if (fujitsu_laptop->flags_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -920,17 +920,17 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_laptop->rfkill_supported = - call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); + fujitsu_laptop->flags_supported = + call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. */ - if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_laptop->rfkill_supported = 0; + if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD) + fujitsu_laptop->flags_supported = 0; - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); @@ -1093,9 +1093,9 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); i = 0; while ((irb = @@ -1135,10 +1135,10 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is - * handled in software; its state is queried using FUNC_RFKILL + * handled in software; its state is queried using FUNC_FLAGS */ - if ((fujitsu_laptop->rfkill_supported & BIT(26)) && - (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { + if ((fujitsu_laptop->flags_supported & BIT(26)) && + (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); input_sync(input); From d3dd4480f8a911198d85b3cdb97f22db6539d2d1 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:28 +0100 Subject: [PATCH 0235/1911] platform/x86: fujitsu-laptop: replace numeric values with constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace three repeating numeric values with constants to improve code clarity. 
Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 55d696262301b7..deef40a03b6d20 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -97,6 +97,11 @@ /* FUNC interface - responses */ #define UNSUPPORTED_CMD 0x80000000 +/* FUNC interface - status flags */ +#define FLAG_RFKILL 0x020 +#define FLAG_LID 0x100 +#define FLAG_DOCK 0x200 + #if IS_ENABLED(CONFIG_LEDS_CLASS) /* FUNC interface - LED control */ #define FUNC_LED_OFF 0x1 @@ -567,9 +572,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & FLAG_LID)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x100) + if (fujitsu_laptop->flags_state & FLAG_LID) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +584,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & FLAG_DOCK)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x200) + if (fujitsu_laptop->flags_state & FLAG_DOCK) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +596,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x20) + if (fujitsu_laptop->flags_state & FLAG_RFKILL) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); From 8c590e339f37022dff209ef16cf699531df1a0ca Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:29 +0100 Subject: [PATCH 0236/1911] platform/x86: fujitsu-laptop: remove redundant forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both acpi_fujitsu_bl_notify() and acpi_fujitsu_laptop_notify() are defined before they are first used, so remove their forward declarations as they are redundant. 
Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index deef40a03b6d20..ba0c0c21125102 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -178,8 +178,6 @@ struct fujitsu_laptop { static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); - #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); static int logolamp_set(struct led_classdev *cdev, @@ -227,8 +225,6 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event); - /* Fujitsu ACPI interface function */ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) From c1d1e8a051761fb808308dab32b9351e1d1fbb9b Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:30 +0100 Subject: [PATCH 0237/1911] platform/x86: fujitsu-laptop: simplify acpi_bus_register_driver() error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate variable is not needed to handle error codes returned by acpi_bus_register_driver(). If the latter fails, just use the value it returned as the value returned by fujitsu_init(). Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index ba0c0c21125102..a5478a011b9068 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1192,7 +1192,7 @@ MODULE_DEVICE_TABLE(acpi, fujitsu_ids); static int __init fujitsu_init(void) { - int ret, result, max_brightness; + int ret, max_brightness; if (acpi_disabled) return -ENODEV; @@ -1207,11 +1207,9 @@ static int __init fujitsu_init(void) fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); + if (ret) goto fail_acpi; - } /* Register platform stuff */ @@ -1264,11 +1262,9 @@ static int __init fujitsu_init(void) goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); + if (ret) goto fail_laptop1; - } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { From 5296a73613814a15e54ebddc2843ec0f84951d60 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:32 +0100 Subject: [PATCH 0238/1911] platform/x86: fujitsu-laptop: autodetect LCD interface on all models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Presence of ACPI method SBL2 should be checked on all models rather than just the ones with predefined hotkey keycode overrides. Move most of dmi_check_cb_common() to acpi_fujitsu_bl_add(). 
Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a5478a011b9068..56804c4db33ff4 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -631,15 +631,6 @@ static struct platform_driver fujitsu_pf_driver = { static void __init dmi_check_cb_common(const struct dmi_system_id *id) { pr_info("Identified laptop model '%s'\n", id->ident); - if (use_alt_lcd_levels == -1) { - if (acpi_has_method(NULL, - "\\_SB.PCI0.LPCB.FJEX.SBL2")) - use_alt_lcd_levels = 1; - else - use_alt_lcd_levels = 0; - vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as " - "%i\n", use_alt_lcd_levels); - } } static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) @@ -751,6 +742,15 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device) pr_err("_INI Method failed\n"); } + if (use_alt_lcd_levels == -1) { + if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2")) + use_alt_lcd_levels = 1; + else + use_alt_lcd_levels = 0; + vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n", + use_alt_lcd_levels); + } + /* do config (detect defaults) */ use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; From 5802d0bc3fe9f598f0ff3f5fd7fd8c5c935a1b5d Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:33 +0100 Subject: [PATCH 0239/1911] platform/x86: fujitsu-laptop: remove redundant MODULE_ALIAS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODULE_DEVICE_TABLE is all that is needed for fujitsu-laptop to be properly autoloaded based on presence of its associated ACPI devices, so remove redundant MODULE_ALIAS entries. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 56804c4db33ff4..e12cc3504d4879 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1338,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon"); MODULE_DESCRIPTION("Fujitsu laptop extras support"); MODULE_VERSION(FUJITSU_DRIVER_VERSION); MODULE_LICENSE("GPL"); - -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*"); -MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); From 71050ae7bf83e4d71a859257d11adc5de517073e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Mon, 20 Feb 2017 14:50:22 -0500 Subject: [PATCH 0240/1911] platform/x86: asus-wmi: Detect quirk_no_rfkill from the DSDT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Asus laptops that have an airplane-mode indicator LED, also have the WMI WLAN user bit set, and the following bits in their DSDT: Scope (_SB) { (...) Device (ATKD) { (...) Method (WMNB, 3, Serialized) { (...) 
If (LEqual (IIA0, 0x00010002)) { OWGD (IIA1) Return (One) } } } } So when asus-wmi uses ASUS_WMI_DEVID_WLAN_LED (0x00010002) to store the wlan state, it drives the airplane-mode indicator LED (through the call to OWGD) in an inverted fashion: the LED is ON when airplane mode is OFF (since wlan is ON), and vice-versa. This commit skips registering RFKill switches at all for these laptops, to allow the asus-wireless driver to drive the airplane mode LED correctly through the ASHS ACPI device. Relying on the presence of ASHS and ASUS_WMI_DSTS_USER_BIT avoids adding DMI-based quirks for at least 21 different laptops. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-wmi.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 43cb680adbb420..8499d3ae4257b6 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -159,6 +159,8 @@ MODULE_LICENSE("GPL"); #define USB_INTEL_XUSB2PR 0xD0 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; + struct bios_args { u32 arg0; u32 arg1; @@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + /* * WMI Driver */ @@ -2095,6 +2107,13 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_leds; + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); + if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) + asus->driver->wlan_ctrl_by_user = 1; + + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + asus->driver->quirks->no_rfkill = 1; + if (!asus->driver->quirks->no_rfkill) { err = asus_wmi_rfkill_init(asus); if (err) @@ -2134,10 +2153,6 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_debugfs; - asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); - if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) - asus->driver->wlan_ctrl_by_user = 1; - return 0; fail_debugfs: From dc7ffefdcc28a45214aa707fdc3df6a5e611ba09 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sun, 19 Feb 2017 16:35:59 -0500 Subject: [PATCH 0241/1911] staging/lustre/lnet: Fix allocation size for sv_cpt_data This is unbreaking another of those "stealth" janitor patches that got in and subtly broke some things. sv_cpt_data is a pointer to pointer, so need to dereference it twice to allocate the correct structure size. 
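A stand-alone toy program makes the sizeof difference concrete (this is not lustre code; alloc_one() is a hypothetical stand-in for an allocator that, like cfs_percpt_alloc(), is handed the size of one per-slot object):

#include <stdio.h>
#include <stdlib.h>

struct obj { long a, b, c; };

/* stand-in for an allocator that takes the size of ONE per-slot object */
static void *alloc_one(size_t size)
{
        return calloc(1, size);
}

int main(void)
{
        struct obj **tbl = NULL;

        /* size of a pointer -- what the broken call handed to the allocator */
        printf("sizeof(*tbl)  = %zu\n", sizeof(*tbl));
        /* size of the pointed-to structure -- what each slot must hold */
        printf("sizeof(**tbl) = %zu\n", sizeof(**tbl));

        free(alloc_one(sizeof(*tbl)));  /* too small for a struct obj */
        free(alloc_one(sizeof(**tbl))); /* correct per-slot allocation */
        return 0;
}
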
Fixes: 9899cb68c6c2 ("Staging: lustre: rpc: Use sizeof type *pointer instead of sizeof type.") CC: Sandhya Bankar Cc: stable # 4.7+ Signed-off-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 92cd4113cf75e9..87fe366f8f7031 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -255,7 +255,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_shuttingdown = 0; svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*svc->sv_cpt_data)); + sizeof(**svc->sv_cpt_data)); if (!svc->sv_cpt_data) return -ENOMEM; From f4082c6f28a8e77dcb694c6f23de9e533251051d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 18 Feb 2017 02:20:36 +0300 Subject: [PATCH 0242/1911] staging: bcm2835/mmal-vchiq: unlock on error in buffer_from_host() We should unlock before returning on this error path. Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/platform/bcm2835/mmal-vchiq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/platform/bcm2835/mmal-vchiq.c b/drivers/staging/media/platform/bcm2835/mmal-vchiq.c index f0639ee6c8b998..fdfb6a620a4314 100644 --- a/drivers/staging/media/platform/bcm2835/mmal-vchiq.c +++ b/drivers/staging/media/platform/bcm2835/mmal-vchiq.c @@ -397,8 +397,10 @@ buffer_from_host(struct vchiq_mmal_instance *instance, /* get context */ msg_context = get_msg_context(instance); - if (msg_context == NULL) - return -ENOMEM; + if (!msg_context) { + ret = -ENOMEM; + goto unlock; + } /* store bulk message context for when data arrives */ msg_context->u.bulk.instance = instance; @@ -454,6 +456,7 @@ buffer_from_host(struct vchiq_mmal_instance *instance, vchi_service_release(instance->handle); +unlock: mutex_unlock(&instance->bulk_mutex); return ret; From 6cf1bf636a067eb308cb3a8322b9d6b1844a075d Mon Sep 17 00:00:00 2001 From: Michael Zoran Date: Sat, 18 Feb 2017 03:22:01 -0800 Subject: [PATCH 0243/1911] staging: vchiq_2835_arm: Make cache-line-size a required DT property The original github source allowed for the cache-line-size property to be missing. Since recent firmwares also require this property, it makes sense to always require it in the driver as well. If the cache-line-size property is missing, then the driver probe should fail as no dev since the kernel and dt may be out of sync. The fix is to add a check for the return value of of_property_read_u32. Changes V2: 1. Add error message if cache-line-size is missing. 2. Simple check for non-zero return value from of_property_read_u32. 
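Seen in isolation, the pattern being introduced is the usual "required DT property" idiom; here is a minimal probe sketch under that assumption (example_probe() and its device are hypothetical, not the vchiq code): a u32 property is treated as mandatory and the probe fails with -ENODEV when it is absent.

#include <linux/of.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        u32 cache_line_size;
        int err;

        err = of_property_read_u32(dev->of_node, "cache-line-size",
                                   &cache_line_size);
        if (err) {
                /* DT and kernel may be out of sync: refuse to bind */
                dev_err(dev, "Missing cache-line-size property\n");
                return -ENODEV;
        }

        /* ... continue initialization using cache_line_size ... */
        return 0;
}
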
Signed-off-by: Michael Zoran Acked-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../vc04_services/interface/vchiq_arm/vchiq_2835_arm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index e6241fb5cfa695..3aeffcb9c87e91 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -121,8 +121,14 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state) if (err < 0) return err; - (void)of_property_read_u32(dev->of_node, "cache-line-size", + err = of_property_read_u32(dev->of_node, "cache-line-size", &g_cache_line_size); + + if (err) { + dev_err(dev, "Missing cache-line-size property\n"); + return -ENODEV; + } + g_fragments_size = 2 * g_cache_line_size; /* Allocate space for the channels in coherent memory */ From bd4e2d2907fa23a11d46217064ecf80470ddae10 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Feb 2017 22:06:32 -0800 Subject: [PATCH 0244/1911] target: Fix NULL dereference during LUN lookup + active I/O shutdown When transport_clear_lun_ref() is shutting down a se_lun via configfs with new I/O in-flight, it's possible to trigger a NULL pointer dereference in transport_lookup_cmd_lun() due to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking before incrementing lun->lun_ref.count after lun->lun_ref has switched to atomic_t mode. This results in a NULL pointer dereference as LUN shutdown code in core_tpg_remove_lun() continues running after the existing ->release() -> core_tpg_lun_ref_release() callback completes, and clears the RCU protected se_lun->lun_se_dev pointer. During the OOPs, the state of lun->lun_ref in the process which triggered the NULL pointer dereference looks like the following on v4.1.y stable code: struct se_lun { lun_link_magic = 4294932337, lun_status = TRANSPORT_LUN_STATUS_FREE, ..... lun_se_dev = 0x0, lun_sep = 0x0, ..... lun_ref = { count = { counter = 1 }, percpu_count_ptr = 3, release = 0xffffffffa02fa1e0 , confirm_switch = 0x0, force_atomic = false, rcu = { next = 0xffff88154fa1a5d0, func = 0xffffffff8137c4c0 } } } To address this bug, use percpu_ref_tryget_live() to ensure once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref has switched to atomic_t, all new I/Os will fail to obtain a new lun->lun_ref reference. Also use an explicit percpu_ref_kill_and_confirm() callback to block on ->lun_ref_comp to allow the first stage and associated RCU grace period to complete, and then block on ->lun_ref_shutdown waiting for the final percpu_ref_put() to drop the last reference via transport_lun_remove_cmd() before continuing with core_tpg_remove_lun() shutdown. Reported-by: Rob Millner Tested-by: Rob Millner Cc: Rob Millner Tested-by: Vaibhav Tandon Cc: Vaibhav Tandon Tested-by: Bryant G. 
Ly Cc: # v3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 10 +++++++-- drivers/target/target_core_tpg.c | 3 ++- drivers/target/target_core_transport.c | 31 +++++++++++++++++++++++++- include/target/target_core_base.h | 1 + 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index cb7047d66afcd1..c754ae33bf7b15 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -78,12 +78,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) &deve->read_bytes); se_lun = rcu_dereference(deve->se_lun); + + if (!percpu_ref_tryget_live(&se_lun->lun_ref)) { + se_lun = NULL; + goto out_unlock; + } + se_cmd->se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; - - percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; if ((se_cmd->data_direction == DMA_TO_DEVICE) && @@ -97,6 +101,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto ref_dev; } } +out_unlock: rcu_read_unlock(); if (!se_lun) { @@ -815,6 +820,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_ref_comp); + init_completion(&xcopy_lun->lun_shutdown_comp); INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index d99752c6cd602b..2744251178adb8 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -445,7 +445,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) { struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); - complete(&lun->lun_ref_comp); + complete(&lun->lun_shutdown_comp); } int core_tpg_register( @@ -571,6 +571,7 @@ struct se_lun *core_tpg_alloc_lun( lun->lun_link_magic = SE_LUN_LINK_MAGIC; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_ref_comp); + init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_deve_list); INIT_LIST_HEAD(&lun->lun_dev_link); atomic_set(&lun->lun_tg_pt_secondary_offline, 0); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index efb9e6f382019e..434d9d69398917 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2700,10 +2700,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); +static void target_lun_confirm(struct percpu_ref *ref) +{ + struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); + + complete(&lun->lun_ref_comp); +} + void transport_clear_lun_ref(struct se_lun *lun) { - percpu_ref_kill(&lun->lun_ref); + /* + * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop + * the initial reference and schedule confirm kill to be + * executed after one full RCU grace period has completed. + */ + percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm); + /* + * The first completion waits for percpu_ref_switch_to_atomic_rcu() + * to call target_lun_confirm after lun->lun_ref has been marked + * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t + * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref + * fails for all new incoming I/O. 
+ */ wait_for_completion(&lun->lun_ref_comp); + /* + * The second completion waits for percpu_ref_put_many() to + * invoke ->release() after lun->lun_ref has switched to + * atomic_t mode, and lun->lun_ref.count has reached zero. + * + * At this point all target-core lun->lun_ref references have + * been dropped via transport_lun_remove_cmd(), and it's safe + * to proceed with the remaining LUN shutdown. + */ + wait_for_completion(&lun->lun_shutdown_comp); } static bool diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index d7336f3c6b600a..16d3be8395bebb 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -730,6 +730,7 @@ struct se_lun { struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; + struct completion lun_shutdown_comp; struct percpu_ref lun_ref; struct list_head lun_dev_link; struct hlist_node link; From 17c61ad66f2e09a9014ab2d4e1f04c8294427db1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 23 Feb 2017 21:26:31 -0800 Subject: [PATCH 0245/1911] iscsi-target: Fix early login failure statistics misses Due to the long standing checks in iscsit_snmp_get_tiqn() that assume conn->sess->tpg dereference of tpg->tpg_tiqn for iscsit_collect_login_stats() usage, some of the early login failure cases like ISCSI_LOGIN_STATUS_TGT_FORBIDDEN where not getting incremented, due to sess->tpg assignment happening later in iscsi_login_zero_tsih_s2(). Instead, use the earlier conn->tpg assignment done by iscsi_target_locate_portal() -> iscsit_get_tpg_from_np() so the existing counters are incremented correctly for the various early login failure cases. Also, go ahead and drop the old rate limiting check in iscsit_collect_login_stats(), so we get the true number of failed login attempts in the existing statistics. 
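Condensed into a stand-alone sketch (the struct definitions here are stand-ins for the iSCSI target headers, kept only so the snippet compiles): early login failures occur before conn->sess exists, so the statistics path has to rely on conn->tpg, which the portal lookup assigned much earlier.

    #include <stddef.h>

    /* Stand-in types for illustration only. */
    struct tpg  { int unused; };
    struct sess { struct tpg *tpg; };
    struct conn { struct tpg *tpg; struct sess *sess; };

    static struct tpg *tpg_for_stats(struct conn *conn)
    {
            if (!conn)
                    return NULL;
            return conn->tpg;       /* not conn->sess->tpg */
    }

The actual change in iscsit_snmp_get_tiqn() below follows the same shape.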
Reported-by: Ryan Stiles Cc: Ryan Stiles Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_util.c | 38 ++---------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index f46eadffec0704..cc59588824313f 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1378,33 +1378,6 @@ int tx_data( return iscsit_do_tx_data(conn, &c); } -static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y) -{ - switch (x->ss_family) { - case AF_INET: { - struct sockaddr_in *sinx = (struct sockaddr_in *)x; - struct sockaddr_in *siny = (struct sockaddr_in *)y; - if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) - return false; - if (sinx->sin_port != siny->sin_port) - return false; - break; - } - case AF_INET6: { - struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; - struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; - if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) - return false; - if (sinx->sin6_port != siny->sin6_port) - return false; - break; - } - default: - return false; - } - return true; -} - void iscsit_collect_login_stats( struct iscsi_conn *conn, u8 status_class, @@ -1421,13 +1394,6 @@ void iscsit_collect_login_stats( ls = &tiqn->login_stats; spin_lock(&ls->lock); - if (sockaddr_equal(&conn->login_sockaddr, &ls->last_intr_fail_sockaddr) && - ((get_jiffies_64() - ls->last_fail_time) < 10)) { - /* We already have the failure info for this login */ - spin_unlock(&ls->lock); - return; - } - if (status_class == ISCSI_STATUS_CLS_SUCCESS) ls->accepts++; else if (status_class == ISCSI_STATUS_CLS_REDIRECT) { @@ -1472,10 +1438,10 @@ struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn) { struct iscsi_portal_group *tpg; - if (!conn || !conn->sess) + if (!conn) return NULL; - tpg = conn->sess->tpg; + tpg = conn->tpg; if (!tpg) return NULL; From c87ba9c49c1fa86261448b09c5f1b2223bf7efd9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 19 Jan 2017 15:45:57 -0800 Subject: [PATCH 0246/1911] target: Add counters for ABORT_TASK success + failure This patch introduces two counters for ABORT_TASK success + failure under: /sys/kernel/config/target/core/$HBA/$DEV/statistics/scsi_tgt_dev/ that are useful for diagnosing various backend device latency and front fabric issues. Normally when folks see alot of aborts_complete happening, it means the backend device I/O completion latency is high, and not returning completions fast enough before host side timeouts trigger. And normally when folks see alot of aborts_no_task, it means completions are being posted by target-core into fabric driver code, but the responses aren't making it back to the host. 
Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_stat.c | 18 ++++++++++++++++++ drivers/target/target_core_tmr.c | 2 ++ include/target/target_core_base.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index be380e4acfcdbb..8038255b21e874 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -158,12 +158,28 @@ static ssize_t target_stat_tgt_resets_show(struct config_item *item, atomic_long_read(&to_stat_tgt_dev(item)->num_resets)); } +static ssize_t target_stat_tgt_aborts_complete_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&to_stat_tgt_dev(item)->aborts_complete)); +} + +static ssize_t target_stat_tgt_aborts_no_task_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&to_stat_tgt_dev(item)->aborts_no_task)); +} + CONFIGFS_ATTR_RO(target_stat_tgt_, inst); CONFIGFS_ATTR_RO(target_stat_tgt_, indx); CONFIGFS_ATTR_RO(target_stat_tgt_, num_lus); CONFIGFS_ATTR_RO(target_stat_tgt_, status); CONFIGFS_ATTR_RO(target_stat_tgt_, non_access_lus); CONFIGFS_ATTR_RO(target_stat_tgt_, resets); +CONFIGFS_ATTR_RO(target_stat_tgt_, aborts_complete); +CONFIGFS_ATTR_RO(target_stat_tgt_, aborts_no_task); static struct configfs_attribute *target_stat_scsi_tgt_dev_attrs[] = { &target_stat_tgt_attr_inst, @@ -172,6 +188,8 @@ static struct configfs_attribute *target_stat_scsi_tgt_dev_attrs[] = { &target_stat_tgt_attr_status, &target_stat_tgt_attr_non_access_lus, &target_stat_tgt_attr_resets, + &target_stat_tgt_attr_aborts_complete, + &target_stat_tgt_attr_aborts_no_task, NULL, }; diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index a806d9bca3d2aa..dce1e1b4731617 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -190,6 +190,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); tmr->response = TMR_FUNCTION_COMPLETE; + atomic_long_inc(&dev->aborts_complete); return; } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -197,6 +198,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; + atomic_long_inc(&dev->aborts_no_task); } static void core_tmr_drain_tmr_list( diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 16d3be8395bebb..49ce5bc9912fb4 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -766,6 +766,8 @@ struct se_device { u32 dev_index; u64 creation_time; atomic_long_t num_resets; + atomic_long_t aborts_complete; + atomic_long_t aborts_no_task; atomic_long_t num_cmds; atomic_long_t read_bytes; atomic_long_t write_bytes; From 9bd829041b704e5e501ad5f5a9374a63426763fd Mon Sep 17 00:00:00 2001 From: George Cherian Date: Wed, 15 Feb 2017 12:42:19 +0000 Subject: [PATCH 0247/1911] crypto: cavium - Fix couple of static checker errors Fix the following smatch errors cptvf_reqmanager.c:333 do_post_process() warn: variable dereferenced before check 'cptvf' cptvf_main.c:825 cptvf_remove() error: we previously assumed 'cptvf' could be null Reported-by: Dan Carpenter Signed-off-by: George Cherian Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_main.c | 4 +++- drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 4 ++-- 2 files changed, 5 
insertions(+), 3 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index aac2966ff8d92d..e50872e666d655 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -815,8 +815,10 @@ static void cptvf_remove(struct pci_dev *pdev) { struct cpt_vf *cptvf = pci_get_drvdata(pdev); - if (!cptvf) + if (!cptvf) { dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } /* Convey DOWN to PF */ if (cptvf_send_vf_down(cptvf)) { diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c index 7f57f30f88636c..169e66231bcf15 100644 --- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -330,8 +330,8 @@ void do_post_process(struct cpt_vf *cptvf, struct cpt_info_buffer *info) { struct pci_dev *pdev = cptvf->pdev; - if (!info || !cptvf) { - dev_err(&pdev->dev, "Input params are incorrect for post processing\n"); + if (!info) { + dev_err(&pdev->dev, "incorrect cpt_info_buffer for post processing\n"); return; } From d80388eca1b476125755a811676f76f138bbbe28 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Feb 2017 15:57:43 +0000 Subject: [PATCH 0248/1911] crypto: cavium - fix leak on curr if curr->head fails to be allocated The exit path when curr->head cannot be allocated fails to kfree the earlier allocated curr. Fix this by kfree'ing it. Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index e50872e666d655..6ffc740c7431d2 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -242,6 +242,7 @@ static int alloc_command_queues(struct cpt_vf *cptvf, if (!curr->head) { dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", i, queue->nchunks); + kfree(curr); goto cmd_qfail; } From f7f9482e370981ebeeeac30bbdb9960807e332ee Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Feb 2017 11:27:38 +0100 Subject: [PATCH 0249/1911] crypto: atmel - CRYPTO_DEV_ATMEL_TDES and CRYPTO_DEV_ATMEL_SHA should depend on HAS_DMA If NO_DMA=y: ERROR: "bad_dma_ops" [drivers/crypto/atmel-tdes.ko] undefined! ERROR: "bad_dma_ops" [drivers/crypto/atmel-sha.ko] undefined! Add dependencies on HAS_DMA to fix this. 
Fixes: ceb4afb3086ab08f ("crypto: atmel - refine Kconfig dependencies") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 2cac445b02fde0..69f7fc0dc84dc3 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -445,6 +445,7 @@ config CRYPTO_DEV_ATMEL_AES config CRYPTO_DEV_ATMEL_TDES tristate "Support for Atmel DES/TDES hw accelerator" + depends on HAS_DMA depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_DES select CRYPTO_BLKCIPHER @@ -458,6 +459,7 @@ config CRYPTO_DEV_ATMEL_TDES config CRYPTO_DEV_ATMEL_SHA tristate "Support for Atmel SHA hw accelerator" + depends on HAS_DMA depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_HASH help From c884b36816c131906038cdac4773f6c84a5bf3ee Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Feb 2017 11:27:39 +0100 Subject: [PATCH 0250/1911] crypto: atmel - CRYPTO_DEV_MEDIATEK should depend on HAS_DMA If NO_DMA=y: ERROR: "bad_dma_ops" [drivers/crypto/mediatek/mtk-crypto.ko] undefined! Add a dependency on HAS_DMA to fix this. Fixes: 7dee9f618790d0b7 ("crypto: mediatek - remove ARM dependencies") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 69f7fc0dc84dc3..a7ff6e5d0ba92c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -571,6 +571,7 @@ config CRYPTO_DEV_ROCKCHIP config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" + depends on HAS_DMA depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST select CRYPTO_AES select CRYPTO_AEAD From 016df0abc56ec06d0c63c5318ef53e40738dea8b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 Feb 2017 12:22:35 +0800 Subject: [PATCH 0251/1911] crypto: api - Add crypto_requires_off helper This patch adds crypto_requires_off which is an extension of crypto_requires_sync for similar bits such as NEED_FALLBACK. Cc: stable@vger.kernel.org #4.10 Suggested-by: Marcelo Cerri Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ebe4ded0c55d7f..436c4c2683c7dc 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -360,13 +360,18 @@ static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb, return crypto_attr_alg(tb[1], type, mask); } +static inline int crypto_requires_off(u32 type, u32 mask, u32 off) +{ + return (type ^ off) & mask & off; +} + /* * Returns CRYPTO_ALG_ASYNC if type/mask requires the use of sync algorithms. * Otherwise returns zero. */ static inline int crypto_requires_sync(u32 type, u32 mask) { - return (type ^ CRYPTO_ALG_ASYNC) & mask & CRYPTO_ALG_ASYNC; + return crypto_requires_off(type, mask, CRYPTO_ALG_ASYNC); } noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); From 89027579bc6c2febbcc9c2f9d5069adf71539e4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 Feb 2017 12:24:10 +0800 Subject: [PATCH 0252/1911] crypto: xts - Propagate NEED_FALLBACK bit When we're used as a fallback algorithm, we should propagate the NEED_FALLBACK bit when searching for the underlying ECB mode. This just happens to fix a hang too because otherwise the search may end up loading the same module that triggered this XTS creation. 
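To make the bit arithmetic concrete, a small stand-alone sketch of the crypto_requires_off() helper introduced in the crypto: api patch just above (the flag values are placeholders, not the real CRYPTO_ALG_* bits): it returns exactly those bits of 'off' that the caller's type/mask pair requires to be clear, so using the result as the grab mask makes the inner "ecb(...)" lookup inherit the same constraints.

    #include <stdio.h>

    /* Illustrative placeholder flag values. */
    #define ALG_ASYNC          0x080u
    #define ALG_NEED_FALLBACK  0x100u

    /* Same expression as the new crypto_requires_off() helper. */
    static unsigned int requires_off(unsigned int type, unsigned int mask,
                                     unsigned int off)
    {
            return (type ^ off) & mask & off;
    }

    int main(void)
    {
            /* A caller that asked for a synchronous, no-fallback cipher:
             * both bits are clear in 'type' but present in 'mask', so both
             * come back and get propagated to the inner lookup. */
            unsigned int type = 0, mask = ALG_ASYNC | ALG_NEED_FALLBACK;

            printf("propagated mask: %#x\n",
                   requires_off(type, mask, ALG_ASYNC | ALG_NEED_FALLBACK));
            return 0;
    }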
Cc: stable@vger.kernel.org #4.10 Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Reported-by: Harald Freudenberger Signed-off-by: Herbert Xu --- crypto/xts.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crypto/xts.c b/crypto/xts.c index 410a2e299085f1..baeb34dd8582eb 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -463,6 +463,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) struct xts_instance_ctx *ctx; struct skcipher_alg *alg; const char *cipher_name; + u32 mask; int err; algt = crypto_get_attr_type(tb); @@ -483,18 +484,19 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) ctx = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + + mask = crypto_requires_off(algt->type, algt->mask, + CRYPTO_ALG_NEED_FALLBACK | + CRYPTO_ALG_ASYNC); + + err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, mask); if (err == -ENOENT) { err = -ENAMETOOLONG; if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; - err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, mask); } if (err) From 0328edc77d4f35014b35f32b46be0a7e16aae74f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Feb 2017 08:59:16 +0100 Subject: [PATCH 0253/1911] mac80211: fix packet statistics for fast-RX When adding per-CPU statistics, which added statistics back to mac80211 for the fast-RX path, I evidently forgot to add the "stats->packets++" line. The reason for that is likely that I didn't see it since it's done in defragmentation for the regular RX path. Add the missing line to properly count received packets in the fast-RX case. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Reported-by: Oren Givon Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 50ca3828b1242e..a8443d8bc23323 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3880,6 +3880,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, stats->last_rate = sta_stats_encode_rate(status); stats->fragments++; + stats->packets++; if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; From a9e9200d8661c1a0be8c39f93deb383dc940de35 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Sun, 22 Jan 2017 02:16:58 +0800 Subject: [PATCH 0254/1911] mac80211: flush delayed work when entering suspend The issue was found when entering suspend and resume. It triggers a warning in: mac80211/key.c: ieee80211_enable_keys() ... WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); ... It points out sdata->crypto_tx_tailroom_pending_dec isn't cleaned up successfully in a delayed_work during suspend. Add a flush_delayed_work to fix it. 
Cc: stable@vger.kernel.org Signed-off-by: Matt Chen Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 28a3a0957c9e35..76a8bcd8ef1123 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } + flush_delayed_work(&sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } From b7540d8f25c8034de7e4163fc23ac457bf057731 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 6 Feb 2017 15:28:42 +0200 Subject: [PATCH 0255/1911] mac80211: don't reorder frames with SN smaller than SSN When RX aggregation starts, transmitter may continue send frames with SN smaller than SSN until the AddBA response is received. However, the reorder buffer is already initialized at this point, which will cause the drop of such frames as duplicates since the head SN of the reorder buffer is set to the SSN, which is bigger. Cc: stable@vger.kernel.org Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 1 + net/mac80211/rx.c | 14 +++++++++++++- net/mac80211/sta_info.h | 6 ++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3b5fd4188f2ac7..58ad23a441097c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; tid_agg_rx->auto_seq = auto_seq; + tid_agg_rx->started = false; tid_agg_rx->reorder_buf_filtered = 0; status = WLAN_STATUS_SUCCESS; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a8443d8bc23323..28cc494a774d0e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 - 2016 Intel Deutschland GmbH + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; + /* + * If the current MPDU's SN is smaller than the SSN, it shouldn't + * be reordered. + */ + if (unlikely(!tid_agg_rx->started)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { + ret = false; + goto out; + } + tid_agg_rx->started = true; + } + /* frame with out of date sequence number */ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dd06ef0b886145..15599c70a38fc9 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -189,6 +189,7 @@ struct tid_ampdu_tx { * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. * @removed: this session is removed (but might have been found due to RCU) + * @started: this session has started (head ssn or higher was received) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. 
@@ -212,8 +213,9 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - bool auto_seq; - bool removed; + u8 auto_seq:1, + removed:1, + started:1; }; /** From 8fbcfeb8a9cc803464d6c166e7991913711c612c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Feb 2017 10:27:37 +0000 Subject: [PATCH 0256/1911] mac80211_hwsim: Replace bogus hrtimer clockid mac80211_hwsim initializes a hrtimer with clockid CLOCK_MONOTONIC_RAW. That's not supported. Use CLOCK_MONOTONIC instead. Signed-off-by: Thomas Gleixner Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1620a5d2757d38..0889fc81ce9e47 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2671,7 +2671,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, tasklet_hrtimer_init(&data->beacon_timer, mac80211_hwsim_beacon, - CLOCK_MONOTONIC_RAW, HRTIMER_MODE_ABS); + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); spin_lock_bh(&hwsim_radio_lock); list_add_tail(&data->list, &hwsim_radios); From 890030d3c425f49abaa4acf60e20f288b599f980 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Feb 2017 16:16:07 +0100 Subject: [PATCH 0257/1911] mac80211: don't handle filtered frames within a BA session When running a BA session, the driver (or the hardware) already takes care of retransmitting failed frames, since it has to keep the receiver reorder window in sync. Adding another layer of retransmit around that does not improve anything. In fact, it can only lead to some strong reordering with huge latency. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a3af6e1bfd984d..05ccd55b5d83d4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (void *)skb->data; int ac; - if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_AMPDU)) { ieee80211_free_txskb(&local->hw, skb); return; } From d98937f4ea713d21e0fcc345919f86c877dd8d6f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Feb 2017 14:24:36 +0100 Subject: [PATCH 0258/1911] mac80211: fix power saving clients handling in iwlwifi iwlwifi now supports RSS and can't let mac80211 track the PS state based on the Rx frames since they can come out of order. iwlwifi is now advertising AP_LINK_PS, and uses explicit notifications to teach mac80211 about the PS state of the stations and the PS poll / uAPSD trigger frames coming our way from the peers. Because of that, the TIM stopped being maintained in mac80211. I tried to fix this in commit c68df2e7be0c ("mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") but that was later reverted by Felix in commit 6c18a6b4e799 ("Revert "mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") since it broke drivers that do not implement set_tim. Since none of the drivers that set AP_LINK_PS have the set_tim() handler set besides iwlwifi, I can bail out in __sta_info_recalc_tim if AP_LINK_PS AND .set_tim is not implemented. 
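Pulled out of the one-line diff below for readability, the whole behavioural change is this early return in __sta_info_recalc_tim():

    /* mac80211 keeps maintaining the TIM whenever the driver has a
     * set_tim() hook to feed, even with AP_LINK_PS set; it only skips
     * the work when the driver genuinely does everything itself. */
    if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim)
            return;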
Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4774e663a4112f..8bb99d299cdaa9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) From 2595d259b667114431501bae51b45d6656b987d1 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Feb 2017 14:24:39 +0100 Subject: [PATCH 0259/1911] mac80211: shorten debug message Tracing is limited to 100 characters and this message passes the limit when there are a few buffered frames. Shorten it. Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8bb99d299cdaa9..3323a2fb289bd0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1264,7 +1264,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta_info_recalc_tim(sta); ps_dbg(sdata, - "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", + "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); From 09e0a2fe102208cbaf39510b8b04dd524d7d2935 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Feb 2017 14:24:38 +0100 Subject: [PATCH 0260/1911] mac80211: fix typo in debug print Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 58ad23a441097c..4456559cb056d1 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -85,7 +85,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ht_dbg(sta->sdata, "Rx BA session stop requested for %pM tid %u %s reason: %d\n", sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator", (int)reason); if (drv_ampdu_action(local, sta->sdata, ¶ms)) From d825adb48cf9bf9e3f5cb1d927e2827f8c2abee4 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 26 Feb 2017 16:15:21 +0900 Subject: [PATCH 0261/1911] xenbus: Remove duplicate inclusion of linux/init.h This patch remove duplicate inclusion of linux/init.h in xenbus_dev_frontend.c. Confirm successfully compile after remove the line. Signed-off-by: Masanari Iida Reviewed-by: Juergen Gross --- drivers/xen/xenbus/xenbus_dev_frontend.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 4d343eed08f51e..1f4733b80c8774 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include From 6dbf5cea05a7098a69f294c96b6d76f08562cae5 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 24 Feb 2017 13:25:14 +0100 Subject: [PATCH 0262/1911] cpuidle: menu: Avoid taking spinlock for accessing QoS values After commit 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) the cpuidle menu governor calls dev_pm_qos_read_value() on CPU devices to read the current resume latency QoS constraint values for them. That function takes a spinlock to prevent the device's power.qos pointer from becoming NULL during the access which is a problem for the RT patchset where spinlocks are converted into mutexes and the idle loop stops working. However, it is not even necessary for the menu governor to take that spinlock, because the power.qos pointer accessed under it cannot be modified during the access anyway. For this reason, introduce a "raw" routine for accessing device QoS resume latency constraints without locking and use it in the menu governor. Fixes: 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) Acked-by: Alex Shi Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 3 +-- drivers/cpuidle/governors/menu.c | 2 +- include/linux/pm_qos.h | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 58fcc758334e5b..73142d08624037 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -108,8 +108,7 @@ s32 __dev_pm_qos_read_value(struct device *dev) { lockdep_assert_held(&dev->power.lock); - return IS_ERR_OR_NULL(dev->power.qos) ? - 0 : pm_qos_read_value(&dev->power.qos->resume_latency); + return dev_pm_qos_raw_read_value(dev); } /** diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 8d6d25c38c020e..6d6f46e79d9495 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -287,7 +287,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int interactivity_req; unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; - int resume_latency = dev_pm_qos_read_value(device); + int resume_latency = dev_pm_qos_raw_read_value(device); if (data->needs_update) { menu_update(drv, dev); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 0f65d36c2a7515..5aba9f47899d2b 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -173,6 +173,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } + +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); +} #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) @@ -237,6 +243,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } #endif #endif From 81d45bdf89135cd26dc7535c14a45db6cdd647fa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 00:25:28 +0100 Subject: [PATCH 0263/1911] PM / hibernate: Untangle power_down() The power_down() routine in the core hibernation code is not exactly straightforward (to put it lightly), so clean it up to make it avoid invoking itself recursively, among other things. Signed-off-by: Rafael J. 
Wysocki --- kernel/power/hibernate.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b26dbc48c75b9f..f251b4d32913ea 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -608,6 +608,22 @@ static void power_down(void) { #ifdef CONFIG_SUSPEND int error; + + if (hibernation_mode == HIBERNATION_SUSPEND) { + error = suspend_devices_and_enter(PM_SUSPEND_MEM); + if (error) { + hibernation_mode = hibernation_ops ? + HIBERNATION_PLATFORM : + HIBERNATION_SHUTDOWN; + } else { + /* Restore swap signature. */ + error = swsusp_unmark(); + if (error) + pr_err("PM: Swap will be unusable! Try swapon -a.\n"); + + return; + } + } #endif switch (hibernation_mode) { @@ -620,25 +636,6 @@ static void power_down(void) if (pm_power_off) kernel_power_off(); break; -#ifdef CONFIG_SUSPEND - case HIBERNATION_SUSPEND: - error = suspend_devices_and_enter(PM_SUSPEND_MEM); - if (error) { - if (hibernation_ops) - hibernation_mode = HIBERNATION_PLATFORM; - else - hibernation_mode = HIBERNATION_SHUTDOWN; - power_down(); - } - /* - * Restore swap signature. - */ - error = swsusp_unmark(); - if (error) - printk(KERN_ERR "PM: Swap will be unusable! " - "Try swapon -a.\n"); - return; -#endif } kernel_halt(); /* From 2872de1382a7c888fa69532eda25aa7342dfe748 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 00:26:15 +0100 Subject: [PATCH 0264/1911] PM / hibernate: Define pr_fmt() and use pr_*() instead of printk() Define a pr_fmt() for hibernate.c and convert all of the explicit printk() calls into corresponding pr_*() so that they use the pr_fmt() format. Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 60 +++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f251b4d32913ea..8951d0d0481026 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -10,6 +10,8 @@ * This file is released under the GPLv2. 
*/ +#define pr_fmt(fmt) "PM: " fmt + #include #include #include @@ -104,7 +106,7 @@ EXPORT_SYMBOL(system_entering_hibernation); #ifdef CONFIG_PM_DEBUG static void hibernation_debug_sleep(void) { - printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + pr_info("hibernation debug: Waiting for 5 seconds.\n"); mdelay(5000); } @@ -250,10 +252,9 @@ void swsusp_show_speed(ktime_t start, ktime_t stop, centisecs = 1; /* avoid div-by-zero */ k = nr_pages * (PAGE_SIZE / 1024); kps = (k * 100) / centisecs; - printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", - msg, k, - centisecs / 100, centisecs % 100, - kps / 1000, (kps % 1000) / 10); + pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", + msg, k, centisecs / 100, centisecs % 100, kps / 1000, + (kps % 1000) / 10); } /** @@ -271,8 +272,7 @@ static int create_image(int platform_mode) error = dpm_suspend_end(PMSG_FREEZE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some devices failed to power down, aborting hibernation\n"); return error; } @@ -288,8 +288,7 @@ static int create_image(int platform_mode) error = syscore_suspend(); if (error) { - printk(KERN_ERR "PM: Some system devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some system devices failed to power down, aborting hibernation\n"); goto Enable_irqs; } @@ -304,8 +303,8 @@ static int create_image(int platform_mode) restore_processor_state(); trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); if (error) - printk(KERN_ERR "PM: Error %d creating hibernation image\n", - error); + pr_err("Error %d creating hibernation image\n", error); + if (!in_suspend) { events_check_enabled = false; clear_free_pages(); @@ -432,8 +431,7 @@ static int resume_target_kernel(bool platform_mode) error = dpm_suspend_end(PMSG_QUIESCE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting resume\n"); + pr_err("Some devices failed to power down, aborting resume\n"); return error; } @@ -619,7 +617,7 @@ static void power_down(void) /* Restore swap signature. */ error = swsusp_unmark(); if (error) - pr_err("PM: Swap will be unusable! Try swapon -a.\n"); + pr_err("Swap will be unusable! Try swapon -a.\n"); return; } @@ -642,7 +640,7 @@ static void power_down(void) * Valid image is on the disk, if we continue we risk serious data * corruption after resume. */ - printk(KERN_CRIT "PM: Please power down manually\n"); + pr_crit("Power down manually\n"); while (1) cpu_relax(); } @@ -652,7 +650,7 @@ static int load_image_and_restore(void) int error; unsigned int flags; - pr_debug("PM: Loading hibernation image.\n"); + pr_debug("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); @@ -664,7 +662,7 @@ static int load_image_and_restore(void) if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); - printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + pr_err("Failed to load hibernation image, recovering.\n"); swsusp_free(); free_basic_memory_bitmaps(); Unlock: @@ -682,7 +680,7 @@ int hibernate(void) bool snapshot_test = false; if (!hibernation_available()) { - pr_debug("PM: Hibernation not available.\n"); + pr_debug("Hibernation not available.\n"); return -EPERM; } @@ -700,9 +698,9 @@ int hibernate(void) goto Exit; } - printk(KERN_INFO "PM: Syncing filesystems ... "); + pr_info("Syncing filesystems ... 
\n"); sys_sync(); - printk("done.\n"); + pr_info("done.\n"); error = freeze_processes(); if (error) @@ -728,7 +726,7 @@ int hibernate(void) else flags |= SF_CRC32_MODE; - pr_debug("PM: writing image.\n"); + pr_debug("Writing image.\n"); error = swsusp_write(flags); swsusp_free(); if (!error) { @@ -740,7 +738,7 @@ int hibernate(void) in_suspend = 0; pm_restore_gfp_mask(); } else { - pr_debug("PM: Image restored successfully.\n"); + pr_debug("Image restored successfully.\n"); } Free_bitmaps: @@ -748,7 +746,7 @@ int hibernate(void) Thaw: unlock_device_hotplug(); if (snapshot_test) { - pr_debug("PM: Checking hibernation image\n"); + pr_debug("Checking hibernation image\n"); error = swsusp_check(); if (!error) error = load_image_and_restore(); @@ -812,10 +810,10 @@ static int software_resume(void) goto Unlock; } - pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + pr_debug("Checking hibernation image partition %s\n", resume_file); if (resume_delay) { - printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + pr_info("Waiting %dsec before reading resume device ...\n", resume_delay); ssleep(resume_delay); } @@ -854,10 +852,10 @@ static int software_resume(void) } Check_image: - pr_debug("PM: Hibernation image partition %d:%d present\n", + pr_debug("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - pr_debug("PM: Looking for hibernation image.\n"); + pr_debug("Looking for hibernation image.\n"); error = swsusp_check(); if (error) goto Unlock; @@ -876,7 +874,7 @@ static int software_resume(void) goto Close_Finish; } - pr_debug("PM: Preparing processes for restore.\n"); + pr_debug("Preparing processes for restore.\n"); error = freeze_processes(); if (error) goto Close_Finish; @@ -889,7 +887,7 @@ static int software_resume(void) /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); - pr_debug("PM: Hibernation image not present or could not be loaded.\n"); + pr_debug("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: swsusp_close(FMODE_READ); @@ -1013,7 +1011,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, error = -EINVAL; if (!error) - pr_debug("PM: Hibernation mode set to '%s'\n", + pr_debug("Hibernation mode set to '%s'\n", hibernation_modes[mode]); unlock_system_sleep(); return error ? error : n; @@ -1049,7 +1047,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, lock_system_sleep(); swsusp_resume_device = res; unlock_system_sleep(); - printk(KERN_INFO "PM: Starting manual resume from disk\n"); + pr_info("Starting manual resume from disk\n"); noresume = 0; software_resume(); return n; From 51be7a9a261ce18c520fb3928b168feb77522745 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:36:32 +0200 Subject: [PATCH 0265/1911] virtio_mmio: expose header to userspace It's handy for userspace emulators like QEMU. Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mmio.c | 2 +- include/uapi/linux/Kbuild | 1 + include/{ => uapi}/linux/virtio_mmio.h | 0 3 files changed, 2 insertions(+), 1 deletion(-) rename include/{ => uapi}/linux/virtio_mmio.h (100%) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c71fde5fe835c4..08357d70a89174 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f330ba4547cfd4..718fa73310e1b9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -458,6 +458,7 @@ header-y += virtio_console.h header-y += virtio_gpu.h header-y += virtio_ids.h header-y += virtio_input.h +header-y += virtio_mmio.h header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h diff --git a/include/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h similarity index 100% rename from include/linux/virtio_mmio.h rename to include/uapi/linux/virtio_mmio.h From 3a150df945b7408c27cad2c01a1638e8b14ac562 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Wed, 22 Feb 2017 08:29:26 +0800 Subject: [PATCH 0266/1911] tracing: Fix code comment for ftrace_ops_get_func() There is no function 'ftrace_ops_recurs_func' existing in the current code, it was renamed to ftrace_ops_assist_func() in commit c68c0fa29341 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too"). Update the comment to the correct function name. Link: http://lkml.kernel.org/r/1487723366-14463-1-git-send-email-chuhu@redhat.com Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c060932639140..fd84f2e30b6dcb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5487,7 +5487,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, * Normally the mcount trampoline will call the ops->func, but there * are times that it should not. For example, if the ops does not * have its own recursion protection, then it should call the - * ftrace_ops_recurs_func() instead. + * ftrace_ops_assist_func() instead. * * Returns the function that the trampoline should call for @ops. */ From ff4dd73dd2b4806419f8ff65cbce11d5019548d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Feb 2017 17:15:28 +0100 Subject: [PATCH 0267/1911] mac80211_hwsim: check HWSIM_ATTR_RADIO_NAME length Unfortunately, the nla policy was defined to have HWSIM_ATTR_RADIO_NAME as an NLA_STRING, rather than NLA_NUL_STRING, so we can't use it as a NUL-terminated string in the kernel. Rather than break the API, kasprintf() the string to a new buffer to guarantee NUL termination. 
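The core of the fix is the bounded-copy idiom below, shown here in isolation (attr stands for the HWSIM_ATTR_RADIO_NAME attribute; the helper name is invented): kasprintf() with "%.*s" copies at most nla_len() bytes and always appends a terminating NUL, so the result can be used as an ordinary C string no matter how the NLA_STRING payload arrived on the wire.

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/string.h>
    #include <net/netlink.h>

    static char *radio_name_dup(const struct nlattr *attr)
    {
            /* At most nla_len(attr) bytes are read; a NUL is always added. */
            return kasprintf(GFP_KERNEL, "%.*s",
                             nla_len(attr), (char *)nla_data(attr));
    }

The caller checks for NULL and kfree()s the result when done, as both the new-radio and del-radio paths in the diff below do.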
Reported-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0889fc81ce9e47..50c219fb1a52b9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3056,6 +3056,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_new_radio_params param = { 0 }; + const char *hwname = NULL; param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG]; param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE]; @@ -3069,8 +3070,14 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_NO_VIF]) param.no_vif = true; - if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - param.hwname = nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); + if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + param.hwname = hwname; + } if (info->attrs[HWSIM_ATTR_USE_CHANCTX]) param.use_chanctx = true; @@ -3098,11 +3105,15 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) s64 idx = -1; const char *hwname = NULL; - if (info->attrs[HWSIM_ATTR_RADIO_ID]) + if (info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); - else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - hwname = (void *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); - else + } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + } else return -EINVAL; spin_lock_bh(&hwsim_radio_lock); @@ -3111,7 +3122,8 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if (data->idx != idx) continue; } else { - if (strcmp(hwname, wiphy_name(data->hw->wiphy))) + if (!hwname || + strcmp(hwname, wiphy_name(data->hw->wiphy))) continue; } @@ -3122,10 +3134,12 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) spin_unlock_bh(&hwsim_radio_lock); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), info); + kfree(hwname); return 0; } spin_unlock_bh(&hwsim_radio_lock); + kfree(hwname); return -ENODEV; } From e3b56cdd4351f0e227d4d847eeadff4c82aef1b9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 7 Feb 2017 15:49:50 +0800 Subject: [PATCH 0268/1911] vhost: try avoiding avail index access when getting descriptor If last avail idx is not equal to cached avail idx, we're sure there's still available buffers in the virtqueue so there's no need to re-read avail idx. So let's skip this to avoid unnecessary userspace memory access and memory barrier. Pktgen test show about 3% improvement on rx pps. Signed-off-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vhost.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8f99fe08de02e7..1f7e4e4e6f8efe 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1930,25 +1930,32 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { - vq_err(vq, "Failed to access avail idx at %p\n", - &vq->avail->idx); - return -EFAULT; - } - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { - vq_err(vq, "Guest moved used index from %u to %u", - last_avail_idx, vq->avail_idx); - return -EFAULT; - } + if (vq->avail_idx == vq->last_avail_idx) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return -EFAULT; + } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - /* If there's nothing new since last we looked, return invalid. */ - if (vq->avail_idx == last_avail_idx) - return vq->num; + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return -EFAULT; + } + + /* If there's nothing new since last we looked, return + * invalid. + */ + if (vq->avail_idx == last_avail_idx) + return vq->num; - /* Only get avail ring entries after they have been exposed by guest. */ - smp_rmb(); + /* Only get avail ring entries after they have been + * exposed by guest. + */ + smp_rmb(); + } /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ From 5c34d002dcc7a6dd665a19d098b4f4cd5501ba1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:18 +0100 Subject: [PATCH 0269/1911] virtio_pci: remove struct virtio_pci_vq_info We don't really need struct virtio_pci_vq_info, as most field in there are redundant: - the vq backpointer is not strictly neede to start with - the entry in the vqs list is not needed - the generic virtqueue already has list, we only need to check if it has a callback to get the same semantics - we can use a simple array to look up the MSI-X vec if needed. - That simple array now also duoble serves to replace the per_vq_vectors flag Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 117 +++++++++-------------------- drivers/virtio/virtio_pci_common.h | 25 +----- drivers/virtio/virtio_pci_legacy.c | 6 +- drivers/virtio/virtio_pci_modern.c | 6 +- 4 files changed, 39 insertions(+), 115 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 186cbab327b8f6..a33767318cbf6b 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -62,16 +62,13 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; - unsigned long flags; + struct virtqueue *vq; - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } - spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } @@ -167,55 +164,6 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, return err; } -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); - struct virtqueue *vq; - unsigned long flags; - - /* fill out our structure that represents an active queue */ - if (!info) - return ERR_PTR(-ENOMEM); - - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); - if (IS_ERR(vq)) - goto out_info; - - info->vq = vq; - if (callback) { - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); - } else { - INIT_LIST_HEAD(&info->node); - } - - vp_dev->vqs[index] = info; - return vq; - -out_info: - kfree(info); - return vq; -} - -static void vp_del_vq(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); - - vp_dev->del_vq(info); - kfree(info); -} - /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { @@ -224,16 +172,15 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->per_vq_vectors) { - int v = vp_dev->vqs[vq->index]->msix_vector; + if (vp_dev->msix_vector_map) { + int v = vp_dev->msix_vector_map[vq->index]; if (v != VIRTIO_MSI_NO_VECTOR) free_irq(pci_irq_vector(vp_dev->pci_dev, v), vq); } - vp_del_vq(vq); + vp_dev->del_vq(vq); } - vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); @@ -261,8 +208,8 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->vqs); - vp_dev->vqs = NULL; + kfree(vp_dev->msix_vector_map); + vp_dev->msix_vector_map = NULL; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -275,10 +222,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, u16 msix_vec; int i, err, nvectors, allocated_vectors; - 
vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -294,7 +237,13 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (err) goto error_find; - vp_dev->per_vq_vectors = per_vq_vectors; + if (per_vq_vectors) { + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto error_find; + } + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!names[i]) { @@ -304,19 +253,25 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!callbacks[i]) msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) + else if (per_vq_vectors) msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + if (!per_vq_vectors) continue; + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; + continue; + } + /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof *vp_dev->msix_names, @@ -326,8 +281,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + /* don't free this irq on error */ + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto error_find; + } + vp_dev->msix_vector_map[i] = msix_vec; } return 0; @@ -343,23 +302,18 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; - vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -409,16 +363,15 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - struct cpumask *mask; - unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { - mask = vp_dev->msix_affinity_masks[info->msix_vector]; - irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + int vec = vp_dev->msix_vector_map[vq->index]; + struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; + unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); + if (cpu == -1) irq_set_affinity_hint(irq, NULL); else { @@ -498,8 +451,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; - INIT_LIST_HEAD(&vp_dev->virtqueues); - spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); diff --git a/drivers/virtio/virtio_pci_common.h 
b/drivers/virtio/virtio_pci_common.h index b2f666250ae0bf..2038887bdf237c 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -31,17 +31,6 @@ #include #include -struct virtio_pci_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -75,13 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* array of all queues for house-keeping */ - struct virtio_pci_vq_info **vqs; - /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -94,16 +76,15 @@ struct virtio_pci_device { /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; + /* Map of per-VQ MSI-X vectors, may be NULL */ + unsigned *msix_vector_map; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec); - void (*del_vq)(struct virtio_pci_vq_info *info); + void (*del_vq)(struct virtqueue *vq); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6d9e5173d5fa6b..47292dad0ff980 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,7 +112,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -130,8 +129,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, @@ -162,9 +159,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return ERR_PTR(err); } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4bf7ab37589417..00e6fc1df407a7 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -323,8 +322,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* get offset of notification word for this vq */ off = vp_ioread16(&cfg->queue_notify_off); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, @@ -409,9 +406,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; } -static void del_vq(struct 
virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); vp_iowrite16(vq->index, &vp_dev->common->queue_select); From 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:19 +0100 Subject: [PATCH 0270/1911] virtio_pci: use shared interrupts for virtqueues This lets IRQ layer handle dispatching IRQs to separate handlers for the case where we don't have per-VQ MSI-X vectors, and allows us to greatly simplify the code based on the assumption that we always have interrupt vector 0 (legacy INTx or config interrupt for MSI-X) available, and any other interrupt is request/freed throught the VQ, even if the actual interrupt line might be shared in some cases. This allows removing a great deal of variables keeping track of the interrupt state in struct virtio_pci_device, as we can now simply walk the list of VQs and deal with per-VQ interrupt handlers there, and only treat vector 0 special. Additionally clean up the VQ allocation code to properly unwind on error instead of having a single global cleanup label, which is error prone, and in this case also leads to more code. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 235 +++++++++++++---------------- drivers/virtio/virtio_pci_common.h | 16 +- 2 files changed, 106 insertions(+), 145 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a33767318cbf6b..274dc1ff09c03a 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) - synchronize_irq(vp_dev->pci_dev->irq); - - for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); + for (i = 1; i < vp_dev->msix_vectors; i++) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } @@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_vectors = nvectors; - - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - vp_dev->msix_affinity_masks - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, - GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto error; - for (i = 0; i < nvectors; ++i) - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto error; - - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - if (err < 0) - goto error; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - v = vp_dev->config_vector(vp_dev, v); - /* Verify we had enough resources to assign the vector */ - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - goto 
error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - return err; -} - -/* the config->del_vqs() implementation */ -void vp_del_vqs(struct virtio_device *vdev) +static void vp_remove_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; - int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { if (vp_dev->msix_vector_map) { @@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev) } vp_dev->del_vq(vq); } +} - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) + return; - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + vp_remove_vqs(vdev); if (vp_dev->msix_enabled) { + for (i = 0; i < vp_dev->msix_vectors; i++) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); - pci_free_irq_vectors(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; + kfree(vp_dev->msix_affinity_masks); + kfree(vp_dev->msix_names); + kfree(vp_dev->msix_vector_map); } - vp_dev->msix_vectors = 0; - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_affinity_masks); - vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->msix_vector_map); - vp_dev->msix_vector_map = NULL; + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); + pci_free_irq_vectors(vp_dev->pci_dev); } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + int i, err = -ENOMEM, allocated_vectors, nvectors; u16 msix_vec; - int i, err, nvectors, allocated_vectors; + + nvectors = 1; + for (i = 0; i < nvqs; i++) + if (callbacks[i]) + nvectors++; if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); } else { - /* Second best: one for change, shared for all vqs. 
*/ - nvectors = 2; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, + PCI_IRQ_MSIX); } + if (err < 0) + return err; - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + vp_dev->msix_vectors = nvectors; + vp_dev->msix_names = kmalloc_array(nvectors, + sizeof(*vp_dev->msix_names), GFP_KERNEL); + if (!vp_dev->msix_names) + goto out_free_irq_vectors; + + vp_dev->msix_affinity_masks = kcalloc(nvectors, + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto out_free_msix_names; + + for (i = 0; i < nvectors; ++i) { + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto out_free_msix_affinity_masks; + } + + /* Set the vector used for configuration */ + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), + "%s-config", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, + 0, vp_dev->msix_names[0], vp_dev); if (err) - goto error_find; + goto out_free_irq_vectors; - if (per_vq_vectors) { - vp_dev->msix_vector_map = kmalloc_array(nvqs, - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); - if (!vp_dev->msix_vector_map) - goto error_find; + /* Verify we had enough resources to assign the vector */ + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_free_config_irq; } - allocated_vectors = vp_dev->msix_used_vectors; + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto out_disable_config_irq; + + allocated_vectors = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - if (!callbacks[i]) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (per_vq_vectors) - msix_vec = allocated_vectors++; + if (callbacks[i]) + msix_vec = allocated_vectors; else - msix_vec = VP_MSIX_VQ_VECTOR; + msix_vec = VIRTIO_MSI_NO_VECTOR; + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto error_find; + goto out_remove_vqs; } - if (!per_vq_vectors) - continue; - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; continue; } - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", + snprintf(vp_dev->msix_names[i + 1], + sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); + vring_interrupt, IRQF_SHARED, + vp_dev->msix_names[i + 1], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; - goto error_find; + goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + + if (per_vq_vectors) + allocated_vectors++; } + + vp_dev->msix_enabled = 1; return 0; -error_find: - vp_del_vqs(vdev); +out_remove_vqs: + vp_remove_vqs(vdev); + kfree(vp_dev->msix_vector_map); +out_disable_config_irq: + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); +out_free_config_irq: + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); +out_free_msix_affinity_masks: + for (i = 0; i < nvectors; i++) { + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } + kfree(vp_dev->msix_affinity_masks); +out_free_msix_names: + kfree(vp_dev->msix_names); +out_free_irq_vectors: + pci_free_irq_vectors(vp_dev->pci_dev); return err; } @@ -305,9 +277,8 
@@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) - goto out_del_vqs; + return err; - vp_dev->intx_enabled = 1; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_del_vqs; + goto out_remove_vqs; } } return 0; -out_del_vqs: - vp_del_vqs(vdev); + +out_remove_vqs: + vp_remove_vqs(vdev); + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); return err; } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 2038887bdf237c..85593867e712f7 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -66,16 +66,12 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; - int intx_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - + /* Total Number of MSI-X vectors (including per-VQ ones). */ + int msix_vectors; /* Map of per-VQ MSI-X vectors, may be NULL */ unsigned *msix_vector_map; @@ -89,14 +85,6 @@ struct virtio_pci_device { u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - /* Convert a generic virtio device to our structure */ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) { From 53a020c661741f3b87ad3ac6fa545088aaebac9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:20 +0100 Subject: [PATCH 0271/1911] virtio_pci: don't duplicate the msix_enable flag in struct pci_dev Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 5 ++--- drivers/virtio/virtio_pci_common.h | 2 -- drivers/virtio/virtio_pci_legacy.c | 2 +- drivers/virtio/virtio_pci_modern.c | 2 +- include/uapi/linux/virtio_pci.h | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 274dc1ff09c03a..b83053082875bf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_remove_vqs(vdev); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); @@ -244,7 +244,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, allocated_vectors++; } - vp_dev->msix_enabled = 1; return 0; out_remove_vqs: @@ -340,7 +339,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) if (!vq->callback) return -EINVAL; - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { int vec = vp_dev->msix_vector_map[vq->index]; struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 85593867e712f7..217ca876eed729 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -64,8 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* MSI-X support */ - int msix_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 47292dad0ff980..2ab6aee51bf6fd 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq) iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 00e6fc1df407a7..e5ce310919534e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -412,7 +412,7 @@ static void del_vq(struct virtqueue *vq) vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab14..15b4385a2be169 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 
24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 From 52a61516125fa9a21b3bdf4f90928308e2e5573f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:21 +0100 Subject: [PATCH 0272/1911] virtio_pci: simplify MSI-X setup Try to grab the MSI-X vectors early and fall back to the shared one before doing lots of allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b83053082875bf..822f8e5dcee421 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -142,14 +142,13 @@ void vp_del_vqs(struct virtio_device *vdev) } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - bool per_vq_vectors) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + bool shared = false; u16 msix_vec; nvectors = 1; @@ -157,12 +156,16 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (callbacks[i]) nvectors++; - if (per_vq_vectors) { - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - } else { + /* Try one vector per queue first. */ + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); + if (err < 0) { + /* Fallback to one vector for config, one shared for queues. */ + shared = true; err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, PCI_IRQ_MSIX); + if (err < 0) + return err; } if (err < 0) return err; @@ -190,7 +193,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, 0, vp_dev->msix_names[0], vp_dev); if (err) - goto out_free_irq_vectors; + goto out_free_msix_affinity_masks; /* Verify we had enough resources to assign the vector */ if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { @@ -240,7 +243,11 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, } vp_dev->msix_vector_map[i] = msix_vec; - if (per_vq_vectors) + /* + * Use a different vector for each queue if they are available, + * else share the same vector for all VQs. + */ + if (!shared) allocated_vectors++; } @@ -307,15 +314,9 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, { int err; - /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); if (!err) return 0; - /* Finally fall back to regular interrupts. 
*/ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); } From fb5e31d970ce8b4941f03ed765d7dbefc39f22d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:22 +0100 Subject: [PATCH 0273/1911] virtio: allow drivers to request IRQ affinity when creating VQs Add a struct irq_affinity pointer to the find_vqs methods, which if set is used to tell the PCI layer to create the MSI-X vectors for our I/O virtqueues with the proper affinity from the start. Compared to after the fact affinity hints this gives us an instantly working setup and allows to allocate the irq descritors node-local and avoid interconnect traffic. Last but not least this will allow blk-mq queues are created based on the interrupt affinity for storage drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 3 ++- drivers/char/virtio_console.c | 2 +- drivers/crypto/virtio/virtio_crypto_core.c | 2 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/misc/mic/vop/vop_main.c | 2 +- drivers/net/caif/caif_virtio.c | 3 ++- drivers/net/virtio_net.c | 2 +- drivers/remoteproc/remoteproc_virtio.c | 3 ++- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- drivers/s390/virtio/kvm_virtio.c | 3 ++- drivers/s390/virtio/virtio_ccw.c | 3 ++- drivers/scsi/virtio_scsi.c | 3 ++- drivers/virtio/virtio_balloon.c | 3 ++- drivers/virtio/virtio_input.c | 3 ++- drivers/virtio/virtio_mmio.c | 3 ++- drivers/virtio/virtio_pci_common.c | 19 ++++++++++++------- drivers/virtio/virtio_pci_common.h | 5 ++--- drivers/virtio/virtio_pci_modern.c | 7 +++---- include/linux/virtio_config.h | 9 +++++---- net/vmw_vsock/virtio_transport.c | 3 ++- 20 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f9610d..c54118bdc67d06 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -411,7 +411,8 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 17857beb489294..6266c0568e1d0a 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1939,7 +1939,7 @@ static int init_vqs(struct ports_device *portdev) /* Find the queues. */ err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, io_callbacks, - (const char **)io_names); + (const char **)io_names, NULL); if (err) goto free; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27dd..0aa2f045543b5b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -119,7 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 1235519853f4df..e975fa5b0a3210 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -172,7 +172,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) vgdev->has_virgl_3d ? 
"enabled" : "not available"); ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, - callbacks, names); + callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); goto err_vqs; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 1a2b67f3183d50..c2e29d7f0de888 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -374,7 +374,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index b306210b02b7b4..bc0eb47ecceea7 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -679,7 +679,8 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the TX virtio ring. This is a "guest side vring". */ - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, + NULL); if (err) goto err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765c2d6358daf3..9be74c2dfb22bd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2003,7 +2003,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 364411fb77343f..0142cc3f0c91c6 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -137,7 +137,8 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 3090b0d3072f1e..5e66e081027e56 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev) init_waitqueue_head(&vrp->sendq); /* We expect two virtqueues, rx and tx (and in this order) */ - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names); + err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); if (err) goto free_vrp; diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 5e5c11f37b2420..2ce0b3eb2efebc 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,8 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 070c4da95f48c0..304d3b3cbfd3dd 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -628,7 +628,8 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, static 
int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c680d76413116c..c9c5ea0611e9be 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -941,7 +941,8 @@ static int virtscsi_init(struct virtio_device *vdev, } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f078524a..36c9c8fcb7f867 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -413,7 +413,8 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, + NULL); if (err) return err; diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 350a2a5a49dbed..79f1293cda9327 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,7 +173,8 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, + NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 08357d70a89174..78343b8f9034b3 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -446,7 +446,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 822f8e5dcee421..7902e920fc73fa 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -143,22 +143,28 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + nvectors = 1; for (i = 0; i < nvqs; i++) if (callbacks[i]) nvectors++; /* Try one vector per queue first. 
*/ - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, + nvectors, flags, desc); if (err < 0) { /* Fallback to one vector for config, one shared for queues. */ shared = true; @@ -308,13 +314,12 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { int err; - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); if (!err) return 0; return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 217ca876eed729..a6ad9ec6baefd0 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -97,9 +97,8 @@ bool vp_notify(struct virtqueue *vq); void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e5ce310919534e..a7a0981e441ce0 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -384,13 +384,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); if (rc) return rc; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b57..2ebe506fe41aea 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include #include +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -68,9 +70,8 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int 
err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6788264acc632d..9d24c0e958b18e 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; From bbaba479563910aaa51e59bb9027a09e396d3a3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:23 +0100 Subject: [PATCH 0274/1911] virtio: provide a method to get the IRQ affinity mask for a virtqueue This basically passed up the pci_irq_get_affinity information through virtio through an optional get_vq_affinity method. It is only implemented by the PCI backend for now, and only when we use per-virtqueue IRQs. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 11 +++++++++++ drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 1 + drivers/virtio/virtio_pci_modern.c | 2 ++ include/linux/virtio_config.h | 3 +++ 5 files changed, 19 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7902e920fc73fa..df548a6fb844f7 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -361,6 +361,17 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) return 0; } +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + unsigned int *map = vp_dev->msix_vector_map; + + if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) + return NULL; + + return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); +} + #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index a6ad9ec6baefd0..ac8c9d7889646a 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -108,6 +108,8 @@ const char *vp_bus_name(struct virtio_device *vdev); */ int vp_set_vq_affinity(struct virtqueue *vq, int cpu); +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index); + #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) int virtio_pci_legacy_probe(struct virtio_pci_device *); void virtio_pci_legacy_remove(struct virtio_pci_device *); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2ab6aee51bf6fd..f7362c5fe18a96 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -190,6 +190,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index a7a0981e441ce0..7bc3004b840ef3 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -437,6 +437,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = 
vp_get_vq_affinity, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -452,6 +453,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2ebe506fe41aea..8355bab175e1d8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -58,6 +58,7 @@ struct irq_affinity; * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -77,6 +78,8 @@ struct virtio_config_ops { int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ From 73473427bb551686e4b68ecd99bfd27e6635286a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:24 +0100 Subject: [PATCH 0275/1911] blk-mq: provide a default queue mapping for virtio device Similar to the PCI version, just calling into virtio instead. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- block/Kconfig | 5 ++++ block/Makefile | 1 + block/blk-mq-virtio.c | 54 +++++++++++++++++++++++++++++++++++ include/linux/blk-mq-virtio.h | 10 +++++++ 4 files changed, 70 insertions(+) create mode 100644 block/blk-mq-virtio.c create mode 100644 include/linux/blk-mq-virtio.h diff --git a/block/Kconfig b/block/Kconfig index 8bf114a3858acd..3523b4f0cd8b31 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -165,4 +165,9 @@ config BLK_MQ_PCI depends on BLOCK && PCI default y +config BLK_MQ_VIRTIO + bool + depends on BLOCK && VIRTIO + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index a827f988c4e674..60691949d28de6 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,5 +23,6 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c new file mode 100644 index 00000000000000..c3afbca1129956 --- /dev/null +++ b/block/blk-mq-virtio.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#include +#include +#include +#include +#include +#include "blk-mq.h" + +/** + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device + * @set: tagset to provide the mapping for + * @vdev: virtio device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the virtio device @vdev has at least as many available + * interrupt vetors as @set has queues. It will then queuery the vector + * corresponding to each queue for it's affinity mask and built queue mapping + * that maps a queue to the CPUs that have irq affinity for the corresponding + * vector. + */ +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!vdev->config->get_vq_affinity) + goto fallback; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 00000000000000..b1ef6e14744f62 --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ From ad71473d9c43725c917fc5a86d54ceb7001ee28c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:25 +0100 Subject: [PATCH 0276/1911] virtio_blk: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c54118bdc67d06..1028dfeb5a7fee 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #define PART_BITS 4 @@ -385,6 +387,7 @@ static int init_vq(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -412,7 +415,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. 
*/ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -543,10 +546,18 @@ static int virtblk_init_request(void *data, struct request *rq, return 0; } +static int virtblk_map_queues(struct blk_mq_tag_set *set) +{ + struct virtio_blk *vblk = set->driver_data; + + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, + .map_queues = virtblk_map_queues, }; static unsigned int virtblk_queue_depth; From 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:26 +0100 Subject: [PATCH 0277/1911] virtio_scsi: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/scsi/virtio_scsi.c | 126 ++++--------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 12 insertions(+), 115 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c9c5ea0611e9be..939c47df73fa97 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 @@ -108,7 +110,6 @@ struct virtio_scsi { bool affinity_hint_set; struct hlist_node node; - struct hlist_node node_dead; /* Protected by event_vq lock */ bool stop_events; @@ -118,7 +119,6 @@ struct virtio_scsi { struct virtio_scsi_vq req_vqs[]; }; -static enum cpuhp_state virtioscsi_online; static struct kmem_cache *virtscsi_cmd_cache; static mempool_t *virtscsi_cmd_pool; @@ -766,6 +766,13 @@ static void virtscsi_target_destroy(struct scsi_target *starget) kfree(tgt); } +static int virtscsi_map_queues(struct Scsi_Host *shost) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + + return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); +} + static struct scsi_host_template virtscsi_host_template_single = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -801,6 +808,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .use_clustering = ENABLE_CLUSTERING, .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, + .map_queues = virtscsi_map_queues, .track_queue_depth = 1, }; @@ -817,80 +825,6 @@ static struct scsi_host_template virtscsi_host_template_multi = { virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) -static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - int i; - int cpu; - - /* In multiqueue mode, when the number of cpu is equal - * to the number of request queues, we let the qeueues - * to be private to one cpu by setting the affinity hint - * to eliminate the contention. 
- */ - if ((vscsi->num_queues == 1 || - vscsi->num_queues != num_online_cpus()) && affinity) { - if (vscsi->affinity_hint_set) - affinity = false; - else - return; - } - - if (affinity) { - i = 0; - for_each_online_cpu(cpu) { - virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu); - i++; - } - - vscsi->affinity_hint_set = true; - } else { - for (i = 0; i < vscsi->num_queues; i++) { - if (!vscsi->req_vqs[i].vq) - continue; - - virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); - } - - vscsi->affinity_hint_set = false; - } -} - -static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - get_online_cpus(); - __virtscsi_set_affinity(vscsi, affinity); - put_online_cpus(); -} - -static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi, - node); - __virtscsi_set_affinity(vscsi, true); - return 0; -} - -static int virtscsi_cpu_notif_add(struct virtio_scsi *vi) -{ - int ret; - - ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node); - if (ret) - return ret; - - ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead); - if (ret) - cpuhp_state_remove_instance(virtioscsi_online, &vi->node); - return ret; -} - -static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi) -{ - cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node); - cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD, - &vi->node_dead); -} - static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -900,14 +834,8 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_set_affinity(vscsi, false); - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - vdev->config->del_vqs(vdev); } @@ -920,6 +848,7 @@ static int virtscsi_init(struct virtio_device *vdev, vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; + struct irq_affinity desc = { .pre_vectors = 2 }; num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL); @@ -942,7 +871,7 @@ static int virtscsi_init(struct virtio_device *vdev, /* Discover virtqueues and write information to configuration. 
*/ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -1008,10 +937,6 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; - err = virtscsi_cpu_notif_add(vscsi); - if (err) - goto scsi_add_host_failed; - cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; @@ -1066,9 +991,6 @@ static void virtscsi_remove(struct virtio_device *vdev) virtscsi_cancel_event_work(vscsi); scsi_remove_host(shost); - - virtscsi_cpu_notif_remove(vscsi); - virtscsi_remove_vqs(vdev); scsi_host_put(shost); } @@ -1076,10 +998,6 @@ static void virtscsi_remove(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtscsi_freeze(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_cpu_notif_remove(vscsi); virtscsi_remove_vqs(vdev); return 0; } @@ -1094,11 +1012,6 @@ static int virtscsi_restore(struct virtio_device *vdev) if (err) return err; - err = virtscsi_cpu_notif_add(vscsi); - if (err) { - vdev->config->del_vqs(vdev); - return err; - } virtio_device_ready(vdev); if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) @@ -1153,16 +1066,6 @@ static int __init init(void) pr_err("mempool_create() for virtscsi_cmd_pool failed\n"); goto error; } - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "scsi/virtio:online", - virtscsi_cpu_online, NULL); - if (ret < 0) - goto error; - virtioscsi_online = ret; - ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead", - NULL, virtscsi_cpu_online); - if (ret) - goto error; ret = register_virtio_driver(&virtio_scsi_driver); if (ret < 0) goto error; @@ -1178,17 +1081,12 @@ static int __init init(void) kmem_cache_destroy(virtscsi_cmd_cache); virtscsi_cmd_cache = NULL; } - if (virtioscsi_online) - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); return ret; } static void __exit fini(void) { unregister_virtio_driver(&virtio_scsi_driver); - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 921acaaa160179..01aea80a503e95 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, From 41139ac3cd76c6dff8286102a8ac62933c680463 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 20 Feb 2017 09:12:58 +0800 Subject: [PATCH 0278/1911] ACPI: APEI: Fix BERT resources conflict with ACPI NVS area It was reported that on some machines, there is overlap between ACPI NVS area and BERT address range. This appears reasonable because BERT contents need to be non-volatile across reboot. But this will cause resources conflict in current Linux kernel implementation because the ACPI NVS area is marked as busy. The resource conflict is fixed via excluding the ACPI NVS area when requesting IO resources for BERT. When accessing the BERT contents, the whole BERT address range will be ioremapped and accessed. 
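In outline, the fix swaps the single request_mem_region() call for the APEI resource helpers, which exclude the ACPI NVS range when the region is requested, and then maps the whole BERT range with ioremap_cache(). The following is only a condensed sketch of the hunk below, with the error unwinding (the out_fini label) trimmed and relying on the existing declarations in bert.c; the authoritative change is the diff that follows.

	struct apei_resources bert_resources;
	struct acpi_bert_region *boot_error_region;

	/* describe the BERT range; the request path excludes any ACPI NVS overlap */
	apei_resources_init(&bert_resources);
	rc = apei_resources_add(&bert_resources, bert_tab->address, region_len, true);
	if (!rc)
		rc = apei_resources_request(&bert_resources, "APEI BERT");

	/* map and dump the whole region, then drop the reservation again */
	boot_error_region = ioremap_cache(bert_tab->address, region_len);
	if (boot_error_region) {
		bert_print_all(boot_error_region, region_len);
		iounmap(boot_error_region);
	}
	apei_resources_release(&bert_resources);
	apei_resources_fini(&bert_resources);
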
Reported-and-tested-by: Hans Kristian Rosbach Signed-off-by: Ying Huang Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/bert.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index a05b5c0cf181a5..12771fcf0417df 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -97,6 +97,7 @@ static int __init bert_check_table(struct acpi_table_bert *bert_tab) static int __init bert_init(void) { + struct apei_resources bert_resources; struct acpi_bert_region *boot_error_region; struct acpi_table_bert *bert_tab; unsigned int region_len; @@ -127,13 +128,14 @@ static int __init bert_init(void) } region_len = bert_tab->region_length; - if (!request_mem_region(bert_tab->address, region_len, "APEI BERT")) { - pr_err("Can't request iomem region <%016llx-%016llx>.\n", - (unsigned long long)bert_tab->address, - (unsigned long long)bert_tab->address + region_len - 1); - return -EIO; - } - + apei_resources_init(&bert_resources); + rc = apei_resources_add(&bert_resources, bert_tab->address, + region_len, true); + if (rc) + return rc; + rc = apei_resources_request(&bert_resources, "APEI BERT"); + if (rc) + goto out_fini; boot_error_region = ioremap_cache(bert_tab->address, region_len); if (boot_error_region) { bert_print_all(boot_error_region, region_len); @@ -142,7 +144,9 @@ static int __init bert_init(void) rc = -ENOMEM; } - release_mem_region(bert_tab->address, region_len); + apei_resources_release(&bert_resources); +out_fini: + apei_resources_fini(&bert_resources); return rc; } From dcc235279a52d49023d4f1af7c3ed468a97015ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:29:03 +0100 Subject: [PATCH 0279/1911] gcc-plugins: fix sancov_plugin for gcc-5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of the local variable was inadvertently changed from sancov_plugin_pass_info to sancov_pass_info: scripts/gcc-plugins/sancov_plugin.c: In function ‘int plugin_init(plugin_name_args*, plugin_gcc_version*)’: scripts/gcc-plugins/sancov_plugin.c:136:67: error: ‘sancov_plugin_pass_info’ was not declared in this scope This updates the conditionally compiled reference to this variable as well.
Fixes: 5a45a4c5c3f5 ("gcc-plugins: consolidate on PASS_INFO macro") Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook --- scripts/gcc-plugins/sancov_plugin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index 9b0b5cbc5b899b..0f98634c20a097 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc #if BUILDING_GCC_VERSION < 6000 register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL); register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)>_ggc_r_gt_sancov); - register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info); + register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info); #endif return 0; From 90ec7c9dff07d676c0b9b499286b931005c6b051 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:31:43 +0100 Subject: [PATCH 0280/1911] scsi: lpfc: use div_u64 for 64-bit division The new debugfs output causes a link error on 32-bit architectures: ERROR: "__aeabi_uldivmod" [drivers/scsi/lpfc/lpfc.ko] undefined! This code is not performance critical, so we can simply use div_u64(). [mkp: fixed up whitespace] Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Fixes: 2b65e18202fd ("scsi: lpfc: NVME Target: Add debugfs support") Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 599fde4ea8b103..9f4798e9d9380d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -873,8 +873,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg1_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg1_total, + phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); len += snprintf( @@ -884,8 +884,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg2_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg2_total, + phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); len += snprintf( @@ -895,8 +895,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg3_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg3_total, + phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); len += snprintf( @@ -906,17 +906,17 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg4_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg4_total, + phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); len += snprintf( buf + len, PAGE_SIZE - len, "Total IO avg time: %08lld\n", - ((phba->ktime_seg1_total + + div_u64(phba->ktime_seg1_total + phba->ktime_seg2_total + phba->ktime_seg3_total + - phba->ktime_seg4_total) / + phba->ktime_seg4_total, 
phba->ktime_data_samples)); return len; } @@ -935,8 +935,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "cmd pass to NVME Layer\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg1_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg1_total, + phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -944,8 +944,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Driver rcv cmd OP (action)\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg2_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg2_total, + phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -953,8 +953,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "Firmware WQ doorbell: cmd\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg3_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg3_total, + phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -962,8 +962,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- MSI-X ISR for cmd cmpl\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg4_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg4_total, + phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -971,8 +971,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- NVME layer passed cmd done\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg5_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg5_total, + phba->ktime_data_samples), phba->ktime_seg5_min, phba->ktime_seg5_max); @@ -983,8 +983,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld " "max %08lld\n", - phba->ktime_seg10_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg10_total, + phba->ktime_data_samples), phba->ktime_seg10_min, phba->ktime_seg10_max); return len; @@ -995,8 +995,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Driver rcv rsp status OP\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg6_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg6_total, + phba->ktime_status_samples), phba->ktime_seg6_min, phba->ktime_seg6_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1004,8 +1004,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Firmware WQ doorbell: status\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg7_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg7_total, + phba->ktime_status_samples), phba->ktime_seg7_min, phba->ktime_seg7_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1013,8 +1013,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) " -to- MSI-X ISR for status cmpl\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg8_total / - phba->ktime_status_samples, + 
div_u64(phba->ktime_seg8_total, + phba->ktime_status_samples), phba->ktime_seg8_min, phba->ktime_seg8_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1022,8 +1022,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- NVME layer passed status done\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg9_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg9_total, + phba->ktime_status_samples), phba->ktime_seg9_min, phba->ktime_seg9_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1031,8 +1031,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "cmd completed on wire\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg10_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg10_total, + phba->ktime_status_samples), phba->ktime_seg10_min, phba->ktime_seg10_max); return len; From 825c6abbc316f496cd2b66e1fa72892cf4b49a9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:37:12 +0100 Subject: [PATCH 0281/1911] scsi: lpfc: use proper format string for dma_addr_t dma_addr_t may be either u32 or u64, depending on the kernel configuration, and we get a warning for the 32-bit case: drivers/scsi/lpfc/lpfc_nvme.c: In function 'lpfc_nvme_ls_req': drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 11 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 12 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] drivers/scsi/lpfc/lpfc_nvme.c: In function 'lpfc_nvme_ls_abort': drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 11 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 12 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] printk has a special "%pad" format string that passes the dma address by reference to solve this problem. Fixes: 01649561a8b4 ("scsi: lpfc: NVME Initiator: bind to nvme_fc api") Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 625b6589a34deb..609a908ea9db5b 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -457,11 +457,11 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " - "rsplen:%d %llux %llux\n", + "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, - pnvme_lsreq->rspdma); + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); vport->phba->fc4NvmeLsRequests++; @@ -527,11 +527,11 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6040 ENTER. 
lport %p, rport %p lsreq %p rqstlen:%d " - "rsplen:%d %llux %llux\n", + "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, - pnvme_lsreq->rspdma); + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); /* * Lock the ELS ring txcmplq and build a local list of all ELS IOs From fbdab3e7fd547e1ce558db1521659707bdf02cc6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Feb 2017 14:43:30 +0000 Subject: [PATCH 0282/1911] scsi: aacraid: remove redundant zero check on ret The check for ret being zero is redundant as a few statements earlier we break out of the while loop if ret is non-zero. Thus we can remove the zero check and also the dead-code non-zero case too. Detected by CoverityScan, CID#1411632 ("Logically Dead Code") Signed-off-by: Colin Ian King Acked-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1994c7445b54fd..a3ad042934870d 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2508,8 +2508,7 @@ int aac_command_thread(void *data) && (now.tv_usec > (1000000 / HZ))) difference = (((1000000 - now.tv_usec) * HZ) + 500000) / 1000000; - else if (ret == 0) { - + else { if (now.tv_usec > 500000) ++now.tv_sec; @@ -2520,9 +2519,6 @@ int aac_command_thread(void *data) ret = aac_send_hosttime(dev, &now); difference = (long)(unsigned)update_interval*HZ; - } else { - /* retry shortly */ - difference = 10 * HZ; } next_jiffies = jiffies + difference; if (time_before(next_check_jiffies,next_jiffies)) From 42a70abddd90374518057e989f5b7289b7b535d8 Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Fri, 24 Feb 2017 00:31:56 -0800 Subject: [PATCH 0283/1911] scsi: qedi: Fix memory leak in tmf response processing. Signed-off-by: Manish Rangankar Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index b1d3904ae8fd84..c9f0ef4e11b33c 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -165,10 +165,9 @@ static void qedi_tmf_resp_work(struct work_struct *work) iscsi_block_session(session->cls_session); rval = qedi_cleanup_all_io(qedi, qedi_conn, qedi_cmd->task, true); if (rval) { - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); qedi_clear_task_idx(qedi, qedi_cmd->task_id); iscsi_unblock_session(session->cls_session); - return; + goto exit_tmf_resp; } iscsi_unblock_session(session->cls_session); @@ -177,6 +176,8 @@ static void qedi_tmf_resp_work(struct work_struct *work) spin_lock(&session->back_lock); __iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr_ptr, NULL, 0); spin_unlock(&session->back_lock); + +exit_tmf_resp: kfree(resp_hdr_ptr); clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); } From 19d19e960598161be92a7e4828eb7706c6410ce6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Feb 2017 09:38:11 +0100 Subject: [PATCH 0284/1911] mac80211: use driver-indicated transmitter STA only for data frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I originally introduced using the driver-indicated station as an optimisation to avoid the hashtable lookup/iteration, of course it wasn't intended to really functionally change anything. 
I neglected, however, to take into account VLAN interfaces, which have the property that management and data frames are handled differently: data frames go directly to the station and the VLAN while management frames continue to be processed over the underlying/associated AP-type interface. As a consequence, when a driver used this optimisation for management frames and the user enabled VLANs, my change broke things since any management frames, particularly disassoc/deauth, were missed by hostapd. Fix this by restoring the original code path for non-data frames, they aren't critical for performance to begin with. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=194713. Big thanks goes to Jarek who bisected the issue and provided a very detailed bug report, including the crucial information that he was using VLANs in his configuration. Cc: stable@vger.kernel.org Fixes: 771e846bea9e ("mac80211: allow passing transmitter station on RX") Reported-and-tested-by: Jarek Kamiński Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 28cc494a774d0e..e48724a6725e32 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4086,15 +4086,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) - return; - goto out; - } else if (ieee80211_is_data(fc)) { + if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } + prev_sta = NULL; for_each_sta_info(local, hdr->addr2, sta, tmp) { From 3b30460c5b0ed762be75a004e924ec3f8711e032 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Feb 2017 15:30:56 +0000 Subject: [PATCH 0285/1911] crypto: ccm - move cbcmac input off the stack Commit f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") refactored the CCM driver to allow separate implementations of the underlying MAC to be provided by a platform. However, in doing so, it moved some data from the linear region to the stack, which violates the SG constraints when the stack is virtually mapped. So move idata/odata back to the request ctx struct, of which we can reasonably expect that it has been allocated using kmalloc() et al. 
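The constraint being violated is worth spelling out: scatterlist entries are resolved to pages through the linear mapping (sg_set_buf()/sg_init_one() call virt_to_page() on the buffer), so with CONFIG_VMAP_STACK a stack buffer can no longer be handed to the SG API. A minimal illustration of the before/after pattern, not code taken from this patch:

	struct scatterlist sg;
	u8 tmp[16];		/* lives on the (now virtually mapped) stack */

	/* Broken under CONFIG_VMAP_STACK: virt_to_page() on a vmap'ed
	 * address does not return the page that actually backs 'tmp'. */
	sg_init_one(&sg, tmp, sizeof(tmp));

	/* Fine: the request context is reasonably expected to come from
	 * kmalloc(), so pctx->idata sits in the linear mapping. */
	sg_init_one(&sg, pctx->idata, sizeof(pctx->idata));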
Reported-by: Johannes Berg Fixes: f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") Signed-off-by: Ard Biesheuvel Tested-by: Johannes Berg Signed-off-by: Herbert Xu --- crypto/ccm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 442848807a52b1..1ce37ae0ce565a 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -45,6 +45,7 @@ struct crypto_rfc4309_req_ctx { struct crypto_ccm_req_priv_ctx { u8 odata[16]; + u8 idata[16]; u8 auth_tag[16]; u32 flags; struct scatterlist src[3]; @@ -183,8 +184,8 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, AHASH_REQUEST_ON_STACK(ahreq, ctx->mac); unsigned int assoclen = req->assoclen; struct scatterlist sg[3]; - u8 odata[16]; - u8 idata[16]; + u8 *odata = pctx->odata; + u8 *idata = pctx->idata; int ilen, err; /* format control data for input */ From d0a0b78de4a641ff0924687bfd5a9698f1a97f7a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:30 +0200 Subject: [PATCH 0286/1911] btrfs: Make btrfs_log_all_parents take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3806853cde08d8..cf45b264cff3d8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5287,15 +5287,15 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, } static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(inode)->root; - const u64 ino = btrfs_ino(BTRFS_I(inode)); + struct btrfs_root *root = inode->root; + const u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) @@ -5506,7 +5506,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * and has a link count of 2. 
*/ if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - ret = btrfs_log_all_parents(trans, orig_inode, ctx); + ret = btrfs_log_all_parents(trans, BTRFS_I(orig_inode), ctx); if (ret) goto end_trans; } From 8e7611cf38765f1bf1324ed1190f1f8e76ab9546 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:31 +0200 Subject: [PATCH 0287/1911] btrfs: Make btrfs_insert_dir_item take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 6 +++--- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad23a73ac7e7f5..97f84a80b47952 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2982,7 +2982,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, const char *name, int name_len); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, - int name_len, struct inode *dir, + int name_len, struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 724504a2d7ac56..b13d9536d4de8c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -120,7 +120,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *dir, struct btrfs_key *location, + struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index) { int ret = 0; @@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; u32 data_size; - key.objectid = btrfs_ino(BTRFS_I(dir)); + key.objectid = btrfs_ino(dir); key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); @@ -174,7 +174,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_release_path(path); ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, - name_len, BTRFS_I(dir), &disk_key, type, index); + name_len, dir, &disk_key, type, index); out_free: btrfs_free_path(path); if (ret) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c38391e948d97e..a23391b52ab17e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6276,7 +6276,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode, &key, + BTRFS_I(parent_inode), &key, btrfs_inode_type(inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d8539979b44ff2..a55361d9554d8e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -587,7 +587,7 @@ static noinline int create_subvol(struct inode *dir, } ret = btrfs_insert_dir_item(trans, root, - name, namelen, dir, &key, + name, namelen, BTRFS_I(dir), &key, BTRFS_FT_DIR, index); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6b3e0fc2fe7ac2..294563216dd3a3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1644,7 +1644,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, parent_root, dentry->d_name.name, dentry->d_name.len, - 
parent_inode, &key, + BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, index); /* We have check then name at the beginning, so it is impossible. */ BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); From 4c570655f4f95609e9f682a36647f84b54b8a721 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:32 +0200 Subject: [PATCH 0288/1911] btrfs: make btrfs_set_inode_index_count take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a23391b52ab17e..226b5a9fdf0df4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5978,15 +5978,15 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now, * and then set the in-memory index_cnt variable to reflect * free sequence numbers */ -static int btrfs_set_inode_index_count(struct inode *inode) +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_key key, found_key; struct btrfs_path *path; struct extent_buffer *leaf; int ret; - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; @@ -6009,7 +6009,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) * else has to start at 2 */ if (path->slots[0] == 0) { - BTRFS_I(inode)->index_cnt = 2; + inode->index_cnt = 2; goto out; } @@ -6018,13 +6018,13 @@ static int btrfs_set_inode_index_count(struct inode *inode) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (found_key.objectid != btrfs_ino(inode) || found_key.type != BTRFS_DIR_INDEX_KEY) { - BTRFS_I(inode)->index_cnt = 2; + inode->index_cnt = 2; goto out; } - BTRFS_I(inode)->index_cnt = found_key.offset + 1; + inode->index_cnt = found_key.offset + 1; out: btrfs_free_path(path); return ret; @@ -6041,7 +6041,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) if (BTRFS_I(dir)->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir)); if (ret) { - ret = btrfs_set_inode_index_count(dir); + ret = btrfs_set_inode_index_count(BTRFS_I(dir)); if (ret) return ret; } From 877574e2548bbfd792b0b1200d4b46eef54c05f5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:33 +0200 Subject: [PATCH 0289/1911] btrfs: Make btrfs_set_inode_index take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +++++++++++----------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 226b5a9fdf0df4..d4ba1c4235dea5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6034,21 +6034,21 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode) * helper to find a free sequence number in a given directory. 
This current * code is very simple, later versions will do smarter things in the btree */ -int btrfs_set_inode_index(struct inode *dir, u64 *index) +int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index) { int ret = 0; - if (BTRFS_I(dir)->index_cnt == (u64)-1) { - ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir)); + if (dir->index_cnt == (u64)-1) { + ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { - ret = btrfs_set_inode_index_count(BTRFS_I(dir)); + ret = btrfs_set_inode_index_count(dir); if (ret) return ret; } } - *index = BTRFS_I(dir)->index_cnt; - BTRFS_I(dir)->index_cnt++; + *index = dir->index_cnt; + dir->index_cnt++; return ret; } @@ -6109,7 +6109,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (dir && name) { trace_btrfs_inode_request(dir); - ret = btrfs_set_inode_index(dir, index); + ret = btrfs_set_inode_index(BTRFS_I(dir), index); if (ret) { btrfs_free_path(path); iput(inode); @@ -6490,7 +6490,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink >= BTRFS_LINK_MAX) return -EMLINK; - err = btrfs_set_inode_index(dir, &index); + err = btrfs_set_inode_index(BTRFS_I(dir), &index); if (err) goto fail; @@ -9480,10 +9480,10 @@ static int btrfs_rename_exchange(struct inode *old_dir, * We need to find a free sequence number both in the source and * in the destination directory for the exchange. */ - ret = btrfs_set_inode_index(new_dir, &old_idx); + ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx); if (ret) goto out_fail; - ret = btrfs_set_inode_index(old_dir, &new_idx); + ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx); if (ret) goto out_fail; @@ -9791,7 +9791,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (dest != root) btrfs_record_root_in_trans(trans, dest); - ret = btrfs_set_inode_index(new_dir, &index); + ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index); if (ret) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a55361d9554d8e..628d1b180ceef4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -580,7 +580,7 @@ static noinline int create_subvol(struct inode *dir, /* * insert the directory item */ - ret = btrfs_set_inode_index(dir, &index); + ret = btrfs_set_inode_index(BTRFS_I(dir), &index); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 294563216dd3a3..05c2bbff2a2814 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1505,7 +1505,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* * insert the directory item */ - ret = btrfs_set_inode_index(parent_inode, &index); + ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index); BUG_ON(ret); /* -ENOMEM */ /* check if there is a file/dir which has the same name. 
*/ From 6ef06d27903d9c15505dc1a3ccf424f5018562f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:34 +0200 Subject: [PATCH 0290/1911] btrfs: Make btrfs_i_size_write take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 6 +++--- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/inode.c | 19 +++++++++---------- fs/btrfs/ioctl.c | 4 ++-- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 2 +- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 819a6d27218a90..46d117b77bd286 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -237,10 +237,10 @@ static inline u64 btrfs_ino(struct btrfs_inode *inode) return ino; } -static inline void btrfs_i_size_write(struct inode *inode, u64 size) +static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) { - i_size_write(inode, size); - BTRFS_I(inode)->disk_i_size = size; + i_size_write(&inode->vfs_inode, size); + inode->disk_i_size = size; } static inline bool btrfs_is_free_space_inode(struct inode *inode) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f7a6ee5ccc809a..1aff676f0e5b5b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1790,7 +1790,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); - btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); + btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item)); inode->i_mode = btrfs_stack_inode_mode(inode_item); set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1a131f7d6c1bed..c0f313cbbbf2c2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -260,7 +260,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, btrfs_free_path(path); } - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); truncate_pagecache(inode, 0); /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d4ba1c4235dea5..4e9407a014d9a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3617,7 +3617,7 @@ static int btrfs_read_locked_inode(struct inode *inode) set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); - btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); + btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); @@ -3988,8 +3988,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) goto out; - btrfs_i_size_write(&dir->vfs_inode, - dir->vfs_inode.i_size - name_len * 2); + btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime = @@ -4137,7 +4136,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, goto out; } - btrfs_i_size_write(dir, dir->i_size - name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2); inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = current_time(dir); ret 
= btrfs_update_inode_fallback(trans, root, dir); @@ -4184,7 +4183,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BTRFS_I(d_inode(dentry)), dentry->d_name.name, dentry->d_name.len); if (!err) { - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); /* * Propagate the last_unlink_trans value of the deleted dir to * its parent directory. This is to prevent an unrecoverable @@ -5221,7 +5220,7 @@ void btrfs_evict_inode(struct inode *inode) rsv->failfast = 1; global_rsv = &fs_info->global_block_rsv; - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); /* * This is a bit simpler than btrfs_truncate since we've already @@ -6285,7 +6284,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; } - btrfs_i_size_write(parent_inode, parent_inode->i_size + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + name_len * 2); inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = @@ -6589,7 +6588,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail_inode; @@ -9205,7 +9204,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_fop = &btrfs_dir_file_operations; set_nlink(inode, 1); - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); unlock_new_inode(inode); err = btrfs_subvol_inherit_props(trans, new_root, parent_root); @@ -10232,7 +10231,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &btrfs_symlink_aops; inode_set_bytes(inode, name_len); - btrfs_i_size_write(inode, name_len); + btrfs_i_size_write(BTRFS_I(inode), name_len); err = btrfs_update_inode(trans, root, inode); /* * Last step, add directory indexes for our symlink inode. 
This is the diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 628d1b180ceef4..86f993c958bae7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -594,7 +594,7 @@ static noinline int create_subvol(struct inode *dir, goto fail; } - btrfs_i_size_write(dir, dir->i_size + namelen * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); @@ -3298,7 +3298,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, if (endoff > destoff + olen) endoff = destoff + olen; if (endoff > inode->i_size) - btrfs_i_size_write(inode, endoff); + btrfs_i_size_write(BTRFS_I(inode), endoff); ret = btrfs_update_inode(trans, root, inode); if (ret) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 05c2bbff2a2814..61b807de3e164e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1653,7 +1653,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - btrfs_i_size_write(parent_inode, parent_inode->i_size + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = current_time(parent_inode); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf45b264cff3d8..071f6494471194 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1780,7 +1780,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, out: btrfs_release_path(path); if (!ret && update_size) { - btrfs_i_size_write(dir, dir->i_size + name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2); ret = btrfs_update_inode(trans, root, dir); } kfree(name); From 70ddc553b5522b96e65a162be1cecba532630841 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:35 +0200 Subject: [PATCH 0291/1911] btrfs: make btrfs_is_free_space_inode take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 8 ++++---- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/file-item.c | 2 +- fs/btrfs/inode.c | 23 ++++++++++++----------- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 46d117b77bd286..36eca5464e1b3b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -243,14 +243,14 @@ static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) inode->disk_i_size = size; } -static inline bool btrfs_is_free_space_inode(struct inode *inode) +static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; if (root == root->fs_info->tree_root && - btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID) + btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) return true; - if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID) return true; return false; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 97f84a80b47952..8ab0ce65a21883 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3123,7 +3123,7 @@ static inline void btrfs_force_ra(struct address_space *mapping, } struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); -int btrfs_set_inode_index(struct inode *dir, u64 *index); +int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, 
struct btrfs_inode *dir, struct btrfs_inode *inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b966335543c..b8a172e6561989 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4148,7 +4148,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, fs_info->sectorsize); - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { need_commit = 0; ASSERT(current->journal_info); } @@ -5947,7 +5947,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If we have a transaction open (can happen if we call truncate_block * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; } else if (current->journal_info) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f7b9a92ad56d17..a8a0dd21708489 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -214,7 +214,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. */ - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { path->search_commit_root = 1; path->skip_locking = 1; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4e9407a014d9a4..a465a927395e89 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -933,7 +933,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map *em; int ret = 0; - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; @@ -1231,7 +1231,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, return -ENOMEM; } - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); cow_start = (u64)-1; cur_offset = start; @@ -1670,7 +1670,7 @@ static void btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1720,7 +1720,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1854,7 +1854,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (btrfs_is_free_space_inode(inode)) + if (btrfs_is_free_space_inode(BTRFS_I(inode))) metadata = BTRFS_WQ_ENDIO_FREE_SPACE; if (bio_op(bio) != REQ_OP_WRITE) { @@ -2793,7 +2793,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) bool nolock; bool truncated = false; - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -2993,7 +2993,7 @@ 
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { wq = fs_info->endio_freespace_worker; func = btrfs_freespace_write_helper; } else { @@ -3865,7 +3865,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * The data relocation inode should also be directly updated * without delay */ - if (!btrfs_is_free_space_inode(inode) + if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { btrfs_update_root_times(trans, root); @@ -4319,7 +4319,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * for non-free space inodes and ref cows, we want to back off from * time to time */ - if (!btrfs_is_free_space_inode(inode) && + if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && test_bit(BTRFS_ROOT_REF_COWS, &root->state)) be_nice = 1; @@ -5180,7 +5180,7 @@ void btrfs_evict_inode(struct inode *inode) if (inode->i_nlink && ((btrfs_root_refs(&root->root_item) != 0 && root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || - btrfs_is_free_space_inode(inode))) + btrfs_is_free_space_inode(BTRFS_I(inode)))) goto no_delete; if (is_bad_inode(inode)) { @@ -5897,7 +5897,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; - if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode)) + if (btrfs_fs_closing(root->fs_info) && + btrfs_is_free_space_inode(BTRFS_I(inode))) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { From 04f4f916531adc7d2ca6fdb16a68b6f2ff2a8a3b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:36 +0200 Subject: [PATCH 0292/1911] btrfs: make btrfs_alloc_data_chunk_ondemand take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/file.c | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8ab0ce65a21883..76e661d008223a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2687,7 +2687,7 @@ enum btrfs_flush_state { }; int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); -int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len); void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, u64 len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b8a172e6561989..c17493ee2d9d1e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4135,10 +4135,10 @@ static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, (may_use_included ? 
s_info->bytes_may_use : 0); } -int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; u64 used; int ret = 0; @@ -4148,7 +4148,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, fs_info->sectorsize); - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(inode)) { need_commit = 0; ASSERT(current->journal_info); } @@ -4281,7 +4281,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) round_down(start, fs_info->sectorsize); start = round_down(start, fs_info->sectorsize); - ret = btrfs_alloc_data_chunk_ondemand(inode, len); + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); if (ret < 0) return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18e5146df864b5..073bc975bbb696 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2747,7 +2747,8 @@ static long btrfs_fallocate(struct file *file, int mode, * * For qgroup space, it will be checked later. */ - ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start); + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), + alloc_end - alloc_start); if (ret < 0) return ret; From baa3ba39b91bdfa270b3f6db6fdc81a1a6b4ab34 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:37 +0200 Subject: [PATCH 0293/1911] btrfs: Make drop_outstanding_extent take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c17493ee2d9d1e..19c2a9b6b52b2b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5846,7 +5846,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, * reserved extents that need to be freed. This must be called with * BTRFS_I(inode)->lock held. */ -static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) +static unsigned drop_outstanding_extent(struct btrfs_inode *inode, + u64 num_bytes) { unsigned drop_inode_space = 0; unsigned dropped_extents = 0; @@ -5854,25 +5855,23 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) num_extents = count_max_extents(num_bytes); ASSERT(num_extents); - ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents); - BTRFS_I(inode)->outstanding_extents -= num_extents; + ASSERT(inode->outstanding_extents >= num_extents); + inode->outstanding_extents -= num_extents; - if (BTRFS_I(inode)->outstanding_extents == 0 && + if (inode->outstanding_extents == 0 && test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) drop_inode_space = 1; /* * If we have more or the same amount of outstanding extents than we have * reserved then we need to leave the reserved extents count alone. 
*/ - if (BTRFS_I(inode)->outstanding_extents >= - BTRFS_I(inode)->reserved_extents) + if (inode->outstanding_extents >= inode->reserved_extents) return drop_inode_space; - dropped_extents = BTRFS_I(inode)->reserved_extents - - BTRFS_I(inode)->outstanding_extents; - BTRFS_I(inode)->reserved_extents -= dropped_extents; + dropped_extents = inode->reserved_extents - inode->outstanding_extents; + inode->reserved_extents -= dropped_extents; return dropped_extents + drop_inode_space; } @@ -6015,7 +6014,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) out_fail: spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode, num_bytes); + dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers @@ -6094,7 +6093,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode, num_bytes); + dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); if (num_bytes) to_free = calc_csum_metadata_size(inode, num_bytes, 0); From 0e6bf9b13caa41b34592369c47faab916233343b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:38 +0200 Subject: [PATCH 0294/1911] btrfs: Make calc_csum_metadata_size take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 19c2a9b6b52b2b..1fe0626852531b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5893,24 +5893,21 @@ static unsigned drop_outstanding_extent(struct btrfs_inode *inode, * * This must be called with BTRFS_I(inode)->lock held. */ -static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, +static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, int reserve) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 old_csums, num_csums; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && - BTRFS_I(inode)->csum_bytes == 0) + if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0) return 0; - old_csums = btrfs_csum_bytes_to_leaves(fs_info, - BTRFS_I(inode)->csum_bytes); + old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); if (reserve) - BTRFS_I(inode)->csum_bytes += num_bytes; + inode->csum_bytes += num_bytes; else - BTRFS_I(inode)->csum_bytes -= num_bytes; - num_csums = btrfs_csum_bytes_to_leaves(fs_info, - BTRFS_I(inode)->csum_bytes); + inode->csum_bytes -= num_bytes; + num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); /* No change, no need to reserve more */ if (old_csums == num_csums) @@ -5974,7 +5971,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) /* We always want to reserve a slot for updating the inode. */ to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); - to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); + to_reserve += calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 1); csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); @@ -6021,7 +6018,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * so we can just reduce our inodes csum bytes and carry on. 
*/ if (BTRFS_I(inode)->csum_bytes == csum_bytes) { - calc_csum_metadata_size(inode, num_bytes, 0); + calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); } else { u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; u64 bytes; @@ -6036,7 +6033,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) */ bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; BTRFS_I(inode)->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(inode, bytes, 0); + to_free = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); /* @@ -6046,7 +6043,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) */ BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(inode, bytes, 0); + bytes = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); /* * Now reset ->csum_bytes to what it should be. If bytes is @@ -6096,7 +6093,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); if (num_bytes) - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + to_free = calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); spin_unlock(&BTRFS_I(inode)->lock); if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); From 8ed7a2a0e0732b62d925041ff04a5e9621e0e58b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:39 +0200 Subject: [PATCH 0295/1911] btrfs: Make btrfs_orphan_reserve_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/inode.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 76e661d008223a..52af63e1d9a0a4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2695,7 +2695,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, - struct inode *inode); + struct btrfs_inode *inode); void btrfs_orphan_release_metadata(struct inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1fe0626852531b..8156964abc10e7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5742,10 +5742,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) /* Can only return 0 or -ENOSPC */ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; /* * We always use trans->block_rsv here as we will have reserved space * for our orphan when starting the transaction, using get_block_rsv() @@ -5762,8 +5762,8 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, */ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(BTRFS_I(inode)), num_bytes, 1); + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + num_bytes, 1); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c 
index a465a927395e89..12345d4f25a53a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3215,7 +3215,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { - ret = btrfs_orphan_reserve_metadata(trans, inode); + ret = btrfs_orphan_reserve_metadata(trans, BTRFS_I(inode)); ASSERT(!ret); if (ret) { atomic_dec(&root->orphan_inodes); From 703b391a0362e0ee35260dd16d7d0c7a12fef0e6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:40 +0200 Subject: [PATCH 0296/1911] btrfs: Make btrfs_orphan_release_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/inode.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 52af63e1d9a0a4..8eb07a9dfa6a0b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2696,7 +2696,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); -void btrfs_orphan_release_metadata(struct inode *inode); +void btrfs_orphan_release_metadata(struct btrfs_inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8156964abc10e7..2aa665674329cc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5767,14 +5767,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } -void btrfs_orphan_release_metadata(struct inode *inode) +void btrfs_orphan_release_metadata(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(BTRFS_I(inode)), num_bytes, 0); + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + num_bytes, 0); btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12345d4f25a53a..8a14b47b4dc07e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3237,7 +3237,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); - btrfs_orphan_release_metadata(inode); + btrfs_orphan_release_metadata(BTRFS_I(inode)); } if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, @@ -3291,7 +3291,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, } if (release_rsv) - btrfs_orphan_release_metadata(inode); + btrfs_orphan_release_metadata(BTRFS_I(inode)); return ret; } From 9f3db423f98c5c6c53b47f4bb2729901500bc330 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:41 +0200 Subject: [PATCH 0297/1911] btrfs: Make btrfs_delalloc_reserve_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 69 +++++++++++++++++++++--------------------- fs/btrfs/file.c | 3 +- fs/btrfs/relocation.c 
| 3 +- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8eb07a9dfa6a0b..23dcc42f479beb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2703,7 +2703,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len); void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2aa665674329cc..4db3cf4675a1a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5920,10 +5920,10 @@ static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums); } -int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; u64 to_reserve = 0; u64 csum_bytes; @@ -5943,7 +5943,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If we have a transaction open (can happen if we call truncate_block * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(inode)) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; } else if (current->journal_info) { @@ -5955,25 +5955,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) schedule_timeout(1); if (delalloc_lock) - mutex_lock(&BTRFS_I(inode)->delalloc_mutex); + mutex_lock(&inode->delalloc_mutex); num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); nr_extents = count_max_extents(num_bytes); - BTRFS_I(inode)->outstanding_extents += nr_extents; + inode->outstanding_extents += nr_extents; nr_extents = 0; - if (BTRFS_I(inode)->outstanding_extents > - BTRFS_I(inode)->reserved_extents) - nr_extents += BTRFS_I(inode)->outstanding_extents - - BTRFS_I(inode)->reserved_extents; + if (inode->outstanding_extents > inode->reserved_extents) + nr_extents += inode->outstanding_extents - + inode->reserved_extents; /* We always want to reserve a slot for updating the inode. 
*/ to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); - to_reserve += calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 1); - csum_bytes = BTRFS_I(inode)->csum_bytes; - spin_unlock(&BTRFS_I(inode)->lock); + to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); + csum_bytes = inode->csum_bytes; + spin_unlock(&inode->lock); if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { ret = btrfs_qgroup_reserve_meta(root, @@ -5989,38 +5988,38 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) goto out_fail; } - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { + &inode->runtime_flags)) { to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1); release_extra = true; } - BTRFS_I(inode)->reserved_extents += nr_extents; - spin_unlock(&BTRFS_I(inode)->lock); + inode->reserved_extents += nr_extents; + spin_unlock(&inode->lock); if (delalloc_lock) - mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); + mutex_unlock(&inode->delalloc_mutex); if (to_reserve) trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_reserve, 1); + btrfs_ino(inode), to_reserve, 1); if (release_extra) btrfs_block_rsv_release(fs_info, block_rsv, btrfs_calc_trans_metadata_size(fs_info, 1)); return 0; out_fail: - spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); + spin_lock(&inode->lock); + dropped = drop_outstanding_extent(inode, num_bytes); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. */ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) { - calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); + if (inode->csum_bytes == csum_bytes) { + calc_csum_metadata_size(inode, num_bytes, 0); } else { - u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; + u64 orig_csum_bytes = inode->csum_bytes; u64 bytes; /* @@ -6031,9 +6030,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * number of bytes that were freed while we were trying our * reservation. */ - bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; - BTRFS_I(inode)->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); + bytes = csum_bytes - inode->csum_bytes; + inode->csum_bytes = csum_bytes; + to_free = calc_csum_metadata_size(inode, bytes, 0); /* @@ -6041,9 +6040,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * been making this reservation and our ->csum_bytes were not * artificially inflated. */ - BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; + inode->csum_bytes = csum_bytes - num_bytes; bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); + bytes = calc_csum_metadata_size(inode, bytes, 0); /* * Now reset ->csum_bytes to what it should be. If bytes is @@ -6053,23 +6052,23 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * need to do anything, the other free-ers did the correct * thing. 
*/ - BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; + inode->csum_bytes = orig_csum_bytes - num_bytes; if (bytes > to_free) to_free = bytes - to_free; else to_free = 0; } - spin_unlock(&BTRFS_I(inode)->lock); + spin_unlock(&inode->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); if (to_free) { btrfs_block_rsv_release(fs_info, block_rsv, to_free); trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_free, 0); + btrfs_ino(inode), to_free, 0); } if (delalloc_lock) - mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); + mutex_unlock(&inode->delalloc_mutex); return ret; } @@ -6137,7 +6136,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) ret = btrfs_check_data_free_space(inode, start, len); if (ret < 0) return ret; - ret = btrfs_delalloc_reserve_metadata(inode, len); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); if (ret < 0) btrfs_free_reserved_data_space(inode, start, len); return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 073bc975bbb696..e32a92081547e4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1599,7 +1599,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, } } - ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), + reserve_bytes); if (ret) { if (!only_release_metadata) btrfs_free_reserved_data_space(inode, pos, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ddbde0f0836537..4ee1490f086e68 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3203,7 +3203,8 @@ static int relocate_file_extent_cluster(struct inode *inode, index = (cluster->start - offset) >> PAGE_SHIFT; last_index = (cluster->end - offset) >> PAGE_SHIFT; while (index <= last_index) { - ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), + PAGE_SIZE); if (ret) goto out; From 691fa059673b3b33c25d7925acb0a58e8204dbd6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:42 +0200 Subject: [PATCH 0298/1911] btrfs: all btrfs_delalloc_release_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 18 +++++++++--------- fs/btrfs/file.c | 5 +++-- fs/btrfs/free-space-cache.c | 3 ++- fs/btrfs/inode-map.c | 2 +- fs/btrfs/inode.c | 7 ++++--- fs/btrfs/relocation.c | 4 ++-- 7 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 23dcc42f479beb..7f7f3b60a73d20 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2704,7 +2704,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); -void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len); void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len); void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4db3cf4675a1a1..7b2313a4441e4b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6081,27 +6081,27 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode 
*inode, u64 num_bytes) * once we complete IO for a given set of bytes to release their metadata * reservations. */ -void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 to_free = 0; unsigned dropped; num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); + spin_lock(&inode->lock); + dropped = drop_outstanding_extent(inode, num_bytes); if (num_bytes) - to_free = calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); - spin_unlock(&BTRFS_I(inode)->lock); + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&inode->lock); if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); if (btrfs_is_testing(fs_info)) return; - trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_free, 0); + trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), + to_free, 0); btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); } @@ -6159,7 +6159,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) */ void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len) { - btrfs_delalloc_release_metadata(inode, len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), len); btrfs_free_reserved_data_space(inode, start, len); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e32a92081547e4..0e30d14b4916ae 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1678,7 +1678,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, spin_unlock(&BTRFS_I(inode)->lock); } if (only_release_metadata) { - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes); } else { u64 __pos; @@ -1739,7 +1739,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (release_bytes) { if (only_release_metadata) { btrfs_end_write_no_snapshoting(root); - btrfs_delalloc_release_metadata(inode, release_bytes); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + release_bytes); } else { btrfs_delalloc_release_space(inode, round_down(pos, fs_info->sectorsize), diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c0f313cbbbf2c2..7dcf0b100dcd9d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3545,7 +3545,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, if (ret) { if (release_metadata) - btrfs_delalloc_release_metadata(inode, inode->i_size); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + inode->i_size); #ifdef DEBUG btrfs_err(fs_info, "failed to write free ino cache for root %llu", diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3bbb8f09595352..5c6c20ec64d8a9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -499,7 +499,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_metadata(inode, prealloc); + btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc); goto out_put; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a14b47b4dc07e..dd0f1abdacd256 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct 
btrfs_root *root, } set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - btrfs_delalloc_release_metadata(inode, end + 1 - start); + btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); out: /* @@ -1737,7 +1737,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if (*bits & EXTENT_DO_ACCOUNTING && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), len); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) @@ -2914,7 +2914,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->len - 1, &cached_state, GFP_NOFS); out: if (root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, ordered_extent->len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + ordered_extent->len); if (trans) btrfs_end_transaction(trans); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4ee1490f086e68..e6470890ce26b0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3216,7 +3216,7 @@ static int relocate_file_extent_cluster(struct inode *inode, page = find_or_create_page(inode->i_mapping, index, mask); if (!page) { - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE); ret = -ENOMEM; goto out; @@ -3235,7 +3235,7 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!PageUptodate(page)) { unlock_page(page); put_page(page); - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE); ret = -EIO; goto out; From 6158e1ce1cc620df650ebdcfb3cc08a3d86f5a4c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:43 +0200 Subject: [PATCH 0299/1911] btrfs: Make (__)btrfs_add_inode_defrag take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 22 +++++++++++----------- fs/btrfs/inode.c | 9 +++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7f7f3b60a73d20..5246cbe4c17f83 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3215,7 +3215,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, int btrfs_auto_defrag_init(void); void btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct inode *inode); + struct btrfs_inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0e30d14b4916ae..63645e86ad9535 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -92,10 +92,10 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, * If an existing record is found the defrag item you * pass in is freed */ -static int __btrfs_add_inode_defrag(struct inode *inode, +static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; @@ -123,7 +123,7 @@ static int __btrfs_add_inode_defrag(struct inode *inode, return -EEXIST; } } - set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); + 
set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags); rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes); return 0; @@ -145,10 +145,10 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) * enabled */ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct inode_defrag *defrag; u64 transid; int ret; @@ -156,24 +156,24 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!__need_auto_defrag(fs_info)) return 0; - if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) + if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) return 0; if (trans) transid = trans->transid; else - transid = BTRFS_I(inode)->root->last_trans; + transid = inode->root->last_trans; defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); if (!defrag) return -ENOMEM; - defrag->ino = btrfs_ino(BTRFS_I(inode)); + defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; spin_lock(&fs_info->defrag_inodes_lock); - if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) { + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { /* * If we set IN_DEFRAG flag and evict the inode from memory, * and then re-read this inode, this new inode doesn't have @@ -208,7 +208,7 @@ static void btrfs_requeue_inode_defrag(struct inode *inode, * them together. */ spin_lock(&fs_info->defrag_inodes_lock); - ret = __btrfs_add_inode_defrag(inode, defrag); + ret = __btrfs_add_inode_defrag(BTRFS_I(inode), defrag); spin_unlock(&fs_info->defrag_inodes_lock); if (ret) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd0f1abdacd256..c396533fd3e52b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -389,12 +389,12 @@ static inline int inode_need_compress(struct inode *inode) return 0; } -static inline void inode_should_defrag(struct inode *inode, +static inline void inode_should_defrag(struct btrfs_inode *inode, u64 start, u64 end, u64 num_bytes, u64 small_write) { /* If this is a small write inside eof, kick off a defrag */ if (num_bytes < small_write && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + (start > 0 || end + 1 < inode->disk_i_size)) btrfs_add_inode_defrag(NULL, inode); } @@ -440,7 +440,8 @@ static noinline void compress_file_range(struct inode *inode, int compress_type = fs_info->compress_type; int redirty = 0; - inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); + inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, + SZ_16K); actual_end = min_t(u64, isize, end + 1); again: @@ -943,7 +944,7 @@ static noinline int cow_file_range(struct inode *inode, num_bytes = max(blocksize, num_bytes); disk_num_bytes = num_bytes; - inode_should_defrag(inode, start, end, num_bytes, SZ_64K); + inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); if (start == 0) { /* lets try to make an inline extent */ From 46e59791836c75210bdf1f715592b49836fad848 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:44 +0200 Subject: [PATCH 0300/1911] btrfs: Make btrfs_requeue_inode_defrag take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 
5 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 63645e86ad9535..dd1b56504e1044 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -194,10 +194,10 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, * the same inode in the tree, we will merge them together (by * __btrfs_add_inode_defrag()) and free the one that we want to requeue. */ -static void btrfs_requeue_inode_defrag(struct inode *inode, +static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; if (!__need_auto_defrag(fs_info)) @@ -208,7 +208,7 @@ static void btrfs_requeue_inode_defrag(struct inode *inode, * them together. */ spin_lock(&fs_info->defrag_inodes_lock); - ret = __btrfs_add_inode_defrag(BTRFS_I(inode), defrag); + ret = __btrfs_add_inode_defrag(inode, defrag); spin_unlock(&fs_info->defrag_inodes_lock); if (ret) goto out; @@ -334,7 +334,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, */ if (num_defrag == BTRFS_DEFRAG_BATCH) { defrag->last_offset = range.start; - btrfs_requeue_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); } else if (defrag->last_offset && !defrag->cycled) { /* * we didn't fill our defrag batch, but @@ -343,7 +343,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, */ defrag->last_offset = 0; defrag->cycled = 1; - btrfs_requeue_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); } else { kmem_cache_free(btrfs_inode_defrag_cachep, defrag); } From dcdbc059f01e242f92e3239654a1a57d15b0da5a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:45 +0200 Subject: [PATCH 0301/1911] btrfs: Make btrfs_drop_extent_cache take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 11 ++++++----- fs/btrfs/inode.c | 31 +++++++++++++++++-------------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/relocation.c | 8 ++++---- fs/btrfs/tests/inode-tests.c | 2 +- 6 files changed, 30 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5246cbe4c17f83..4db18e5dc8f908 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3219,7 +3219,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, +void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, int skip_pinned); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dd1b56504e1044..5df1de43aace4a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -529,13 +529,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * this drops all the extents in the cache that intersect the range * [start, end]. Existing extents are split as required. 
*/ -void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, +void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, int skip_pinned) { struct extent_map *em; struct extent_map *split = NULL; struct extent_map *split2 = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; u64 len = end - start + 1; u64 gen; int ret; @@ -720,7 +720,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, int leafs_visited = 0; if (drop_cache) - btrfs_drop_extent_cache(inode, start, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0); if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) modify_tree = 0; @@ -2297,7 +2297,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em = alloc_extent_map(); if (!hole_em) { - btrfs_drop_extent_cache(inode, offset, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), offset, end - 1, 0); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); } else { @@ -2314,7 +2314,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em->generation = trans->transid; do { - btrfs_drop_extent_cache(inode, offset, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), offset, + end - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c396533fd3e52b..c2383a442ff80a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -317,7 +317,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); - btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); out: /* * Don't forget to free the reserved space, as for inlined extent @@ -807,7 +807,8 @@ static noinline void submit_compressed_extents(struct inode *inode, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); if (ret) { - btrfs_drop_extent_cache(inode, async_extent->start, + btrfs_drop_extent_cache(BTRFS_I(inode), + async_extent->start, async_extent->start + async_extent->ram_size - 1, 0); goto out_free_reserve; @@ -972,7 +973,8 @@ static noinline int cow_file_range(struct inode *inode, btrfs_super_total_bytes(fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); - btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, + start + num_bytes - 1, 0); while (disk_num_bytes > 0) { unsigned long op; @@ -1040,7 +1042,7 @@ static noinline int cow_file_range(struct inode *inode, return ret; out_drop_extent_cache: - btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0); out_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); @@ -2931,7 +2933,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); /* Drop the cache for the part of the extent we didn't write. 
*/ - btrfs_drop_extent_cache(inode, start, end, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); /* * If the ordered extent had an IOERR or something else went @@ -4337,7 +4339,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, */ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == fs_info->tree_root) - btrfs_drop_extent_cache(inode, ALIGN(new_size, + btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size, fs_info->sectorsize), (u64)-1, 0); @@ -4865,7 +4867,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) hole_size); if (err) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + hole_size - 1, 0); hole_em = alloc_extent_map(); if (!hole_em) { @@ -4891,7 +4893,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) write_unlock(&em_tree->lock); if (err != -EEXIST) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), + cur_offset, cur_offset + hole_size - 1, 0); } @@ -7164,7 +7167,7 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, if (ret) { if (em) { free_extent_map(em); - btrfs_drop_extent_cache(inode, start, + btrfs_drop_extent_cache(BTRFS_I(inode), start, start + len - 1, 0); } em = ERR_PTR(ret); @@ -7531,7 +7534,7 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, } do { - btrfs_drop_extent_cache(inode, em->start, + btrfs_drop_extent_cache(BTRFS_I(inode), em->start, em->start + em->len - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 1); @@ -9280,7 +9283,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS void btrfs_test_destroy_inode(struct inode *inode) { - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } #endif @@ -9335,7 +9338,7 @@ void btrfs_destroy_inode(struct inode *inode) } btrfs_qgroup_check_reserved_leak(inode); inode_tree_del(inode); - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); free: call_rcu(&inode->i_rcu, btrfs_i_callback); } @@ -10328,7 +10331,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, break; } - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + ins.offset -1, 0); em = alloc_extent_map(); @@ -10355,7 +10358,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, write_unlock(&em_tree->lock); if (ret != -EEXIST) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + ins.offset - 1, 0); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 86f993c958bae7..bc2e03a2569e77 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3359,7 +3359,7 @@ static void clone_update_extent_map(struct inode *inode, free_extent_map(em); break; } - btrfs_drop_extent_cache(inode, em->start, + btrfs_drop_extent_cache(BTRFS_I(inode), em->start, em->start + em->len - 1, 0); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e6470890ce26b0..e48625413fcb90 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1714,8 +1714,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (!ret) continue; - btrfs_drop_extent_cache(inode, key.offset, end, - 1); + btrfs_drop_extent_cache(BTRFS_I(inode), + 
key.offset, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, end); } @@ -2130,7 +2130,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, /* the lock_extent waits for readpage to complete */ lock_extent(&BTRFS_I(inode)->io_tree, start, end); - btrfs_drop_extent_cache(inode, start, end, 1); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, start, end); } return 0; @@ -3161,7 +3161,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, free_extent_map(em); break; } - btrfs_drop_extent_cache(inode, start, end, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); } unlock_extent(&BTRFS_I(inode)->io_tree, start, end); return ret; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 4d0f038e14f1f7..924bcbf4327528 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -293,7 +293,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) goto out; } free_extent_map(em); - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); /* * All of the magic numbers are based on the mapping setup in From 35339c245b5939315e8763deb7f9b68fffe54912 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:46 +0200 Subject: [PATCH 0302/1911] btrfs: Make hole_mergeable take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5df1de43aace4a..27dfdfb7ff199d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2195,7 +2195,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, +static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, int slot, u64 start, u64 end) { struct btrfs_file_extent_item *fi; @@ -2205,7 +2205,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, return 0; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) return 0; @@ -2255,7 +2255,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } leaf = path->nodes[0]; - if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { + if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0] - 1, + offset, end)) { u64 num_bytes; path->slots[0]--; @@ -2270,7 +2271,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { + if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0], offset, end)) { u64 num_bytes; key.offset = offset; From a012a74e78d99aa27f8487c050e9ac3183bc3785 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:47 +0200 Subject: [PATCH 0303/1911] btrfs: Make fill_holes take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 27dfdfb7ff199d..ef4ecd003742c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2224,22 +2224,23 @@ static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, return 0; } -static int 
fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, - struct btrfs_path *path, u64 offset, u64 end) +static int fill_holes(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, u64 offset, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; struct extent_map *hole_em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct btrfs_key key; int ret; if (btrfs_fs_incompat(fs_info, NO_HOLES)) goto out; - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; @@ -2255,8 +2256,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } leaf = path->nodes[0]; - if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0] - 1, - offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) { u64 num_bytes; path->slots[0]--; @@ -2271,7 +2271,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0], offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; key.offset = offset; @@ -2288,7 +2288,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } btrfs_release_path(path); - ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)), + ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0); if (ret) return ret; @@ -2298,9 +2298,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em = alloc_extent_map(); if (!hole_em) { - btrfs_drop_extent_cache(BTRFS_I(inode), offset, end - 1, 0); - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + btrfs_drop_extent_cache(inode, offset, end - 1, 0); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); } else { hole_em->start = offset; hole_em->len = end - offset; @@ -2315,8 +2314,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em->generation = trans->transid; do { - btrfs_drop_extent_cache(BTRFS_I(inode), offset, - end - 1, 0); + btrfs_drop_extent_cache(inode, offset, end - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); @@ -2324,7 +2322,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, free_extent_map(hole_em); if (ret) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); } return 0; @@ -2554,8 +2552,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) trans->block_rsv = &fs_info->trans_block_rsv; if (cur_offset < drop_end && cur_offset < ino_size) { - ret = fill_holes(trans, inode, path, cur_offset, - drop_end); + ret = fill_holes(trans, BTRFS_I(inode), path, + cur_offset, drop_end); if (ret) { /* * If we failed then we didn't insert our hole @@ -2626,7 +2624,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) * cur_offset == drop_end). 
*/ if (cur_offset < ino_size && cur_offset < drop_end) { - ret = fill_holes(trans, inode, path, cur_offset, drop_end); + ret = fill_holes(trans, BTRFS_I(inode), path, + cur_offset, drop_end); if (ret) { /* Same comment as above. */ btrfs_abort_transaction(trans, ret); From 7a6d7067958c32d1c76e0bfd1a0d46be06340b2e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:48 +0200 Subject: [PATCH 0304/1911] btrfs: Make btrfs_mark_extent_written take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 8 ++++---- fs/btrfs/inode.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4db18e5dc8f908..2bfc2e289f514a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3233,7 +3233,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, int drop_cache); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end); + struct btrfs_inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ef4ecd003742c6..e704b6ce2058ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1082,10 +1082,10 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, * two or three. */ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end) + struct btrfs_inode *inode, u64 start, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_path *path; struct btrfs_file_extent_item *fi; @@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int del_slot = 0; int recow; int ret; - u64 ino = btrfs_ino(BTRFS_I(inode)); + u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c2383a442ff80a..8114bbd359d4b2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2876,7 +2876,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compress_type); - ret = btrfs_mark_extent_written(trans, inode, + ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), ordered_extent->file_offset, ordered_extent->file_offset + logical_len); From a776c6fa1feba7a84519170ebdb7f4a4155b89d6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:49 +0200 Subject: [PATCH 0305/1911] btrfs: Make btrfs_lookup_ordered_range take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/file.c | 4 ++-- fs/btrfs/inode.c | 11 ++++++----- fs/btrfs/ordered-data.c | 9 ++++----- fs/btrfs/ordered-data.h | 7 ++++--- fs/btrfs/scrub.c | 2 +- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d15b5ddb6732c6..996306d322de30 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3101,7 +3101,7 @@ static inline void __do_contiguous_readpages(struct 
extent_io_tree *tree, inode = pages[0]->mapping->host; while (1) { lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, end - start + 1); if (!ordered) break; @@ -3173,7 +3173,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, while (1) { lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, PAGE_SIZE); if (!ordered) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e704b6ce2058ff..f3648e9bfa017a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1436,7 +1436,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos, cached_state); - ordered = btrfs_lookup_ordered_range(inode, start_pos, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start_pos, last_pos - start_pos + 1); if (ordered && ordered->file_offset + ordered->len > start_pos && @@ -1494,7 +1494,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, while (1) { lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - ordered = btrfs_lookup_ordered_range(inode, lockstart, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, lockend - lockstart + 1); if (!ordered) { break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8114bbd359d4b2..723f69bd49f31b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1966,7 +1966,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) if (PagePrivate2(page)) goto out; - ordered = btrfs_lookup_ordered_range(inode, page_start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE); if (ordered) { unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, @@ -4838,7 +4838,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) lock_extent_bits(io_tree, hole_start, block_end - 1, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, hole_start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start, block_end - hole_start); if (!ordered) break; @@ -7428,7 +7428,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, * doing DIO to, so we need to make sure there's no ordered * extents in this range. */ - ordered = btrfs_lookup_ordered_range(inode, lockstart, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, lockend - lockstart + 1); /* @@ -8801,7 +8801,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, lock_extent_bits(tree, page_start, page_end, &cached_state); again: start = page_start; - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, page_end - start + 1); if (ordered) { end = min(page_end, ordered->file_offset + ordered->len - 1); @@ -8967,7 +8967,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) * we can't set the delalloc bits if there are pending ordered * extents. 
Drop our locks and wait for them to finish */ - ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, + PAGE_SIZE); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bc2aba8106293c..9a46878ba60fa9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -879,15 +879,14 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, /* Since the DIO code tries to lock a wide area we need to look for any ordered * extents that exist in the range, rather than just the start of the range. */ -struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, - u64 file_offset, - u64 len) +struct btrfs_ordered_extent *btrfs_lookup_ordered_range( + struct btrfs_inode *inode, u64 file_offset, u64 len) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; - tree = &BTRFS_I(inode)->ordered_tree; + tree = &inode->ordered_tree; spin_lock_irq(&tree->lock); node = tree_search(tree, file_offset); if (!node) { @@ -923,7 +922,7 @@ bool btrfs_have_ordered_extents_in_range(struct inode *inode, { struct btrfs_ordered_extent *oe; - oe = btrfs_lookup_ordered_range(inode, file_offset, len); + oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len); if (oe) { btrfs_put_ordered_extent(oe); return true; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index a8cb8efe6fae50..195c93b67fe002 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -189,9 +189,10 @@ void btrfs_start_ordered_extent(struct inode *inode, int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); -struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, - u64 file_offset, - u64 len); +struct btrfs_ordered_extent *btrfs_lookup_ordered_range( + struct btrfs_inode *inode, + u64 file_offset, + u64 len); bool btrfs_have_ordered_extents_in_range(struct inode *inode, u64 file_offset, u64 len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ff9a11c39f5e1d..82d873406aa321 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4249,7 +4249,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len, io_tree = &BTRFS_I(inode)->io_tree; lock_extent_bits(io_tree, lockstart, lockend, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, lockstart, len); + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, len); if (ordered) { btrfs_put_ordered_extent(ordered); ret = 1; From 85b7ab6705d9a2e6173361f5da7cbf8f9eb07864 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:50 +0200 Subject: [PATCH 0306/1911] btrfs: Make check_can_nocow take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f3648e9bfa017a..1d09eccec477b8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1474,11 +1474,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, return ret; } -static noinline int check_can_nocow(struct inode *inode, loff_t pos, +static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_fs_info 
*fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; u64 num_bytes; @@ -1493,19 +1493,20 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, fs_info->sectorsize) - 1; while (1) { - lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, + lock_extent(&inode->io_tree, lockstart, lockend); + ordered = btrfs_lookup_ordered_range(inode, lockstart, lockend - lockstart + 1); if (!ordered) { break; } - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - btrfs_start_ordered_extent(inode, ordered, 1); + unlock_extent(&inode->io_tree, lockstart, lockend); + btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); btrfs_put_ordered_extent(ordered); } num_bytes = lockend - lockstart + 1; - ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); + ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, + NULL, NULL, NULL); if (ret <= 0) { ret = 0; btrfs_end_write_no_snapshoting(root); @@ -1514,7 +1515,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, num_bytes - pos + lockstart); } - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + unlock_extent(&inode->io_tree, lockstart, lockend); return ret; } @@ -1579,7 +1580,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (ret < 0) { if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { + check_can_nocow(BTRFS_I(inode), pos, + &write_bytes) > 0) { /* * For nodata cow case, no need to reserve * data space. 
From 2cff578cfceba883eef199c52674a301a8f91d19 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:51 +0200 Subject: [PATCH 0307/1911] btrfs: Make lock_and_cleanup_extent_if_need take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1d09eccec477b8..dff7ec1770c102 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1415,13 +1415,13 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, * the other < 0 number - Something wrong happens */ static noinline int -lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, +lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 start_pos; u64 last_pos; int i; @@ -1432,30 +1432,30 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, + round_up(pos + write_bytes - start_pos, fs_info->sectorsize) - 1; - if (start_pos < inode->i_size) { + if (start_pos < inode->vfs_inode.i_size) { struct btrfs_ordered_extent *ordered; - lock_extent_bits(&BTRFS_I(inode)->io_tree, - start_pos, last_pos, cached_state); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start_pos, + lock_extent_bits(&inode->io_tree, start_pos, last_pos, + cached_state); + ordered = btrfs_lookup_ordered_range(inode, start_pos, last_pos - start_pos + 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset <= last_pos) { - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - start_pos, last_pos, - cached_state, GFP_NOFS); + unlock_extent_cached(&inode->io_tree, start_pos, + last_pos, cached_state, GFP_NOFS); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); put_page(pages[i]); } - btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_start_ordered_extent(&inode->vfs_inode, + ordered, 1); btrfs_put_ordered_extent(ordered); return -EAGAIN; } if (ordered) btrfs_put_ordered_extent(ordered); - clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, + clear_extent_bit(&inode->io_tree, start_pos, last_pos, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached_state, GFP_NOFS); @@ -1626,9 +1626,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (ret) break; - ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, - pos, write_bytes, &lockstart, - &lockend, &cached_state); + ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages, + num_pages, pos, write_bytes, &lockstart, + &lockend, &cached_state); if (ret < 0) { if (ret == -EAGAIN) goto again; From 4ac1f4acd7c60c95e3efa63d463418093aff9ce5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:52 +0200 Subject: [PATCH 0308/1911] btrfs: make free_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 14 +++++++------- fs/btrfs/extent_io.h | 4 +++- fs/btrfs/inode.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 996306d322de30..0e73e48a9c4cde 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1959,11 +1959,11 @@ static void check_page_uptodate(struct 
extent_io_tree *tree, struct page *page) SetPageUptodate(page); } -int free_io_failure(struct inode *inode, struct io_failure_record *rec) +int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) { int ret; int err = 0; - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *failure_tree = &inode->io_failure_tree; set_state_failrec(failure_tree, rec->start, NULL); ret = clear_extent_bits(failure_tree, rec->start, @@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec) if (ret) err = ret; - ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, + ret = clear_extent_bits(&inode->io_tree, rec->start, rec->start + rec->len - 1, EXTENT_DAMAGED); if (ret && !err) @@ -2140,7 +2140,7 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, } out: - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return 0; } @@ -2393,7 +2393,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } @@ -2406,7 +2406,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, (int)phy_offset, failed_bio->bi_end_io, NULL); if (!bio) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); @@ -2418,7 +2418,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); if (ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 270d03be290eec..5b4132a9093a49 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -480,6 +480,8 @@ struct io_failure_record { int in_validation; }; +struct btrfs_inode; + void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret); @@ -489,7 +491,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, struct page *page, int pg_offset, int icsum, bio_end_io_t *endio_func, void *data); -int free_io_failure(struct inode *inode, struct io_failure_record *rec); +int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS noinline u64 find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 723f69bd49f31b..f1f9e201f9f327 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7859,7 +7859,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } @@ -7873,7 +7873,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, isector, repair_endio, repair_arg); if (!bio) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); @@ -7884,7 +7884,7 @@ 
static int dio_read_error(struct inode *inode, struct bio *failed_bio, ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); if (ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); } From 0970a22e58d4c49f2c3e84519613a9db8f00f579 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:53 +0200 Subject: [PATCH 0309/1911] btrfs: make btrfs_print_data_csum_error take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 8 ++++---- fs/btrfs/compression.c | 5 +++-- fs/btrfs/inode.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 36eca5464e1b3b..c10d21b2f0df6f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -324,21 +324,21 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) &BTRFS_I(inode)->runtime_flags); } -static inline void btrfs_print_data_csum_error(struct inode *inode, +static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; /* Output minus objectid, which is more meaningful */ if (root->objectid >= BTRFS_LAST_FREE_OBJECTID) btrfs_warn_rl(root->fs_info, "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", - root->objectid, btrfs_ino(BTRFS_I(inode)), + root->objectid, btrfs_ino(inode), logical_start, csum, csum_expected, mirror_num); else btrfs_warn_rl(root->fs_info, "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", - root->objectid, btrfs_ino(BTRFS_I(inode)), + root->objectid, btrfs_ino(inode), logical_start, csum, csum_expected, mirror_num); } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 903c32c9eb2221..9ce85d5e5cae76 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -124,8 +124,9 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr); if (csum != *cb_sum) { - btrfs_print_data_csum_error(inode, disk_start, csum, - *cb_sum, cb->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), + disk_start, csum, + *cb_sum, cb->mirror_num); ret = -EIO; goto fail; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f1f9e201f9f327..fc00117a0dd066 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3032,7 +3032,7 @@ static int __readpage_endio_check(struct inode *inode, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(inode, start, csum, csum_expected, + btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, io_bio->mirror_num); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); From f898ac6ae339782bc304b2b15c9e187e438da9f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:54 +0200 Subject: [PATCH 0310/1911] btrfs: make check_compressed_csum take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/compression.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9ce85d5e5cae76..f32e86b612404c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -100,7 +100,7 @@ static struct bio *compressed_bio_alloc(struct block_device *bdev, return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags); } -static int check_compressed_csum(struct inode *inode, +static 
int check_compressed_csum(struct btrfs_inode *inode, struct compressed_bio *cb, u64 disk_start) { @@ -111,7 +111,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + if (inode->flags & BTRFS_INODE_NODATASUM) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr); if (csum != *cb_sum) { - btrfs_print_data_csum_error(BTRFS_I(inode), - disk_start, csum, + btrfs_print_data_csum_error(inode, disk_start, csum, *cb_sum, cb->mirror_num); ret = -EIO; goto fail; @@ -166,7 +165,7 @@ static void end_compressed_bio_read(struct bio *bio) goto out; inode = cb->inode; - ret = check_compressed_csum(inode, cb, + ret = check_compressed_csum(BTRFS_I(inode), cb, (u64)bio->bi_iter.bi_sector << 9); if (ret) goto csum_failed; From 9d4f7f8ad69112137da0bbe4036b94739ae25f78 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:55 +0200 Subject: [PATCH 0311/1911] btrfs: make repair_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 13 +++++++------ fs/btrfs/extent_io.h | 8 ++++---- fs/btrfs/scrub.c | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0e73e48a9c4cde..e99e8dad3cc7a4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1992,10 +1992,11 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) * currently, there can be no more than two copies of every data bit. thus, * exactly one rewrite is required. */ -int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, - struct page *page, unsigned int pg_offset, int mirror_num) +int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, + u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio; struct btrfs_device *dev; u64 map_length = 0; @@ -2054,7 +2055,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, btrfs_info_rl_in_rcu(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", - btrfs_ino(BTRFS_I(inode)), start, + btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); btrfs_bio_counter_dec(fs_info); bio_put(bio); @@ -2074,7 +2075,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; - ret = repair_io_failure(fs_info->btree_inode, start, + ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start, PAGE_SIZE, start, p, start - page_offset(p), mirror_num); if (ret) @@ -2133,7 +2134,7 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { - repair_io_failure(inode, start, failrec->len, + repair_io_failure(BTRFS_I(inode), start, failrec->len, failrec->logical, page, pg_offset, failrec->failed_mirror); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5b4132a9093a49..039a6daa392bd2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -451,10 +451,11 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); struct btrfs_fs_info; +struct btrfs_inode; -int 
repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, - struct page *page, unsigned int pg_offset, - int mirror_num); +int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, + u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num); int clean_io_failure(struct inode *inode, u64 start, struct page *page, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); @@ -480,7 +481,6 @@ struct io_failure_record { int in_validation; }; -struct btrfs_inode; void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 82d873406aa321..bdf58b0eaef837 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -731,7 +731,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) ret = -EIO; goto out; } - ret = repair_io_failure(inode, offset, PAGE_SIZE, + ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE, fixup->logical, page, offset - page_offset(page), fixup->mirror_num); From b30cb441fcf8786773dab590739ca4ebc2b4628b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:56 +0200 Subject: [PATCH 0312/1911] btrfs: make clean_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 21 +++++++++++---------- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/inode.c | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e99e8dad3cc7a4..b08fa96678eeca 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2090,23 +2090,23 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record */ -int clean_io_failure(struct inode *inode, u64 start, struct page *page, +int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, unsigned int pg_offset) { u64 private; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_state *state; int num_copies; int ret; private = 0; - ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + ret = count_range_bits(&inode->io_failure_tree, &private, (u64)-1, 1, EXTENT_DIRTY, 0); if (!ret) return 0; - ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start, + ret = get_state_failrec(&inode->io_failure_tree, start, &failrec); if (ret) return 0; @@ -2123,25 +2123,25 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (fs_info->sb->s_flags & MS_RDONLY) goto out; - spin_lock(&BTRFS_I(inode)->io_tree.lock); - state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, + spin_lock(&inode->io_tree.lock); + state = find_first_extent_bit_state(&inode->io_tree, failrec->start, EXTENT_LOCKED); - spin_unlock(&BTRFS_I(inode)->io_tree.lock); + spin_unlock(&inode->io_tree.lock); if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { - repair_io_failure(BTRFS_I(inode), start, failrec->len, + repair_io_failure(inode, start, failrec->len, failrec->logical, page, pg_offset, failrec->failed_mirror); } } out: - free_io_failure(BTRFS_I(inode), failrec); 
+ free_io_failure(inode, failrec); return 0; } @@ -2577,7 +2577,8 @@ static void end_bio_extent_readpage(struct bio *bio) if (ret) uptodate = 0; else - clean_io_failure(inode, start, page, 0); + clean_io_failure(BTRFS_I(inode), start, + page, 0); } if (likely(uptodate)) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 039a6daa392bd2..0f67222f446431 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -456,8 +456,8 @@ struct btrfs_inode; int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, u64 logical, struct page *page, unsigned int pg_offset, int mirror_num); -int clean_io_failure(struct inode *inode, u64 start, struct page *page, - unsigned int pg_offset); +int clean_io_failure(struct btrfs_inode *inode, u64 start, + struct page *page, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int mirror_num); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc00117a0dd066..4498921bb6085e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7914,7 +7914,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) done->uptodate = 1; bio_for_each_segment_all(bvec, bio, i) - clean_io_failure(done->inode, done->start, bvec->bv_page, 0); + clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0); end: complete(&done->done); bio_put(bio); @@ -8000,7 +8000,7 @@ static void btrfs_retry_endio(struct bio *bio) bvec->bv_page, bvec->bv_offset, done->start, bvec->bv_len); if (!ret) - clean_io_failure(done->inode, done->start, + clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, bvec->bv_offset); else uptodate = 0; From 7ab7956ec3fc77667739d065748d96f87bff6c5d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:57 +0200 Subject: [PATCH 0313/1911] btrfs: make btrfs_free_io_failure_record take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/inode.c | 9 +++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b08fa96678eeca..ded750eb90c476 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2152,9 +2152,9 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, * - under ordered extent * - the inode is freeing */ -void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) +void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end) { - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *failure_tree = &inode->io_failure_tree; struct io_failure_record *failrec; struct extent_state *state, *next; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0f67222f446431..345fc33f843d80 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -482,7 +482,8 @@ struct io_failure_record { }; -void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); +void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, + u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret); int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4498921bb6085e..4805489984eb55 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2803,9 +2803,10 @@ static int 
btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - btrfs_free_io_failure_record(inode, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1); + btrfs_free_io_failure_record(BTRFS_I(inode), + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; @@ -5196,7 +5197,7 @@ void btrfs_evict_inode(struct inode *inode) if (!special_file(inode->i_mode)) btrfs_wait_ordered_range(inode, 0, (u64)-1); - btrfs_free_io_failure_record(inode, 0, (u64)-1); + btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1); if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, From 3d6ae7bb6a64b7dbc516863eeb99c3182d499aba Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:58 +0200 Subject: [PATCH 0314/1911] btrfs: make btrfs_orphan_del take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4805489984eb55..ad93974f86a9bc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3271,20 +3271,20 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) * item for this particular inode. */ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; int delete_item = 0; int release_rsv = 0; int ret = 0; spin_lock(&root->orphan_lock); if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) delete_item = 1; if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) release_rsv = 1; spin_unlock(&root->orphan_lock); @@ -3292,11 +3292,11 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, atomic_dec(&root->orphan_inodes); if (trans) ret = btrfs_del_orphan_item(trans, root, - btrfs_ino(BTRFS_I(inode))); + btrfs_ino(inode)); } if (release_rsv) - btrfs_orphan_release_metadata(BTRFS_I(inode)); + btrfs_orphan_release_metadata(inode); return ret; } @@ -3467,7 +3467,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = btrfs_truncate(inode); if (ret) - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); } else { nr_unlink++; } @@ -5012,7 +5012,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* To get a stable disk_i_size */ err = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (err) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); return err; } @@ -5024,11 +5024,11 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); return ret; } i_size_write(inode, BTRFS_I(inode)->disk_i_size); - err = btrfs_orphan_del(trans, inode); + err = btrfs_orphan_del(trans, BTRFS_I(inode)); if (err) btrfs_abort_transaction(trans, err); btrfs_end_transaction(trans); @@ -5190,7 +5190,7 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; if (is_bad_inode(inode)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto no_delete; } /* do we really want it for 
->i_nlink > 0 and zero btrfs_root_refs? */ @@ -5213,13 +5213,13 @@ void btrfs_evict_inode(struct inode *inode) ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode)); if (ret) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto no_delete; } rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); if (!rsv) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto no_delete; } rsv->size = min_size; @@ -5261,14 +5261,14 @@ void btrfs_evict_inode(struct inode *inode) btrfs_warn(fs_info, "Could not get space for a delete, will truncate on mount %d", ret); - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } @@ -5294,7 +5294,7 @@ void btrfs_evict_inode(struct inode *inode) if (ret) { ret = btrfs_commit_transaction(trans); if (ret) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } @@ -5323,9 +5323,9 @@ void btrfs_evict_inode(struct inode *inode) */ if (ret == 0) { trans->block_rsv = root->orphan_block_rsv; - btrfs_orphan_del(trans, inode); + btrfs_orphan_del(trans, BTRFS_I(inode)); } else { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); } trans->block_rsv = &fs_info->trans_block_rsv; @@ -6534,7 +6534,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, * If new hard link count is 1, it's a file created * with open(2) O_TMPFILE flag. */ - err = btrfs_orphan_del(trans, inode); + err = btrfs_orphan_del(trans, BTRFS_I(inode)); if (err) goto fail; } @@ -9167,7 +9167,7 @@ static int btrfs_truncate(struct inode *inode) if (ret == 0 && inode->i_nlink > 0) { trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_orphan_del(trans, inode); + ret = btrfs_orphan_del(trans, BTRFS_I(inode)); if (ret) err = ret; } From 73f2e545b68f6af033fd2c083ca9dc3079e79083 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:59 +0200 Subject: [PATCH 0315/1911] btrfs: Make btrfs_orphan_add take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 39 ++++++++++++++++++++------------------- fs/btrfs/relocation.c | 2 +- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2bfc2e289f514a..af68f8452f196f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3174,7 +3174,8 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); -int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); +int btrfs_orphan_add(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b2313a4441e4b..3853fab2f49f5f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10337,7 +10337,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, mutex_unlock(&trans->transaction->cache_write_mutex); if (!IS_ERR(inode)) { - ret = btrfs_orphan_add(trans, 
inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ad93974f86a9bc..d5339b8d46637f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3172,10 +3172,11 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, * NOTE: caller of this function should reserve 5 units of metadata for * this function. */ -int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) +int btrfs_orphan_add(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = NULL; int reserve = 0; int insert = 0; @@ -3197,7 +3198,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) { + &inode->runtime_flags)) { #if 0 /* * For proper ENOSPC handling, we should do orphan @@ -3214,39 +3215,38 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) reserve = 1; spin_unlock(&root->orphan_lock); /* grab metadata reservation from transaction handle */ if (reserve) { - ret = btrfs_orphan_reserve_metadata(trans, BTRFS_I(inode)); + ret = btrfs_orphan_reserve_metadata(trans, inode); ASSERT(!ret); if (ret) { atomic_dec(&root->orphan_inodes); clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); if (insert) clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); return ret; } } /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { - ret = btrfs_insert_orphan_item(trans, root, - btrfs_ino(BTRFS_I(inode))); + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); - btrfs_orphan_release_metadata(BTRFS_I(inode)); + &inode->runtime_flags); + btrfs_orphan_release_metadata(inode); } if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); btrfs_abort_transaction(trans, ret); return ret; } @@ -3458,7 +3458,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = PTR_ERR(trans); goto out; } - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); btrfs_end_transaction(trans); if (ret) { iput(inode); @@ -4060,7 +4060,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) goto out; if (inode->i_nlink == 0) { - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) goto out; } @@ -4177,7 +4177,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } - err = btrfs_orphan_add(trans, inode); + err = btrfs_orphan_add(trans, BTRFS_I(inode)); if (err) goto out; @@ -4992,7 +4992,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * so we need to guarantee from this point on that everything * will be consistent. 
*/ - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); btrfs_end_transaction(trans); if (ret) return ret; @@ -9865,7 +9865,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.len); } if (!ret && new_inode->i_nlink == 0) - ret = btrfs_orphan_add(trans, d_inode(new_dentry)); + ret = btrfs_orphan_add(trans, + BTRFS_I(d_inode(new_dentry))); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -10482,7 +10483,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) ret = btrfs_update_inode(trans, root, inode); if (ret) goto out_inode; - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) goto out_inode; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e48625413fcb90..d60df51959f7ab 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4246,7 +4246,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; - err = btrfs_orphan_add(trans, inode); + err = btrfs_orphan_add(trans, BTRFS_I(inode)); out: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); From aefa6115c04ee561302e133ab5916246cd3695f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:00 +0200 Subject: [PATCH 0316/1911] btrfs: Make check_parent_dirs_for_sync take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 071f6494471194..399561b039c3ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5045,14 +5045,14 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, * a full commit is required. */ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct dentry *parent, struct super_block *sb, u64 last_committed) { int ret = 0; struct dentry *old_parent = NULL; - struct inode *orig_inode = inode; + struct btrfs_inode *orig_inode = inode; /* * for regular files, if its inode is already on disk, we don't @@ -5060,15 +5060,15 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, * we can use the last_unlink_trans field to record renames * and other fun in this file. */ - if (S_ISREG(inode->i_mode) && - BTRFS_I(inode)->generation <= last_committed && - BTRFS_I(inode)->last_unlink_trans <= last_committed) - goto out; + if (S_ISREG(inode->vfs_inode.i_mode) && + inode->generation <= last_committed && + inode->last_unlink_trans <= last_committed) + goto out; - if (!S_ISDIR(inode->i_mode)) { + if (!S_ISDIR(inode->vfs_inode.i_mode)) { if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) goto out; - inode = d_inode(parent); + inode = BTRFS_I(d_inode(parent)); } while (1) { @@ -5079,10 +5079,10 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, * think this inode has already been logged. 
*/ if (inode != orig_inode) - BTRFS_I(inode)->logged_trans = trans->transid; + inode->logged_trans = trans->transid; smp_mb(); - if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) { + if (btrfs_must_commit_transaction(trans, inode)) { ret = 1; break; } @@ -5091,8 +5091,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, break; if (IS_ROOT(parent)) { - inode = d_inode(parent); - if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) + inode = BTRFS_I(d_inode(parent)); + if (btrfs_must_commit_transaction(trans, inode)) ret = 1; break; } @@ -5100,7 +5100,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, parent = dget_parent(parent); dput(old_parent); old_parent = parent; - inode = d_inode(parent); + inode = BTRFS_I(d_inode(parent)); } dput(old_parent); @@ -5429,7 +5429,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } - ret = check_parent_dirs_for_sync(trans, inode, parent, + ret = check_parent_dirs_for_sync(trans, BTRFS_I(inode), parent, sb, last_committed); if (ret) goto end_no_trans; From 19df27a9e47085dcc81cbf9e7b464e7402511e18 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:01 +0200 Subject: [PATCH 0317/1911] btrfs: make btrfs_log_inode_parent take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 50 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 399561b039c3ab..f8965b0812124f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5390,7 +5390,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, * the last committed transaction */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, + struct btrfs_inode *inode, struct dentry *parent, const loff_t start, const loff_t end, @@ -5404,9 +5405,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, int ret = 0; u64 last_committed = fs_info->last_trans_committed; bool log_dentries = false; - struct inode *orig_inode = inode; + struct btrfs_inode *orig_inode = inode; - sb = inode->i_sb; + sb = inode->vfs_inode.i_sb; if (btrfs_test_opt(fs_info, NOTREELOG)) { ret = 1; @@ -5423,18 +5424,17 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } - if (root != BTRFS_I(inode)->root || - btrfs_root_refs(&root->root_item) == 0) { + if (root != inode->root || btrfs_root_refs(&root->root_item) == 0) { ret = 1; goto end_no_trans; } - ret = check_parent_dirs_for_sync(trans, BTRFS_I(inode), parent, - sb, last_committed); + ret = check_parent_dirs_for_sync(trans, inode, parent, sb, + last_committed); if (ret) goto end_no_trans; - if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) { + if (btrfs_inode_in_log(inode, trans->transid)) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } @@ -5443,8 +5443,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only, - start, end, ctx); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); if (ret) goto end_trans; @@ -5454,14 +5453,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * we can use the last_unlink_trans field to record renames * and other fun in this file. 
*/ - if (S_ISREG(inode->i_mode) && - BTRFS_I(inode)->generation <= last_committed && - BTRFS_I(inode)->last_unlink_trans <= last_committed) { + if (S_ISREG(inode->vfs_inode.i_mode) && + inode->generation <= last_committed && + inode->last_unlink_trans <= last_committed) { ret = 0; goto end_trans; } - if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) + if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries) log_dentries = true; /* @@ -5505,8 +5504,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * but the file inode does not have a matching BTRFS_INODE_REF_KEY item * and has a link count of 2. */ - if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - ret = btrfs_log_all_parents(trans, BTRFS_I(orig_inode), ctx); + if (inode->last_unlink_trans > last_committed) { + ret = btrfs_log_all_parents(trans, orig_inode, ctx); if (ret) goto end_trans; } @@ -5515,14 +5514,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; - inode = d_inode(parent); - if (root != BTRFS_I(inode)->root) + inode = BTRFS_I(d_inode(parent)); + if (root != inode->root) break; - if (BTRFS_I(inode)->generation > last_committed) { - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), - LOG_INODE_EXISTS, - 0, LLONG_MAX, ctx); + if (inode->generation > last_committed) { + ret = btrfs_log_inode(trans, root, inode, + LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); if (ret) goto end_trans; } @@ -5534,7 +5532,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, old_parent = parent; } if (log_dentries) - ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx); + ret = log_new_dir_dentries(trans, root, orig_inode, ctx); else ret = 0; end_trans: @@ -5566,8 +5564,8 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct dentry *parent = dget_parent(dentry); int ret; - ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent, - start, end, 0, ctx); + ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)), + parent, start, end, 0, ctx); dput(parent); return ret; @@ -5829,7 +5827,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0, + return btrfs_log_inode_parent(trans, root, inode, parent, 0, LLONG_MAX, 1, NULL); } From 9cdc51241090a36d3b7b4ff374fb18b764b3b3a4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:02 +0200 Subject: [PATCH 0318/1911] btrfs: Make btrfs_extent_item_to_extent_map take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 10 +++++----- fs/btrfs/inode.c | 3 ++- fs/btrfs/ioctl.c | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af68f8452f196f..2e0845eafcbc19 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3081,7 +3081,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); -void btrfs_extent_item_to_extent_map(struct inode *inode, +void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, const bool new_inline, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c 
index a8a0dd21708489..54ec6d6ef0163f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -904,14 +904,14 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, goto out; } -void btrfs_extent_item_to_extent_map(struct inode *inode, +void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, const bool new_inline, struct extent_map *em) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf = path->nodes[0]; const int slot = path->slots[0]; struct btrfs_key key; @@ -976,8 +976,8 @@ void btrfs_extent_item_to_extent_map(struct inode *inode, } } else { btrfs_err(fs_info, - "unknown file extent item type %d, inode %llu, offset %llu, root %llu", - type, btrfs_ino(BTRFS_I(inode)), extent_start, + "unknown file extent item type %d, inode %llu, offset %llu, " + "root %llu", type, btrfs_ino(inode), extent_start, root->root_key.objectid); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d5339b8d46637f..0b46542010fb15 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6862,7 +6862,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, goto not_found_em; } - btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); + btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, item, + new_inline, em); if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bc2e03a2569e77..bb4e6ec26b7f1d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3333,7 +3333,8 @@ static void clone_update_extent_map(struct inode *inode, fi = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(inode, path, fi, false, em); + btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, fi, + false, em); em->generation = -1; if (btrfs_file_extent_type(path->nodes[0], fi) == BTRFS_FILE_EXTENT_INLINE) From 6fc0ef687029760476e309aa85d437a47313eb08 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:03 +0200 Subject: [PATCH 0319/1911] btrfs: Make btrfs_clear_bit_hook take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- fs/btrfs/extent_io.h | 6 ++++-- fs/btrfs/inode.c | 37 +++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ded750eb90c476..d2108296256567 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -428,7 +428,8 @@ static void clear_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->clear_bit_hook) - tree->ops->clear_bit_hook(tree->mapping->host, state, bits); + tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host), + state, bits); } static void set_state_bits(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 345fc33f843d80..cd8b3dd6948df1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -84,6 +84,7 @@ extern void le_bitmap_clear(u8 *map, unsigned int start, int len); struct extent_state; struct btrfs_root; +struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; @@ -107,8 +108,9 @@ struct extent_io_ops { struct extent_state *state, int uptodate); void 
(*set_bit_hook)(struct inode *inode, struct extent_state *state, unsigned *bits); - void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - unsigned *bits); + void (*clear_bit_hook)(struct btrfs_inode *inode, + struct extent_state *state, + unsigned *bits); void (*merge_extent_hook)(struct inode *inode, struct extent_state *new, struct extent_state *other); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0b46542010fb15..0600d55bb173b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1703,18 +1703,18 @@ static void btrfs_set_bit_hook(struct inode *inode, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static void btrfs_clear_bit_hook(struct inode *inode, +static void btrfs_clear_bit_hook(struct btrfs_inode *inode, struct extent_state *state, unsigned *bits) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 len = state->end + 1 - state->start; u32 num_extents = count_max_extents(len); - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) - BTRFS_I(inode)->defrag_bytes -= len; - spin_unlock(&BTRFS_I(inode)->lock); + inode->defrag_bytes -= len; + spin_unlock(&inode->lock); /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1722,15 +1722,15 @@ static void btrfs_clear_bit_hook(struct inode *inode, * bit, which is only set or cleared with irqs on */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { - struct btrfs_root *root = BTRFS_I(inode)->root; - bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); + struct btrfs_root *root = inode->root; + bool do_list = !btrfs_is_free_space_inode(inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents -= num_extents; - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + inode->outstanding_extents -= num_extents; + spin_unlock(&inode->lock); } /* @@ -1740,7 +1740,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if (*bits & EXTENT_DO_ACCOUNTING && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(BTRFS_I(inode), len); + btrfs_delalloc_release_metadata(inode, len); /* For sanity tests. 
*/ if (btrfs_is_testing(fs_info)) @@ -1750,18 +1750,19 @@ static void btrfs_clear_bit_hook(struct inode *inode, && do_list && !(state->state & EXTENT_NORESERVE) && (*bits & (EXTENT_DO_ACCOUNTING | EXTENT_CLEAR_DATA_RESV))) - btrfs_free_reserved_data_space_noquota(inode, + btrfs_free_reserved_data_space_noquota( + &inode->vfs_inode, state->start, len); __percpu_counter_add(&fs_info->delalloc_bytes, -len, fs_info->delalloc_batch); - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->delalloc_bytes -= len; - if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && + spin_lock(&inode->lock); + inode->delalloc_bytes -= len; + if (do_list && inode->delalloc_bytes == 0 && test_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags)) - btrfs_del_delalloc_inode(root, inode); - spin_unlock(&BTRFS_I(inode)->lock); + &inode->runtime_flags)) + btrfs_del_delalloc_inode(root, &inode->vfs_inode); + spin_unlock(&inode->lock); } } From a2f392e4015a45c896528d5fdfcec4bad3ddd121 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:04 +0200 Subject: [PATCH 0320/1911] btrfs: Make clone_update_extent_map take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bb4e6ec26b7f1d..12ae210f87194f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3311,20 +3311,19 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, return ret; } -static void clone_update_extent_map(struct inode *inode, +static void clone_update_extent_map(struct btrfs_inode *inode, const struct btrfs_trans_handle *trans, const struct btrfs_path *path, const u64 hole_offset, const u64 hole_len) { - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; int ret; em = alloc_extent_map(); if (!em) { - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); return; } @@ -3333,13 +3332,12 @@ static void clone_update_extent_map(struct inode *inode, fi = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, fi, - false, em); + btrfs_extent_item_to_extent_map(inode, path, fi, false, em); em->generation = -1; if (btrfs_file_extent_type(path->nodes[0], fi) == BTRFS_FILE_EXTENT_INLINE) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); } else { em->start = hole_offset; em->len = hole_len; @@ -3360,13 +3358,12 @@ static void clone_update_extent_map(struct inode *inode, free_extent_map(em); break; } - btrfs_drop_extent_cache(BTRFS_I(inode), em->start, + btrfs_drop_extent_cache(inode, em->start, em->start + em->len - 1, 0); } if (ret) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); } /* @@ -3792,11 +3789,12 @@ static int btrfs_clone(struct inode *src, struct inode *inode, /* If we have an implicit hole (NO_HOLES feature). 
*/ if (drop_start < new_key.offset) - clone_update_extent_map(inode, trans, + clone_update_extent_map(BTRFS_I(inode), trans, NULL, drop_start, new_key.offset - drop_start); - clone_update_extent_map(inode, trans, path, 0, 0); + clone_update_extent_map(BTRFS_I(inode), trans, + path, 0, 0); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -3846,8 +3844,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode, btrfs_end_transaction(trans); goto out; } - clone_update_extent_map(inode, trans, NULL, last_dest_end, - destoff + len - last_dest_end); + clone_update_extent_map(BTRFS_I(inode), trans, NULL, + last_dest_end, + destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen, no_time_update); } From 1c8c9c5216295711c79d0e512dc8b3d5f1bfc35d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:05 +0200 Subject: [PATCH 0321/1911] btrfs: Make check_extent_to_block take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bdf58b0eaef837..b0251eb1239fce 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4236,7 +4236,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) scrub_pending_trans_workers_dec(sctx); } -static int check_extent_to_block(struct inode *inode, u64 start, u64 len, +static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, u64 logical) { struct extent_state *cached_state = NULL; @@ -4246,10 +4246,10 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len, u64 lockstart = start, lockend = start + len - 1; int ret = 0; - io_tree = &BTRFS_I(inode)->io_tree; + io_tree = &inode->io_tree; lock_extent_bits(io_tree, lockstart, lockend, &cached_state); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, len); + ordered = btrfs_lookup_ordered_range(inode, lockstart, len); if (ordered) { btrfs_put_ordered_extent(ordered); ret = 1; @@ -4325,7 +4325,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, io_tree = &BTRFS_I(inode)->io_tree; nocow_ctx_logical = nocow_ctx->logical; - ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical); + ret = check_extent_to_block(BTRFS_I(inode), offset, len, + nocow_ctx_logical); if (ret) { ret = ret > 0 ? 0 : ret; goto out; @@ -4372,7 +4373,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, } } - ret = check_extent_to_block(inode, offset, len, + ret = check_extent_to_block(BTRFS_I(inode), offset, len, nocow_ctx_logical); if (ret) { ret = ret > 0 ? 0 : ret; From fc4f21b1d8d023cf0a2b1b050ae18e15dbe7068e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:06 +0200 Subject: [PATCH 0322/1911] btrfs: Make get_extent_t take btrfs_inode In addition to changing the signature, this patch also switches all the functions which are used as an argument to also take btrfs_inode. Namely those are: btrfs_get_extent and btrfs_get_extent_filemap. 
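[Editorial note, not part of the original patch] The conversion pattern used across these patches is mechanical: the callee now takes struct btrfs_inode * and reaches the embedded VFS inode only through ->vfs_inode where a generic kernel API needs it, while any caller still holding a struct inode * wraps it once with BTRFS_I() at the call site. A minimal sketch of that shape follows; my_example_helper() and my_example_caller() are made-up names used purely for illustration, while BTRFS_I(), ->vfs_inode, ->lock and ->delalloc_bytes are the real btrfs_inode accessors and fields visible in the hunks of this series.

/*
 * Sketch only: my_example_helper()/my_example_caller() are hypothetical
 * and not introduced by this patch; they just show the calling convention
 * the series converts btrfs helpers to.
 */
static void my_example_helper(struct btrfs_inode *inode, u64 len)
{
	/* btrfs-private state is reached directly through btrfs_inode ... */
	spin_lock(&inode->lock);
	inode->delalloc_bytes += len;
	spin_unlock(&inode->lock);

	/* ... and the VFS inode only where a generic kernel API needs it. */
	i_size_write(&inode->vfs_inode, len);
}

static void my_example_caller(struct inode *inode, u64 len)
{
	/* callers still holding a VFS inode convert once, at the boundary */
	my_example_helper(BTRFS_I(inode), len);
}

The net effect, as the diffs that follow show for btrfs_get_extent() and btrfs_get_extent_fiemap(), is fewer BTRFS_I() conversions scattered inside the helpers themselves and a clearer split between VFS-facing and btrfs-internal interfaces.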
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 12 +++++----- fs/btrfs/disk-io.c | 6 ++--- fs/btrfs/extent_io.c | 6 ++--- fs/btrfs/extent_io.h | 2 +- fs/btrfs/file.c | 7 +++--- fs/btrfs/inode.c | 34 +++++++++++++++------------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/tests/inode-tests.c | 44 +++++++++++++++++++----------------- 8 files changed, 59 insertions(+), 54 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2e0845eafcbc19..809736ec549b43 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3100,9 +3100,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, int delay_iput); void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); -struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create); +struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, + struct page *page, size_t pg_offset, u64 start, + u64 len, int create); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); @@ -3166,9 +3166,9 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *was_new); -struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 end, - int create); +struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, + struct page *page, size_t pg_offset, + u64 start, u64 end, int create); int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b06f557c176ea..a53ff3bff8eb23 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -219,12 +219,12 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -static struct extent_map *btree_get_extent(struct inode *inode, +static struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; int ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2108296256567..c3abf846a4495f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2860,7 +2860,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, *em_cached = NULL; } - em = get_extent(inode, page, pg_offset, start, len, 0); + em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0); if (em_cached && !IS_ERR_OR_NULL(em)) { BUG_ON(*em_cached); atomic_inc(&em->refs); @@ -3373,7 +3373,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page_end, NULL, 1); break; } - em = epd->get_extent(inode, page, pg_offset, cur, + em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur, end - cur + 1, 1); if (IS_ERR_OR_NULL(em)) { SetPageError(page); @@ -4338,7 +4338,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, if (len == 0) break; len = ALIGN(len, sectorsize); - em = get_extent(inode, NULL, 0, offset, len, 0); + em = 
get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0); if (IS_ERR_OR_NULL(em)) return em; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index cd8b3dd6948df1..c16260c6c14f16 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -211,7 +211,7 @@ static inline int extent_compress_type(unsigned long bio_flags) struct extent_map_tree; -typedef struct extent_map *(get_extent_t)(struct inode *inode, +typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dff7ec1770c102..48dfb8e4baf202 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2341,7 +2341,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) struct extent_map *em; int ret = 0; - em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0); if (IS_ERR_OR_NULL(em)) { if (!em) ret = -ENOMEM; @@ -2833,7 +2833,7 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, alloc_end - cur_offset, 0); if (IS_ERR_OR_NULL(em)) { if (!em) @@ -2960,7 +2960,8 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) &cached_state); while (start < inode->i_size) { - em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, + start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); em = NULL; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0600d55bb173b3..7e12c727f791d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4852,7 +4852,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) cur_offset = hole_start; while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, block_end - cur_offset, 0); if (IS_ERR(em)) { err = PTR_ERR(em); @@ -6732,25 +6732,26 @@ static noinline int uncompress_inline(struct btrfs_path *path, * This also copies inline extents directly into the page. 
*/ -struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create) +struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, + struct page *page, + size_t pg_offset, u64 start, u64 len, + int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; int err = 0; u64 extent_start = 0; u64 extent_end = 0; - u64 objectid = btrfs_ino(BTRFS_I(inode)); + u64 objectid = btrfs_ino(inode); u32 found_type; struct btrfs_path *path = NULL; - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *item; struct extent_buffer *leaf; struct btrfs_key found_key; struct extent_map *em = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_io_tree *io_tree = &inode->io_tree; struct btrfs_trans_handle *trans = NULL; const bool new_inline = !page || create; @@ -6863,7 +6864,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, goto not_found_em; } - btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, item, + btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); if (found_type == BTRFS_FILE_EXTENT_REG || @@ -7000,7 +7001,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, BTRFS_I(inode), em); + trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); if (trans) { @@ -7016,9 +7017,10 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, return em; } -struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create) +struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, + struct page *page, + size_t pg_offset, u64 start, u64 len, + int create) { struct extent_map *em; struct extent_map *hole_em = NULL; @@ -7055,7 +7057,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag em = NULL; /* ok, we didn't find anything, lets look for delalloc */ - found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start, + found = count_range_bits(&inode->io_tree, &range_start, end, len, EXTENT_DELALLOC, 1); found_end = range_start + found; if (found_end < range_start) @@ -7625,7 +7627,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, goto err; } - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); goto unlock_err; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 12ae210f87194f..45a708555f9be2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1009,7 +1009,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) /* get the big lock and read metadata off disk */ lock_extent_bits(io_tree, start, end, &cached); - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); if (IS_ERR(em)) diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 924bcbf4327528..8c91d03cc82d8a 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -278,7 +278,7 @@ 
static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -302,7 +302,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) */ setup_file_extents(root, sectorsize); - em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -323,7 +323,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -350,7 +350,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -372,7 +372,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -399,7 +399,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -428,7 +428,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -450,7 +450,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,7 +484,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -513,7 +513,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ 
-543,7 +543,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -576,7 +576,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -611,7 +611,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -645,7 +645,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -680,7 +680,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -707,7 +707,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -742,7 +742,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, + sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -769,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -802,7 +803,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -885,7 +886,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) insert_inode_item_key(root); insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); 
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -907,7 +908,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, + 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; From 9e3e97f45c1f4debe53f72ad309ec06890d3aac2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:07 +0200 Subject: [PATCH 0323/1911] btrfs: Make btrfs_del_delalloc_inode take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7e12c727f791d3..1934db1af2fc27 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1632,15 +1632,15 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, } static void btrfs_del_delalloc_inode(struct btrfs_root *root, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); spin_lock(&root->delalloc_lock); - if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { - list_del_init(&BTRFS_I(inode)->delalloc_inodes); + if (!list_empty(&inode->delalloc_inodes)) { + list_del_init(&inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); root->nr_delalloc_inodes--; if (!root->nr_delalloc_inodes) { spin_lock(&fs_info->delalloc_root_lock); @@ -1760,8 +1760,8 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, inode->delalloc_bytes -= len; if (do_list && inode->delalloc_bytes == 0 && test_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &inode->runtime_flags)) - btrfs_del_delalloc_inode(root, &inode->vfs_inode); + &inode->runtime_flags)) + btrfs_del_delalloc_inode(root, inode); spin_unlock(&inode->lock); } } From db0a669fb002416faafe34481d6a6e21cdf0e926 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:08 +0200 Subject: [PATCH 0324/1911] btrfs: Make btrfs_add_link take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 39 ++++++++++++++++++++------------------- fs/btrfs/tree-log.c | 8 +++++--- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 809736ec549b43..f03c2f285eb1ef 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3129,7 +3129,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len); int btrfs_add_link(struct btrfs_trans_handle *trans, - struct inode *parent_inode, struct inode *inode, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index); int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1934db1af2fc27..91c6be6a72df40 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6251,18 +6251,18 @@ static inline u8 btrfs_inode_type(struct inode *inode) * inode to the parent directory. 
*/ int btrfs_add_link(struct btrfs_trans_handle *trans, - struct inode *parent_inode, struct inode *inode, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret = 0; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(parent_inode)->root; - u64 ino = btrfs_ino(BTRFS_I(inode)); - u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode)); + struct btrfs_root *root = parent_inode->root; + u64 ino = btrfs_ino(inode); + u64 parent_ino = btrfs_ino(parent_inode); if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { - memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); + memcpy(&key, &inode->root->root_key, sizeof(key)); } else { key.objectid = ino; key.type = BTRFS_INODE_ITEM_KEY; @@ -6283,8 +6283,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; ret = btrfs_insert_dir_item(trans, root, name, name_len, - BTRFS_I(parent_inode), &key, - btrfs_inode_type(inode), index); + parent_inode, &key, + btrfs_inode_type(&inode->vfs_inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; else if (ret) { @@ -6292,12 +6292,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; } - btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + + btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name_len * 2); - inode_inc_iversion(parent_inode); - parent_inode->i_mtime = parent_inode->i_ctime = - current_time(parent_inode); - ret = btrfs_update_inode(trans, root, parent_inode); + inode_inc_iversion(&parent_inode->vfs_inode); + parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime = + current_time(&parent_inode->vfs_inode); + ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode); if (ret) btrfs_abort_transaction(trans, ret); return ret; @@ -6324,7 +6324,7 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct inode *dir, struct dentry *dentry, struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dir, inode, + int err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), dentry->d_name.name, dentry->d_name.len, backref, index); if (err > 0) @@ -6601,8 +6601,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, - dentry->d_name.len, 0, index); + err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail_inode; @@ -9592,7 +9593,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_fail; } - ret = btrfs_add_link(trans, new_dir, old_inode, + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), new_dentry->d_name.name, new_dentry->d_name.len, 0, old_idx); if (ret) { @@ -9600,7 +9601,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_fail; } - ret = btrfs_add_link(trans, old_dir, new_inode, + ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), old_dentry->d_name.name, old_dentry->d_name.len, 0, new_idx); if (ret) { @@ -9877,7 +9878,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - ret = btrfs_add_link(trans, new_dir, old_inode, + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), new_dentry->d_name.name, new_dentry->d_name.len, 0, index); if (ret) { diff --git 
a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f8965b0812124f..c9ada4b9000412 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1322,8 +1322,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* insert our name */ - ret = btrfs_add_link(trans, dir, inode, name, namelen, - 0, ref_index); + ret = btrfs_add_link(trans, BTRFS_I(dir), + BTRFS_I(inode), + name, namelen, 0, ref_index); if (ret) goto out; @@ -1641,7 +1642,8 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, return -EIO; } - ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, + name_len, 1, index); /* FIXME, put inode into FIXUP list */ From cef415af208984650de925e28d40aa4a6e8513f4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:09 +0200 Subject: [PATCH 0325/1911] btrfs: Make btrfs_add_nondir take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91c6be6a72df40..dafd3fdd6f2bde 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6321,10 +6321,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct inode *dir, struct dentry *dentry, - struct inode *inode, int backref, u64 index) + struct btrfs_inode *dir, struct dentry *dentry, + struct btrfs_inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + int err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, backref, index); if (err > 0) @@ -6378,7 +6378,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (err) goto out_unlock_inode; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 0, index); if (err) { goto out_unlock_inode; } else { @@ -6455,7 +6456,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out_unlock_inode; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 0, index); if (err) goto out_unlock_inode; @@ -6521,7 +6523,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); - err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 1, index); if (err) { drop_inode = 1; @@ -9703,8 +9706,8 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = btrfs_add_nondir(trans, dir, dentry, - inode, 0, index); + ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, + BTRFS_I(inode), 0, index); if (ret) goto out; @@ -10253,7 +10256,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, * elsewhere above. 
*/ if (!err) - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, + BTRFS_I(inode), 0, index); if (err) { drop_inode = 1; goto out_unlock_inode; From abcefb1eeeff04734a59b4dd3724abbf0688252c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:10 +0200 Subject: [PATCH 0326/1911] btrfs: make btrfs_inode_block_unlocked_dio take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c10d21b2f0df6f..d84cf2b0c7b6bc 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -311,9 +311,9 @@ struct btrfs_dio_private { * to grab i_mutex. It is used to avoid the endless truncate due to * nonlocked dio read. */ -static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) +static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) { - set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); smp_mb(); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dafd3fdd6f2bde..59b93a2369aa69 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5002,7 +5002,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) truncate_setsize(inode, newsize); /* Disable nonlocked read DIO to avoid the end less truncate */ - btrfs_inode_block_unlocked_dio(inode); + btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); inode_dio_wait(inode); btrfs_inode_resume_unlocked_dio(inode); From 0b581701d9771c55a908c612ca49850e6088fe08 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:11 +0200 Subject: [PATCH 0327/1911] btrfs: make btrfs_inode_resume_unlocked_dio take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 5 ++--- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d84cf2b0c7b6bc..0c6baaba0651ce 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -317,11 +317,10 @@ static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) smp_mb(); } -static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) +static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) { smp_mb__before_atomic(); - clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, - &BTRFS_I(inode)->runtime_flags); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); } static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59b93a2369aa69..596e5cb4bfa29a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5004,7 +5004,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* Disable nonlocked read DIO to avoid the end less truncate */ btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); inode_dio_wait(inode); - btrfs_inode_resume_unlocked_dio(inode); + btrfs_inode_resume_unlocked_dio(BTRFS_I(inode)); ret = btrfs_truncate(inode); if (ret && inode->i_nlink) { From 29e09229d9f26129a39462fae0ddabc4d9533989 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Feb 2017 08:39:28 +0100 Subject: [PATCH 0328/1911] netfilter: use skb_to_full_sk in ip_route_me_harder inet_sk(skb->sk) is illegal in case skb is attached to request socket. 
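The fix resolves a full socket once with skb_to_full_sk() and then uses that pointer wherever the route key is built, instead of dereferencing skb->sk directly. A minimal sketch of the pattern, taken from the change below (the complete fix is in the diff):

    const struct sock *sk = skb_to_full_sk(skb);
    __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;

    fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;	/* not skb->sk */
    dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);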
Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Reported by: Daniel J Blueman Signed-off-by: Florian Westphal Tested-by: Daniel J Blueman Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b3cc1335adbc1a..c0cc6aa8cfaa9c 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + const struct sock *sk = skb_to_full_sk(skb); + __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; @@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t fl4.daddr = iph->daddr; fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; if (!fl4.flowi4_oif) fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; @@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); From c3a49c8991212e1ef795c7a46881ac5cc7766993 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Feb 2017 00:05:01 +0100 Subject: [PATCH 0329/1911] cpufreq: intel_pstate: Fix limits issue with operation mode switching There is a problem with intel_pstate operation mode switching introduced by commit fb1fe1041c04 (cpufreq: intel_pstate: Operation mode control from sysfs), because the global sysfs limits are preserved across operation modes while per-policy limits are reinitialized from scratch on a mode switch and both sets of limits may get out of sync this way. Fix that by always reinitializing the global limits upon the registration of the driver. Fixes: fb1fe1041c04 (cpufreq: intel_pstate: Operation mode control from sysfs) Signed-off-by: Rafael J. 
Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 65 ++++++++++++---------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb0f7fb7168589..9ee13f195c377e 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,37 +364,25 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits = { - .no_turbo = 0, - .turbo_disabled = 0, - .max_perf_pct = 100, - .max_perf = int_ext_tofp(1), - .min_perf_pct = 100, - .min_perf = int_ext_tofp(1), - .max_policy_pct = 100, - .max_sysfs_pct = 100, - .min_policy_pct = 0, - .min_sysfs_pct = 0, -}; +static struct perf_limits performance_limits; +static struct perf_limits powersave_limits; +static struct perf_limits *limits; -static struct perf_limits powersave_limits = { - .no_turbo = 0, - .turbo_disabled = 0, - .max_perf_pct = 100, - .max_perf = int_ext_tofp(1), - .min_perf_pct = 0, - .min_perf = 0, - .max_policy_pct = 100, - .max_sysfs_pct = 100, - .min_policy_pct = 0, - .min_sysfs_pct = 0, -}; +static void intel_pstate_init_limits(struct perf_limits *limits) +{ + memset(limits, 0, sizeof(*limits)); + limits->max_perf_pct = 100; + limits->max_perf = int_ext_tofp(1); + limits->max_policy_pct = 100; + limits->max_sysfs_pct = 100; +} -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -static struct perf_limits *limits = &performance_limits; -#else -static struct perf_limits *limits = &powersave_limits; -#endif +static void intel_pstate_set_performance_limits(struct perf_limits *limits) +{ + intel_pstate_init_limits(limits); + limits->min_perf_pct = 100; + limits->min_perf = int_ext_tofp(1); +} static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -2084,20 +2072,6 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) synchronize_sched(); } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - limits->no_turbo = 0; - limits->turbo_disabled = 0; - limits->max_perf_pct = 100; - limits->max_perf = int_ext_tofp(1); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - limits->max_policy_pct = 100; - limits->max_sysfs_pct = 100; - limits->min_policy_pct = 0; - limits->min_sysfs_pct = 0; -} - static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { @@ -2466,6 +2440,11 @@ static int intel_pstate_register_driver(void) { int ret; + intel_pstate_init_limits(&powersave_limits); + intel_pstate_set_performance_limits(&performance_limits); + limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? 
+ &performance_limits : &powersave_limits; + ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { intel_pstate_driver_cleanup(); From da353f6b30bc02da29b54f5f024039698fbd23f4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:55:53 +0100 Subject: [PATCH 0330/1911] btrfs: constify device path passed to relevant helpers Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 5 +++-- fs/btrfs/dev-replace.h | 5 +++-- fs/btrfs/volumes.c | 18 ++++++++++-------- fs/btrfs/volumes.h | 12 ++++++------ 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5de280b9ad7303..e653921f05d939 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -304,8 +304,9 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } -int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name, - u64 srcdevid, char *srcdev_name, int read_src) +int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, + const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, + int read_src) { struct btrfs_root *root = fs_info->dev_root; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 54ea12bda15b30..f94a76844ae742 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h @@ -27,8 +27,9 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args); -int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name, - u64 srcdevid, char *srcdev_name, int read_src); +int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, + const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, + int read_src); void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args); int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1fac9872881433..b443cc71830f85 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1725,7 +1725,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. 
*/ -static void update_dev_time(char *path_name) +static void update_dev_time(const char *path_name) { struct file *filp; @@ -1851,7 +1851,8 @@ void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, fs_info->fs_devices->latest_bdev = next_device->bdev; } -int btrfs_rm_device(struct btrfs_fs_info *fs_info, char *device_path, u64 devid) +int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, + u64 devid) { struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; @@ -2091,7 +2092,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device) { int ret = 0; @@ -2118,7 +2119,7 @@ static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, } int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device) { *device = NULL; @@ -2151,7 +2152,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, * Lookup a device given by device id, or the path if the id is 0. */ int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, - char *devpath, struct btrfs_device **device) + const char *devpath, + struct btrfs_device **device) { int ret; @@ -2307,7 +2309,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, return ret; } -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path) +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path) { struct btrfs_root *root = fs_info->dev_root; struct request_queue *q; @@ -2515,7 +2517,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path) } int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out) { @@ -7102,7 +7104,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, return 0; } -void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path) +void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path) { struct buffer_head *bh; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 24ba6bc3ec3466..59be81206dd7b9 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -422,16 +422,16 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, struct btrfs_device *device, struct btrfs_device *this_dev); int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device); int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, - char *devpath, + const char *devpath, struct btrfs_device **device); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, const u8 *uuid); int btrfs_rm_device(struct btrfs_fs_info *fs_info, - char *device_path, u64 devid); + const char *device_path, u64 devid); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, @@ -439,9 +439,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, u8 *fsid); int 
btrfs_shrink_device(struct btrfs_device *device, u64 new_size); -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *path); +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out); int btrfs_balance(struct btrfs_balance_control *bctl, @@ -474,7 +474,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); -void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path); +void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len, int mirror_num); unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, From 9ed573674ae607bc616c804657bfaf5b6ea5889d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 18:03:49 +0100 Subject: [PATCH 0331/1911] btrfs: constify input buffer of btrfs_csum_data The function does not modify the input buffer, also update a typecast in one caller. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a53ff3bff8eb23..67d663ab658adf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -265,7 +265,7 @@ static struct extent_map *btree_get_extent(struct btrfs_inode *inode, return em; } -u32 btrfs_csum_data(char *data, u32 seed, size_t len) +u32 btrfs_csum_data(const char *data, u32 seed, size_t len) { return btrfs_crc32c(seed, data, len); } @@ -3448,7 +3448,7 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr); crc = ~(u32)0; - crc = btrfs_csum_data((char *)sb + + crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0be2d4fe705b4d..2e0ec29bfd69f0 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -116,7 +116,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); -u32 btrfs_csum_data(char *data, u32 seed, size_t len); +u32 btrfs_csum_data(const char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, u8 *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); From 14a3357b4097d94ac6401c597abc2d02965e0fdd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:58:04 +0100 Subject: [PATCH 0332/1911] btrfs: constify buffers used by compression helpers Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- fs/btrfs/compression.h | 2 +- fs/btrfs/lzo.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f32e86b612404c..d426e4a10b2bc8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1015,7 +1015,7 @@ void btrfs_exit_compress(void) * * total_out is the last byte of the buffer */ -int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, +int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long total_out, 
u64 disk_start, struct bio *bio) { diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 09879579fbc83d..1c700bb20b52fd 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -32,7 +32,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, unsigned long max_out); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); -int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, +int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, struct bio *bio); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 45d26980caf97d..e074b85aa0542f 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -76,7 +76,7 @@ static inline void write_compress_length(char *buf, size_t len) memcpy(buf, &dlen, LZO_LEN); } -static inline size_t read_compress_length(char *buf) +static inline size_t read_compress_length(const char *buf) { __le32 dlen; From 52f75f4fe74ce86376d68b30c94d5fb11cb4019e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 18:33:53 +0100 Subject: [PATCH 0333/1911] btrfs: constify name of subvolume in creation helpers Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 45a708555f9be2..dabfc7ac48a674 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -434,7 +434,7 @@ int btrfs_is_empty_uuid(u8 *uuid) static noinline int create_subvol(struct inode *dir, struct dentry *dentry, - char *name, int namelen, + const char *name, int namelen, u64 *async_transid, struct btrfs_qgroup_inherit *inherit) { @@ -832,7 +832,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * inside this filesystem so it's quite a bit simpler. */ static noinline int btrfs_mksubvol(const struct path *parent, - char *name, int namelen, + const char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, struct btrfs_qgroup_inherit *inherit) @@ -1625,7 +1625,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, } static noinline int btrfs_ioctl_snap_create_transid(struct file *file, - char *name, unsigned long fd, int subvol, + const char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, struct btrfs_qgroup_inherit *inherit) { From 38c31464089f639630b7c28ce933a4d60e135a02 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:04:07 +0100 Subject: [PATCH 0334/1911] btrfs: merge length input and output parameter in compress_pages The length parameter is basically duplicated for input and output in the top level caller of the compress_pages chain. We can simply use one variable for that and reduce stack consumption. The compression implementation will sink the parameter to a local variable so everything works as before. Signed-off-by: David Sterba --- fs/btrfs/compression.c | 24 +++++++++++------------- fs/btrfs/compression.h | 5 ++--- fs/btrfs/inode.c | 2 +- fs/btrfs/lzo.c | 4 ++-- fs/btrfs/zlib.c | 3 ++- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d426e4a10b2bc8..7e214fd254e4e2 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -911,27 +911,25 @@ static void free_workspaces(void) } /* - * given an address space and start/len, compress the bytes. 
+ * Given an address space and start and length, compress the bytes into @pages + * that are allocated on demand. * - * pages are allocated to hold the compressed result and stored - * in 'pages' + * @out_pages is used to return the number of pages allocated. There + * may be pages allocated even if we return an error. * - * out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error - * - * total_in is used to return the number of bytes actually read. It - * may be smaller then len if we had to exit early because we + * @total_in is used to return the number of bytes actually read. It + * may be smaller than the input length if we had to exit early because we * ran out of room in the pages array or because we cross the * max_out threshold. * - * total_out is used to return the total number of compressed bytes + * @total_out is an in/out parameter, must be set to the input length and will + * be also used to return the total number of compressed bytes * - * max_out tells us the max number of bytes that we're allowed to + * @max_out tells us the max number of bytes that we're allowed to * stuff into pages */ int btrfs_compress_pages(int type, struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, @@ -944,7 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, workspace = find_workspace(type); ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, - start, len, pages, + start, pages, nr_dest_pages, out_pages, total_in, total_out, max_out); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 1c700bb20b52fd..725d1e0bd25d98 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -23,8 +23,7 @@ void btrfs_init_compress(void); void btrfs_exit_compress(void); int btrfs_compress_pages(int type, struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, @@ -59,7 +58,7 @@ struct btrfs_compress_op { int (*compress_pages)(struct list_head *workspace, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 596e5cb4bfa29a..8b9eac33d4316c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -517,7 +517,7 @@ static noinline void compress_file_range(struct inode *inode, redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, - total_compressed, pages, + pages, nr_pages, &nr_pages_ret, &total_in, &total_compressed, diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index e074b85aa0542f..489f78c4e5eca2 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -86,7 +86,7 @@ static inline size_t read_compress_length(const char *buf) static int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, @@ -102,7 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page *in_page = NULL; struct page *out_page = NULL; unsigned long bytes_left; - + unsigned long len = *total_out; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index da497f184ff4d2..42d76b7824c384 
100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -73,7 +73,7 @@ static struct list_head *zlib_alloc_workspace(void) static int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, @@ -89,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page *in_page = NULL; struct page *out_page = NULL; unsigned long bytes_left; + unsigned long len = *total_out; *out_pages = 0; *total_out = 0; From 4d3a800ebb1299944408f3b40b5b6b996477fba2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:04:07 +0100 Subject: [PATCH 0335/1911] btrfs: merge nr_pages input and output parameter in compress_pages The parameter saying how many pages can be allocated at maximum can be merged with the output page counter, to save some stack space. The compression implementation will sink the parameter to a local variable so everything works as before. The nr_pages variables can also be simply merged in compress_file_range into one. Signed-off-by: David Sterba --- fs/btrfs/compression.c | 7 +++---- fs/btrfs/compression.h | 2 -- fs/btrfs/inode.c | 13 ++++++------- fs/btrfs/lzo.c | 2 +- fs/btrfs/zlib.c | 2 +- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7e214fd254e4e2..11dcda57e15c51 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -914,8 +914,8 @@ static void free_workspaces(void) * Given an address space and start and length, compress the bytes into @pages * that are allocated on demand. * - * @out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error. + * @out_pages is an in/out parameter, holds maximum number of pages to allocate + * and returns number of actually allocated pages * * @total_in is used to return the number of bytes actually read. 
It * may be smaller than the input length if we had to exit early because we @@ -930,7 +930,6 @@ static void free_workspaces(void) */ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -943,7 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, start, pages, - nr_dest_pages, out_pages, + out_pages, total_in, total_out, max_out); free_workspace(type, workspace); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 725d1e0bd25d98..cfcec709c61bff 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -24,7 +24,6 @@ void btrfs_exit_compress(void); int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -60,7 +59,6 @@ struct btrfs_compress_op { struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8b9eac33d4316c..83bbe1e31869d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,7 +430,6 @@ static noinline void compress_file_range(struct inode *inode, int ret = 0; struct page **pages = NULL; unsigned long nr_pages; - unsigned long nr_pages_ret = 0; unsigned long total_compressed = 0; unsigned long total_in = 0; unsigned long max_compressed = SZ_128K; @@ -518,7 +517,7 @@ static noinline void compress_file_range(struct inode *inode, ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, pages, - nr_pages, &nr_pages_ret, + &nr_pages, &total_in, &total_compressed, max_compressed); @@ -526,7 +525,7 @@ static noinline void compress_file_range(struct inode *inode, if (!ret) { unsigned long offset = total_compressed & (PAGE_SIZE - 1); - struct page *page = pages[nr_pages_ret - 1]; + struct page *page = pages[nr_pages - 1]; char *kaddr; /* zero the tail end of the last page, we might be @@ -607,7 +606,7 @@ static noinline void compress_file_range(struct inode *inode, * will submit them to the elevator. 
*/ add_async_extent(async_cow, start, num_bytes, - total_compressed, pages, nr_pages_ret, + total_compressed, pages, nr_pages, compress_type); if (start + num_bytes < end) { @@ -624,14 +623,14 @@ static noinline void compress_file_range(struct inode *inode, * the compression code ran but failed to make things smaller, * free any pages it allocated and our page pointer array */ - for (i = 0; i < nr_pages_ret; i++) { + for (i = 0; i < nr_pages; i++) { WARN_ON(pages[i]->mapping); put_page(pages[i]); } kfree(pages); pages = NULL; total_compressed = 0; - nr_pages_ret = 0; + nr_pages = 0; /* flag the file so we don't compress in the future */ if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && @@ -660,7 +659,7 @@ static noinline void compress_file_range(struct inode *inode, return; free_pages_out: - for (i = 0; i < nr_pages_ret; i++) { + for (i = 0; i < nr_pages; i++) { WARN_ON(pages[i]->mapping); put_page(pages[i]); } diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 489f78c4e5eca2..0baf978fbcaf15 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -88,7 +88,6 @@ static int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -103,6 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page *out_page = NULL; unsigned long bytes_left; unsigned long len = *total_out; + unsigned long nr_dest_pages = *out_pages; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 42d76b7824c384..e7f2020f8ee764 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -75,7 +75,6 @@ static int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -90,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page *out_page = NULL; unsigned long bytes_left; unsigned long len = *total_out; + unsigned long nr_dest_pages = *out_pages; *out_pages = 0; *total_out = 0; From ff7638665c9a82894cabd18d26a9f8957f280f55 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:30:39 +0100 Subject: [PATCH 0336/1911] btrfs: export compression buffer limits in a header Move the buffer limit definitions out of compress_file_range. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.h | 15 +++++++++++++++ fs/btrfs/inode.c | 10 ---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index cfcec709c61bff..619e64c274449a 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -19,6 +19,21 @@ #ifndef __BTRFS_COMPRESSION_ #define __BTRFS_COMPRESSION_ +/* + * We want to make sure that amount of RAM required to uncompress an extent is + * reasonable, so we limit the total size in ram of a compressed extent to + * 128k. This is a crucial number because it also controls how easily we can + * spread reads across cpus for decompression. + * + * We also want to make sure the amount of IO required to do a random read is + * reasonably small, so we limit the size of a compressed extent to 128k. 
+ */ + +/* Maximum length of compressed data stored on disk */ +#define BTRFS_MAX_COMPRESSED (SZ_128K) +/* Maximum size of data before compression */ +#define BTRFS_MAX_UNCOMPRESSED (SZ_128K) + void btrfs_init_compress(void); void btrfs_exit_compress(void); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 83bbe1e31869d5..77fb24b94a9eba 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -471,16 +471,6 @@ static noinline void compress_file_range(struct inode *inode, (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) goto cleanup_and_bail_uncompressed; - /* we want to make sure that amount of ram required to uncompress - * an extent is reasonable, so we limit the total size in ram - * of a compressed extent to 128k. This is a crucial number - * because it also controls how easily we can spread reads across - * cpus for decompression. - * - * We also want to make sure the amount of IO required to do - * a random read is reasonably small, so we limit the size of - * a compressed extent to 128k. - */ total_compressed = min(total_compressed, max_uncompressed); num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); From 069eac7850890acf0d3c21a6c8ca9f33ddb34a0d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:36:54 +0100 Subject: [PATCH 0337/1911] btrfs: use predefined limits for calculating maximum number of pages for compression Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 77fb24b94a9eba..11a4eeadf662b2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -432,8 +432,6 @@ static noinline void compress_file_range(struct inode *inode, unsigned long nr_pages; unsigned long total_compressed = 0; unsigned long total_in = 0; - unsigned long max_compressed = SZ_128K; - unsigned long max_uncompressed = SZ_128K; int i; int will_compress; int compress_type = fs_info->compress_type; @@ -446,7 +444,9 @@ static noinline void compress_file_range(struct inode *inode, again: will_compress = 0; nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; - nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE); + BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0); + nr_pages = min_t(unsigned long, nr_pages, + BTRFS_MAX_COMPRESSED / PAGE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -471,7 +471,8 @@ static noinline void compress_file_range(struct inode *inode, (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) goto cleanup_and_bail_uncompressed; - total_compressed = min(total_compressed, max_uncompressed); + total_compressed = min_t(unsigned long, total_compressed, + BTRFS_MAX_UNCOMPRESSED); num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); total_in = 0; @@ -510,7 +511,7 @@ static noinline void compress_file_range(struct inode *inode, &nr_pages, &total_in, &total_compressed, - max_compressed); + BTRFS_MAX_COMPRESSED); if (!ret) { unsigned long offset = total_compressed & From e5d74902362f1a06ea3674042d09f1af178c0a20 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:45:05 +0100 Subject: [PATCH 0338/1911] btrfs: derive maximum output size in the compression implementation The value of max_out can be calculated from the parameters passed to the compressors, which is number of pages and the page size, and we don't have to needlessly pass it around. 
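Concretely, each compressor already receives the destination page count through *out_pages, so the byte limit can be recomputed locally rather than threaded through every caller; the derivation added inside the lzo/zlib implementations below is simply:

    unsigned long nr_dest_pages = *out_pages;
    const unsigned long max_out = nr_dest_pages * PAGE_SIZE;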
Signed-off-by: David Sterba --- fs/btrfs/compression.c | 6 ++---- fs/btrfs/compression.h | 6 ++---- fs/btrfs/inode.c | 3 +-- fs/btrfs/lzo.c | 4 ++-- fs/btrfs/zlib.c | 4 ++-- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 11dcda57e15c51..c7721a6aa3bb53 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -932,8 +932,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct list_head *workspace; int ret; @@ -943,8 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, start, pages, out_pages, - total_in, total_out, - max_out); + total_in, total_out); free_workspace(type, workspace); return ret; } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 619e64c274449a..39ec43ab8df1b7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -41,8 +41,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); + unsigned long *total_out); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, @@ -76,8 +75,7 @@ struct btrfs_compress_op { struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); + unsigned long *total_out); int (*decompress_bio)(struct list_head *workspace, struct page **pages_in, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11a4eeadf662b2..0d932b9594de5e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -510,8 +510,7 @@ static noinline void compress_file_range(struct inode *inode, pages, &nr_pages, &total_in, - &total_compressed, - BTRFS_MAX_COMPRESSED); + &total_compressed); if (!ret) { unsigned long offset = total_compressed & diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 0baf978fbcaf15..f48c8c14dc144b 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -90,8 +90,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; @@ -103,6 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, unsigned long bytes_left; unsigned long len = *total_out; unsigned long nr_dest_pages = *out_pages; + const unsigned long max_out = nr_dest_pages * PAGE_SIZE; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index e7f2020f8ee764..135b10823c6dbd 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -77,8 +77,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret; @@ -90,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, unsigned long bytes_left; unsigned long len = *total_out; unsigned long nr_dest_pages = *out_pages; 
+ const unsigned long max_out = nr_dest_pages * PAGE_SIZE; *out_pages = 0; *total_out = 0; From 047e5e17c1e3551ef0fb34b629c66bec0591db0d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:33:27 +0100 Subject: [PATCH 0339/1911] btrfs: remove BUG_ON from __tree_mod_log_insert All callers dereference the 'tm' parameter before it gets to this function, so the NULL check does not make much sense here. Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1192bc7d2ee782..2c3c943bfcdc8d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -453,8 +453,6 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node *parent = NULL; struct tree_mod_elem *cur; - BUG_ON(!tm); - tm->seq = btrfs_inc_tree_mod_seq(fs_info); tm_root = &fs_info->tree_mod_log; From fa2529923d3bc5f0b39ff7a7c52be74c4aea6f2a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 15 Feb 2017 09:35:01 +0100 Subject: [PATCH 0340/1911] btrfs: handle allocation error in update_dev_stat_item Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b443cc71830f85..7c8c7bbee19751 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6956,7 +6956,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, key.offset = device->devid; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { btrfs_warn_in_rcu(fs_info, From b9d04c607c21fafe0a346792d0d358e7ab9a768e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 19:42:43 +0100 Subject: [PATCH 0341/1911] btrfs: do proper error handling in btrfs_insert_xattr_item The space check in btrfs_insert_xattr_item is duplicated in its caller (do_setxattr) so we won't hit the BUG_ON. Continuing without any check could be disastrous, so turn it into proper error handling. Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/dir-item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b13d9536d4de8c..60a750678a82b3 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -80,7 +80,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u32 data_size; - BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)); + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) + return -ENOSPC; key.objectid = objectid; key.type = BTRFS_XATTR_ITEM_KEY; From c3988d630a4dfec5c09f2b6496734f320949ea9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:18:32 +0100 Subject: [PATCH 0342/1911] btrfs: let writepage_end_io_hook return void There's no error path in any of the instances; they always return 0.
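The user-visible part of the change is the hook's prototype in extent_io.h; for reference, the before and after declaration (callers then invoke it without collecting a return value):

    /* before: the return value was always 0 and still had to be collected */
    int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
                                 struct extent_state *state, int uptodate);

    /* after: callers such as end_extent_writepage() call it directly */
    void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
                                  struct extent_state *state, int uptodate);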
Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 +++------ fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 6 ++---- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c3abf846a4495f..94beb758e8aacd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2437,12 +2437,9 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) tree = &BTRFS_I(page->mapping->host)->io_tree; - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end, NULL, + uptodate); if (!uptodate) { ClearPageUptodate(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c16260c6c14f16..78bb4d76127bca 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -104,7 +104,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); - int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, unsigned *bits); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0d932b9594de5e..b904c74f31fd32 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2972,7 +2972,7 @@ static void finish_ordered_fn(struct btrfs_work *work) btrfs_finish_ordered_io(ordered_extent); } -static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, +static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { struct inode *inode = page->mapping->host; @@ -2986,7 +2986,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, ClearPagePrivate2(page); if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, end - start + 1, uptodate)) - return 0; + return; if (btrfs_is_free_space_inode(BTRFS_I(inode))) { wq = fs_info->endio_freespace_worker; @@ -2999,8 +2999,6 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered_extent->work); - - return 0; } static int __readpage_endio_check(struct inode *inode, From 4d53dddbec671bcb64a936a3d2b7bf1ce2252ed0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:27:44 +0100 Subject: [PATCH 0343/1911] btrfs: document existence of extent_io ops callbacks Some of the callbacks defined in btree_extent_io_ops and btrfs_extent_io_ops do always exist so we don't need to check the existence before each call. This patch just reorders the definition and documents which are mandatory/optional. 
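Once the split is documented, a mandatory hook can be invoked unconditionally while an optional one keeps its NULL check; an illustrative call-site sketch (the checks themselves are only dropped in the follow-up patches):

    /* mandatory: the pointer is always set in the registered ops tables */
    ret = tree->ops->readpage_end_io_hook(io_bio, offset, page,
                                          start, end, mirror);

    /* optional: guard before calling */
    if (tree->ops->writepage_end_io_hook)
        tree->ops->writepage_end_io_hook(page, start, end, NULL, uptodate);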
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 7 +++++-- fs/btrfs/extent_io.h | 23 ++++++++++++++++------- fs/btrfs/inode.c | 7 +++++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 67d663ab658adf..60d4f1b210d7d6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4653,9 +4653,12 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) } static const struct extent_io_ops btree_extent_io_ops = { - .readpage_end_io_hook = btree_readpage_end_io_hook, - .readpage_io_failed_hook = btree_io_failed_hook, + /* mandatory callbacks */ .submit_bio_hook = btree_submit_bio_hook, + .readpage_end_io_hook = btree_readpage_end_io_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, + + /* optional callbacks */ + .readpage_io_failed_hook = btree_io_failed_hook, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 78bb4d76127bca..1f8478dc9f8ec2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -92,18 +92,27 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset); struct extent_io_ops { - int (*fill_delalloc)(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written); - int (*writepage_start_hook)(struct page *page, u64 start, u64 end); + /* + * The following callbacks must be allways defined, the function + * pointer will be called unconditionally. + */ extent_submit_bio_hook_t *submit_bio_hook; + int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int mirror); int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); + + /* + * Optional hooks, called if the pointer is not NULL + */ + int (*fill_delalloc)(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, - struct page *page, u64 start, u64 end, - int mirror); + int (*writepage_start_hook)(struct page *page, u64 start, u64 end); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, unsigned *bits); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b904c74f31fd32..3b327c8cfb1661 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10547,10 +10547,13 @@ static const struct file_operations btrfs_dir_file_operations = { }; static const struct extent_io_ops btrfs_extent_io_ops = { - .fill_delalloc = run_delalloc_range, + /* mandatory callbacks */ .submit_bio_hook = btrfs_submit_bio_hook, - .merge_bio_hook = btrfs_merge_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .merge_bio_hook = btrfs_merge_bio_hook, + + /* optional callbacks */ + .fill_delalloc = run_delalloc_range, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, .set_bit_hook = btrfs_set_bit_hook, From 20c9801d393d9f077c476f40440b481daaccb9d6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:59:35 +0100 Subject: [PATCH 0344/1911] btrfs: drop checks for mandatory extent_io_ops callbacks We know that readpage_end_io_hook, submit_bio_hook and merge_bio_hook are
always defined so we can drop the checks before we call them. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 94beb758e8aacd..0786985a8cc529 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2567,8 +2567,7 @@ static void end_bio_extent_readpage(struct bio *bio) len = bvec->bv_len; mirror = io_bio->mirror_num; - if (likely(uptodate && tree->ops && - tree->ops->readpage_end_io_hook)) { + if (likely(uptodate && tree->ops)) { ret = tree->ops->readpage_end_io_hook(io_bio, offset, page, start, end, mirror); @@ -2731,7 +2730,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, bio->bi_private = NULL; bio_get(bio); - if (tree->ops && tree->ops->submit_bio_hook) + if (tree->ops) ret = tree->ops->submit_bio_hook(page->mapping->host, bio, mirror_num, bio_flags, start); else @@ -2746,7 +2745,7 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page, unsigned long bio_flags) { int ret = 0; - if (tree->ops && tree->ops->merge_bio_hook) + if (tree->ops) ret = tree->ops->merge_bio_hook(page, offset, size, bio, bio_flags); return ret; From 20a7db8ab3f2057a518448b1728d504ffadef65e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 16:24:29 +0100 Subject: [PATCH 0345/1911] btrfs: add dummy callback for readpage_io_failed and drop checks Make extent_io_ops::readpage_io_failed_hook callback mandatory and define a dummy function for btrfs_extent_io_ops. As the failed IO callback is not performance critical, the branch vs extra trade off does not hurt. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 7 +++++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60d4f1b210d7d6..cad47ece2fdfbf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4658,7 +4658,7 @@ static const struct extent_io_ops btree_extent_io_ops = { .readpage_end_io_hook = btree_readpage_end_io_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, + .readpage_io_failed_hook = btree_io_failed_hook, /* optional callbacks */ - .readpage_io_failed_hook = btree_io_failed_hook, }; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0786985a8cc529..28e81922a21c1e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2581,7 +2581,7 @@ static void end_bio_extent_readpage(struct bio *bio) if (likely(uptodate)) goto readpage_ok; - if (tree->ops && tree->ops->readpage_io_failed_hook) { + if (tree->ops) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !bio->bi_error) uptodate = 1; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f8478dc9f8ec2..3e4fad4a909d11 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -103,6 +103,7 @@ struct extent_io_ops { int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); + int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); /* * Optional hooks, called if the pointer is not NULL @@ -110,7 +111,6 @@ struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written); - int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); int (*writepage_start_hook)(struct page *page, u64 start, u64 
end); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b327c8cfb1661..ca1995cfd8e996 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10509,6 +10509,12 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) } +__attribute__((const)) +static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror) +{ + return 0; +} + static const struct inode_operations btrfs_dir_inode_operations = { .getattr = btrfs_getattr, .lookup = btrfs_lookup, @@ -10551,6 +10557,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .merge_bio_hook = btrfs_merge_bio_hook, + .readpage_io_failed_hook = dummy_readpage_io_failed_hook, /* optional callbacks */ .fill_delalloc = run_delalloc_range, From f5432f01240ef69a391940d623b6a51768aefd65 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Wed, 15 Feb 2017 20:42:52 +0530 Subject: [PATCH 0346/1911] ARM: dts: am57xx-idk: tpic2810 is on I2C bus, not SPI commit 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial output support") added the TPIC2810 device-tree node under SPI bus instead of I2C1. Fix it. Tested on AM572x IDK by driving on-board LEDs connected to TPIC2810 Fixes: 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial output support") Signed-off-by: Sekhar Nori Acked-by: Andrew F. Davis Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 814a720d5c3db5..d0a55b84569054 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -311,6 +311,13 @@ /* ID & VBUS GPIOs provided in board dts */ }; }; + + tpic2810: tpic2810@60 { + compatible = "ti,tpic2810"; + reg = <0x60>; + gpio-controller; + #gpio-cells = <2>; + }; }; &mcspi3 { @@ -326,13 +333,6 @@ spi-max-frequency = <1000000>; spi-cpol; }; - - tpic2810: tpic2810@60 { - compatible = "ti,tpic2810"; - reg = <0x60>; - gpio-controller; - #gpio-cells = <2>; - }; }; &uart3 { From 0341735226dcfa9d3727ff001e030f879ab91ca2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 17 Feb 2017 06:51:17 -0800 Subject: [PATCH 0347/1911] ARM: omap2plus_defconfig: Enable INPUT_MOUSEDEV as loadable modules Otherwise mice won't be happy. Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 195c98b85568b0..77ffccfd0c3f5f 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -188,6 +188,7 @@ CONFIG_WL12XX=m CONFIG_WL18XX=m CONFIG_WLCORE_SPI=m CONFIG_WLCORE_SDIO=m +CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=m CONFIG_KEYBOARD_ATKBD=m From 48385896e9c53e1061f103e1271a6be9a7ea00c4 Mon Sep 17 00:00:00 2001 From: Teresa Remmet Date: Fri, 10 Feb 2017 13:29:07 +0100 Subject: [PATCH 0348/1911] ARM: dts: am335x-pcm953: Fix legacy wakeup source binding Replaced the legacy binding "gpio-key,wakeup" with "wakeup-source" as noted in the kernel documentation. 
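For reference, the consuming driver simply tests the property as a boolean; below is a minimal sketch of that lookup, assuming of_property_read_bool() and an invented helper name (the real gpio-keys parsing differs in detail):

  #include <linux/of.h>

  /* Sketch only: decide whether a key node may wake the system. */
  static bool key_is_wakeup_source(const struct device_node *np)
  {
          /* Preferred, documented binding. */
          if (of_property_read_bool(np, "wakeup-source"))
                  return true;

          /* Legacy spelling, kept only for old device trees. */
          return of_property_read_bool(np, "gpio-key,wakeup");
  }

In the sketch, either spelling marks the button as a wakeup source; the device tree change below only moves to the documented one.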
Signed-off-by: Teresa Remmet Reported-by: Sudeep Holla Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pcm-953.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index 02981eae96b994..1ec8e0d801912f 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -63,14 +63,14 @@ label = "home"; linux,code = ; gpios = <&gpio3 7 GPIO_ACTIVE_HIGH>; - gpio-key,wakeup; + wakeup-source; }; button@1 { label = "menu"; linux,code = ; gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>; - gpio-key,wakeup; + wakeup-source; }; }; From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: [PATCH 0349/1911] ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc-onenand.c | 10 ++++++---- drivers/memory/omap-gpmc.c | 4 +--- include/linux/omap-gpmc.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 8633c703546a65..2944af82055847 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) return ret; } -void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) +int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) { int err; struct device *dev = &gpmc_onenand_device.dev; @@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) if (err < 0) { dev_err(dev, "Cannot request GPMC CS %d, error %d\n", gpmc_onenand_data->cs, err); - return; + return err; } gpmc_onenand_resource.end = gpmc_onenand_resource.start + ONENAND_IO_SIZE - 1; - if (platform_device_register(&gpmc_onenand_device) < 0) { + err = platform_device_register(&gpmc_onenand_device); + if (err) { dev_err(dev, "Unable to register OneNAND device\n"); gpmc_cs_free(gpmc_onenand_data->cs); - return; } + + return err; } diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 5457c361ad5864..bf0fe0137dfed2 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev, if (!of_property_read_u32(child, "dma-channel", &val)) gpmc_onenand_data->dma_channel = val; - gpmc_onenand_init(gpmc_onenand_data); - - return 0; + return gpmc_onenand_init(gpmc_onenand_data); } #else static int gpmc_probe_onenand_child(struct platform_device *pdev, diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948e6..e821a3132a3e1b 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 
00:00:00 2001 From: Ladislav Michl Date: Tue, 21 Feb 2017 10:44:45 +0100 Subject: [PATCH 0350/1911] ARM: OMAP2+: Remove legacy gpmc-nand.c This code is no longer used and can be removed as we are using device tree. Removing this code also removes a dependency between drivers/mtd and arch/arm/mach-omap2 making furhter driver changes easier. Signed-off-by: Ladislav Michl [tony@atomide.com: removed from header too, updated comments] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Makefile | 3 - arch/arm/mach-omap2/gpmc-nand.c | 154 -------------------------------- include/linux/omap-gpmc.h | 11 --- 3 files changed, 168 deletions(-) delete mode 100644 arch/arm/mach-omap2/gpmc-nand.c diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 093458b62c8dad..c89757abb0ae4b 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -241,6 +241,3 @@ obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o onenand-$(CONFIG_MTD_ONENAND_OMAP2) := gpmc-onenand.o obj-y += $(onenand-m) $(onenand-y) - -nand-$(CONFIG_MTD_NAND_OMAP2) := gpmc-nand.o -obj-y += $(nand-m) $(nand-y) diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c deleted file mode 100644 index f6ac027f3c3bf2..00000000000000 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * gpmc-nand.c - * - * Copyright (C) 2009 Texas Instruments - * Vimal Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "soc.h" - -/* minimum size for IO mapping */ -#define NAND_IO_SIZE 4 - -static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) -{ - /* platforms which support all ECC schemes */ - if (soc_is_am33xx() || soc_is_am43xx() || cpu_is_omap44xx() || - soc_is_omap54xx() || soc_is_dra7xx()) - return 1; - - if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || - ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { - if (cpu_is_omap24xx()) - return 0; - else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) - return 0; - else - return 1; - } - - /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes - * which require H/W based ECC error detection */ - if ((cpu_is_omap34xx() || cpu_is_omap3630()) && - ((ecc_opt == OMAP_ECC_BCH4_CODE_HW) || - (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) - return 0; - - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ - if (ecc_opt == OMAP_ECC_HAM1_CODE_HW || - ecc_opt == OMAP_ECC_HAM1_CODE_SW) - return 1; - else - return 0; -} - -/* This function will go away once the device-tree convertion is complete */ -static void gpmc_set_legacy(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_settings *s) -{ - /* Enable RD PIN Monitoring Reg */ - if (gpmc_nand_data->dev_ready) { - s->wait_on_read = true; - s->wait_on_write = true; - } - - if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16) - s->device_width = GPMC_DEVWIDTH_16BIT; - else - s->device_width = GPMC_DEVWIDTH_8BIT; -} - -int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_timings *gpmc_t) -{ - int err = 0; - struct gpmc_settings s; - struct platform_device *pdev; - struct resource gpmc_nand_res[] = { - { .flags = IORESOURCE_MEM, }, - { .flags = IORESOURCE_IRQ, }, - { .flags = IORESOURCE_IRQ, }, - }; - - BUG_ON(gpmc_nand_data->cs >= GPMC_CS_NUM); - - err = gpmc_cs_request(gpmc_nand_data->cs, 
NAND_IO_SIZE, - (unsigned long *)&gpmc_nand_res[0].start); - if (err < 0) { - pr_err("omap2-gpmc: Cannot request GPMC CS %d, error %d\n", - gpmc_nand_data->cs, err); - return err; - } - gpmc_nand_res[0].end = gpmc_nand_res[0].start + NAND_IO_SIZE - 1; - gpmc_nand_res[1].start = gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE); - gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT); - - memset(&s, 0, sizeof(struct gpmc_settings)); - gpmc_set_legacy(gpmc_nand_data, &s); - - s.device_nand = true; - - if (gpmc_t) { - err = gpmc_cs_set_timings(gpmc_nand_data->cs, gpmc_t, &s); - if (err < 0) { - pr_err("omap2-gpmc: Unable to set gpmc timings: %d\n", - err); - return err; - } - } - - err = gpmc_cs_program_settings(gpmc_nand_data->cs, &s); - if (err < 0) - goto out_free_cs; - - err = gpmc_configure(GPMC_CONFIG_WP, 0); - if (err < 0) - goto out_free_cs; - - if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) { - pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n"); - err = -EINVAL; - goto out_free_cs; - } - - - pdev = platform_device_alloc("omap2-nand", gpmc_nand_data->cs); - if (pdev) { - err = platform_device_add_resources(pdev, gpmc_nand_res, - ARRAY_SIZE(gpmc_nand_res)); - if (!err) - pdev->dev.platform_data = gpmc_nand_data; - } else { - err = -ENOMEM; - } - if (err) - goto out_free_pdev; - - err = platform_device_add(pdev); - if (err) { - dev_err(&pdev->dev, "Unable to register NAND device\n"); - goto out_free_pdev; - } - - return 0; - -out_free_pdev: - platform_device_put(pdev); -out_free_cs: - gpmc_cs_free(gpmc_nand_data->cs); - - return err; -} diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index e821a3132a3e1b..fd0de00c0d777e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -76,17 +76,6 @@ struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; -#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2) -extern int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t); -#else -static inline int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else From 2a7275a3d867b228216886aae35e1f64291180b1 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Tue, 28 Feb 2017 18:37:16 +0800 Subject: [PATCH 0351/1911] PCI: altera: Fix TLP_CFG_DW0 for TLP write eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") used TLP_FMTTYPE_CFGRD* (instead of TLP_FMTTYPE_CFGWR*) for TLP writes, which causes writing to configuration space to fail. Fix it by using correct FMTTYPE for write operation. Fixes: eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") Signed-off-by: Ley Foon Tan Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.9+ --- drivers/pci/host/pcie-altera.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index 5043b5f00ed833..75ec5cea26f6e1 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -57,10 +57,14 @@ #define TLP_WRITE_TAG 0x10 #define RP_DEVFN 0 #define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) -#define TLP_CFG_DW0(pcie, bus) \ +#define TLP_CFGRD_DW0(pcie, bus) \ ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0 \ : TLP_FMTTYPE_CFGRD1) << 24) | \ TLP_PAYLOAD_SIZE) +#define TLP_CFGWR_DW0(pcie, bus) \ + ((((bus == pcie->root_bus_nr) ? 
TLP_FMTTYPE_CFGWR0 \ + : TLP_FMTTYPE_CFGWR1) << 24) | \ + TLP_PAYLOAD_SIZE) #define TLP_CFG_DW1(pcie, tag, be) \ (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) #define TLP_CFG_DW2(bus, devfn, offset) \ @@ -222,7 +226,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, { u32 headers[TLP_HDR_SIZE]; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGRD_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); @@ -237,7 +241,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, u32 headers[TLP_HDR_SIZE]; int ret; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGWR_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); From ad47047220777460c6d7dc8333808591f29e5c17 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 28 Feb 2017 11:55:16 -0500 Subject: [PATCH 0352/1911] dm raid: fix raid "check" regression due to improper cleanup in raid_message() While cleaning up awkward branching in raid_message() a raid set "check" regression was introduced because "check" needs both MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED flags set. Fix this regression by explicitly setting both flags for the "check" case (like is also done for the "repair" case, but redundant set_bit()s are perfectly fine because it adds clarity to what is needed in response to both messages -- in addition this isn't fast path code). Fixes: 105db59912 ("dm raid: cleanup awkward branching in raid_message() option processing") Reported-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5c9e95d66f3b64..0460cf84fd0e74 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3462,9 +3462,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv) else if (!strcasecmp(argv[0], "recover")) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); else { - if (!strcasecmp(argv[0], "check")) + if (!strcasecmp(argv[0], "check")) { set_bit(MD_RECOVERY_CHECK, &mddev->recovery); - else if (!strcasecmp(argv[0], "repair")) { + set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + } else if (!strcasecmp(argv[0], "repair")) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } else From d36a19541fe8f392778ac137d60f9be8dfdd8f9d Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 28 Feb 2017 19:17:49 +0100 Subject: [PATCH 0353/1911] dm raid: fix data corruption on reshape request The lvm2 sequence to manage dm-raid constructor flags that trigger a rebuild or a reshape is defined as: 1) load table with flags (e.g. rebuild/delta_disks/data_offset) 2) clear out the flags in lvm2 metadata 3) store the lvm2 metadata, reload the table to reset the flags previously established during the initial load (1) -- in order to prevent repeatedly requesting a rebuild or a reshape on activation Currently, loading an inactive table with rebuild/reshape flags specified will cause dm-raid to rebuild/reshape on resume and thus start updating the raid metadata (about the progress). When the second table reload, to reset the flags, occurs the constructor accesses the volatile progress state kept in the raid superblocks. 
Because the active mapping is still processing the rebuild/reshape, that position will be stale by the time the device is resumed. In the reshape case, this causes data corruption by processing already reshaped stripes again. In the rebuild case, it does _not_ cause data corruption but instead involves superfluous rebuilds. Fix by keeping the raid set frozen during the first resume and then allow the rebuild/reshape during the second resume. Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.8+ --- drivers/md/dm-raid.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 0460cf84fd0e74..350527f6083412 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3756,6 +3756,8 @@ static int raid_preresume(struct dm_target *ti) return r; } +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3773,7 +3775,15 @@ static void raid_resume(struct dm_target *ti) mddev->ro = 0; mddev->in_sync = 0; - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + /* + * Keep the RAID set frozen if reshape/rebuild flags are set. + * The RAID set is unfrozen once the next table load/resume, + * which clears the reshape/rebuild flags, occurs. + * This ensures that the constructor for the inactive table + * retrieves an up-to-date reshape_position. + */ + if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); From 2664f3c94abc7181171b7c05b2aaa76ea7d9d613 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 28 Feb 2017 15:31:44 -0500 Subject: [PATCH 0354/1911] dm raid: bump the target version This version bump reflects that the reshape corruption fix (commit 92a39f6cc "dm raid: fix data corruption on reshape request") is present. Done as a separate fix because the above referenced commit is marked for stable and target version bumps in a stable@ fix are a recipe for the fix to never get backported to stable@ kernels (because of target version number conflicts). Also, move RESUME_STAY_FROZEN_FLAGS up with the reset the the _FLAGS definitions now that we don't need to worry about stable@ conflicts as a result of missing context. 
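As a rough sketch of the stay-frozen policy these two patches describe (simplified types and illustrative flag values, not the actual dm-raid code), the resume path only unfreezes MD recovery when none of the stay-frozen constructor flags were passed in:

  #include <stdbool.h>

  /* Illustrative bit positions; the real values live in dm-raid.c. */
  #define CTR_FLAG_DATA_OFFSET  (1u << 0)
  #define CTR_FLAG_DELTA_DISKS  (1u << 1)

  /* Flags whose presence means "stay frozen across the first resume". */
  #define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET)

  static bool may_unfreeze_on_resume(unsigned int ctr_flags)
  {
          /* Clear MD_RECOVERY_FROZEN only when no reshape/rebuild
           * request is still pending from the constructor. */
          return !(ctr_flags & RESUME_STAY_FROZEN_FLAGS);
  }

The second table load clears those constructor flags, so the next resume then proceeds with an up-to-date reshape_position.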
Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 350527f6083412..f8564d63982f43 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -101,6 +101,8 @@ struct raid_dev { #define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS) #define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV) +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + /* * Definitions of various constructor flags to * be used in checks of valid / invalid flags @@ -3756,8 +3758,6 @@ static int raid_preresume(struct dm_target *ti) return r; } -#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) - static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3791,7 +3791,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 10, 0}, + .version = {1, 10, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, From de09cdd09fa12c3f837902680c3011d96e811821 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Feb 2017 16:32:44 -0500 Subject: [PATCH 0355/1911] intel_idle: stop exposing platform acronyms in sysfs Cosmetic only -- no functional change in this patch. sysfs before: state4/desc:MWAIT 0x20 state4/name:C6-HSW sysfs after: state4/desc:MWAIT 0x20 state4/name:C6 We remove the platform acronyms from the end of the state name (-HSW in this case) for three reasonse. 1. more consistency with acpi_idle, which prints C1, C2, C3 etc. 2. users know what platform they are on already an acronym for the processor code name here seems to cause more confusion than clarity. 3. less clutter in "cpupower monitor" output, which truncates the names to 4 columns. The precise definition of the state continues to be available in "desc". Reported-by: Artem Bityutskiy Signed-off-by: Len Brown Signed-off-by: Rafael J. 
Wysocki --- drivers/idle/intel_idle.c | 172 +++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7d8ea3d5fda656..e045a5caa5bb88 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -125,7 +125,7 @@ static struct cpuidle_state *cpuidle_state_table; */ static struct cpuidle_state nehalem_cstates[] = { { - .name = "C1-NHM", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 3, @@ -133,7 +133,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-NHM", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -141,7 +141,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-NHM", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, @@ -149,7 +149,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-NHM", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -162,7 +162,7 @@ static struct cpuidle_state nehalem_cstates[] = { static struct cpuidle_state snb_cstates[] = { { - .name = "C1-SNB", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -170,7 +170,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SNB", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -178,7 +178,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-SNB", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -186,7 +186,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SNB", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, @@ -194,7 +194,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-SNB", + .name = "C7", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, @@ -207,7 +207,7 @@ static struct cpuidle_state snb_cstates[] = { static struct cpuidle_state byt_cstates[] = { { - .name = "C1-BYT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -215,7 +215,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6N-BYT", + .name = "C6N", .desc = "MWAIT 0x58", .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -223,7 +223,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6S-BYT", + .name = "C6S", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 500, @@ -231,7 +231,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-BYT", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -239,7 +239,7 
@@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7S-BYT", + .name = "C7S", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -252,7 +252,7 @@ static struct cpuidle_state byt_cstates[] = { static struct cpuidle_state cht_cstates[] = { { - .name = "C1-CHT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -260,7 +260,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6N-CHT", + .name = "C6N", .desc = "MWAIT 0x58", .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -268,7 +268,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6S-CHT", + .name = "C6S", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -276,7 +276,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-CHT", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -284,7 +284,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7S-CHT", + .name = "C7S", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -297,7 +297,7 @@ static struct cpuidle_state cht_cstates[] = { static struct cpuidle_state ivb_cstates[] = { { - .name = "C1-IVB", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -305,7 +305,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVB", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -313,7 +313,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVB", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -321,7 +321,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVB", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -329,7 +329,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-IVB", + .name = "C7", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, @@ -342,7 +342,7 @@ static struct cpuidle_state ivb_cstates[] = { static struct cpuidle_state ivt_cstates[] = { { - .name = "C1-IVT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -350,7 +350,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -358,7 +358,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -366,7 +366,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = 
intel_idle_freeze, }, { - .name = "C6-IVT", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 82, @@ -379,7 +379,7 @@ static struct cpuidle_state ivt_cstates[] = { static struct cpuidle_state ivt_cstates_4s[] = { { - .name = "C1-IVT-4S", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -387,7 +387,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT-4S", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -395,7 +395,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT-4S", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -403,7 +403,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVT-4S", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 84, @@ -416,7 +416,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { static struct cpuidle_state ivt_cstates_8s[] = { { - .name = "C1-IVT-8S", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -424,7 +424,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT-8S", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -432,7 +432,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT-8S", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -440,7 +440,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVT-8S", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 88, @@ -453,7 +453,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { static struct cpuidle_state hsw_cstates[] = { { - .name = "C1-HSW", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -461,7 +461,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-HSW", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -469,7 +469,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-HSW", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, @@ -477,7 +477,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-HSW", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -485,7 +485,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-HSW", + .name = "C7s", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -493,7 +493,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-HSW", + .name = "C8", .desc = 
"MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -501,7 +501,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-HSW", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -509,7 +509,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-HSW", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -521,7 +521,7 @@ static struct cpuidle_state hsw_cstates[] = { }; static struct cpuidle_state bdw_cstates[] = { { - .name = "C1-BDW", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -529,7 +529,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-BDW", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -537,7 +537,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-BDW", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, @@ -545,7 +545,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-BDW", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -553,7 +553,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-BDW", + .name = "C7s", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -561,7 +561,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-BDW", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -569,7 +569,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-BDW", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -577,7 +577,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-BDW", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -590,7 +590,7 @@ static struct cpuidle_state bdw_cstates[] = { static struct cpuidle_state skl_cstates[] = { { - .name = "C1-SKL", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -598,7 +598,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SKL", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -606,7 +606,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-SKL", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, @@ -614,7 +614,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SKL", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 85, @@ -622,7 +622,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-SKL", + .name = "C7s", .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 124, @@ -630,7 +630,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-SKL", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -638,7 +638,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-SKL", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 480, @@ -646,7 +646,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-SKL", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 890, @@ -659,7 +659,7 @@ static struct cpuidle_state skl_cstates[] = { static struct cpuidle_state skx_cstates[] = { { - .name = "C1-SKX", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -667,7 +667,7 @@ static struct cpuidle_state skx_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SKX", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -675,7 +675,7 @@ static struct cpuidle_state skx_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SKX", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -688,7 +688,7 @@ static struct cpuidle_state skx_cstates[] = { static struct cpuidle_state atom_cstates[] = { { - .name = "C1E-ATM", + .name = "C1E", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 10, @@ -696,7 +696,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C2-ATM", + .name = "C2", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10), .exit_latency = 20, @@ -704,7 +704,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C4-ATM", + .name = "C4", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, @@ -712,7 +712,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-ATM", + .name = "C6", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, @@ -724,7 +724,7 @@ static struct cpuidle_state atom_cstates[] = { }; static struct cpuidle_state tangier_cstates[] = { { - .name = "C1-TNG", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -732,7 +732,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C4-TNG", + .name = "C4", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, @@ -740,7 +740,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-TNG", + .name = "C6", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, @@ -748,7 +748,7 @@ 
static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-TNG", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -756,7 +756,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-TNG", + .name = "C9", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -768,7 +768,7 @@ static struct cpuidle_state tangier_cstates[] = { }; static struct cpuidle_state avn_cstates[] = { { - .name = "C1-AVN", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -776,7 +776,7 @@ static struct cpuidle_state avn_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-AVN", + .name = "C6", .desc = "MWAIT 0x51", .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 15, @@ -788,7 +788,7 @@ static struct cpuidle_state avn_cstates[] = { }; static struct cpuidle_state knl_cstates[] = { { - .name = "C1-KNL", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -796,7 +796,7 @@ static struct cpuidle_state knl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze }, { - .name = "C6-KNL", + .name = "C6", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 120, @@ -809,7 +809,7 @@ static struct cpuidle_state knl_cstates[] = { static struct cpuidle_state bxt_cstates[] = { { - .name = "C1-BXT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -817,7 +817,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-BXT", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -825,7 +825,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-BXT", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -833,7 +833,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-BXT", + .name = "C7s", .desc = "MWAIT 0x31", .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 155, @@ -841,7 +841,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-BXT", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1000, @@ -849,7 +849,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-BXT", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2000, @@ -857,7 +857,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-BXT", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -870,7 +870,7 @@ static struct cpuidle_state bxt_cstates[] = { static struct cpuidle_state dnv_cstates[] = { { - .name = "C1-DNV", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -878,7 +878,7 @@ static struct cpuidle_state dnv_cstates[] = { .enter = &intel_idle, .enter_freeze = 
intel_idle_freeze, }, { - .name = "C1E-DNV", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -886,7 +886,7 @@ static struct cpuidle_state dnv_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-DNV", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 50, From 70cd4c10b290dd77fff6dc702a9a2c8c679df121 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 11:51:37 +1100 Subject: [PATCH 0356/1911] KVM: PPC: Book3S HV: Fix software walk of guest process page tables This fixes some bugs in the code that walks the guest's page tables. These bugs cause MMIO emulation to fail whenever the guest is in virtial mode (MMU on), leading to the guest hanging if it tried to access a virtio device. The first bug was that when reading the guest's process table, we were using the whole of arch->process_table, not just the field that contains the process table base address. The second bug was that the mask used when reading the process table entry to get the radix tree base address, RPDB_MASK, had the wrong value. Fixes: 9e04ba69beec ("KVM: PPC: Book3S HV: Add basic infrastructure for radix guests") Fixes: e99833448c5f ("powerpc/mm/radix: Add partition table format & callback") Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index d73e9dfa5237f4..440f3423e21377 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -46,7 +46,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb0 field */ #define PATB_HR (1UL << 63) -#define RPDB_MASK 0x0ffffffffffff00fUL +#define RPDB_MASK 0x0fffffffffffff00UL #define RPDB_SHIFT (1UL << 8) #define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ #define RTS1_MASK (3UL << RTS1_SHIFT) @@ -57,6 +57,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb1 field */ #define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ #define PRTS_MASK 0x1f /* process table size field */ +#define PRTB_MASK 0x0ffffffffffff000UL /* * Limit process table to PAGE_SIZE table. This diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 4344651f408ca2..f6b3e67c576294 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -32,6 +32,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, u32 pid; int ret, level, ps; __be64 prte, rpte; + unsigned long ptbl; unsigned long root, pte, index; unsigned long rts, bits, offset; unsigned long gpa; @@ -53,8 +54,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read partition table to find root of tree for effective PID */ - ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, - &prte, sizeof(prte)); + ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16); + ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte)); if (ret) return ret; From 4e5acdc23a3dcbd6ad6dc93a9783dd9c838987c8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Feb 2017 11:05:47 +1100 Subject: [PATCH 0357/1911] KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9 In HPT mode on POWER9, the ASDR register is supposed to record segment information for hypervisor page faults. 
It turns out that POWER9 DD1 does not record the page size information in the ASDR for faults in guest real mode. We have the necessary information in memory already, so by moving the checks for real mode that already existed, we can use the in-memory copy. Since a load is likely to be faster than reading an SPR, we do this unconditionally (not just for POWER9 DD1). Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 47414a6fe2dde8..7c6477d1840aab 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1787,12 +1787,12 @@ kvmppc_hdsi: /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ + andi. r0, r11, MSR_DR /* data relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_DR /* data relocation enabled? */ - beq 3f clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT @@ -1879,12 +1879,12 @@ kvmppc_hisi: bne .Lradix_hisi /* for radix, just save ASDR */ andis. r0, r11, SRR1_ISI_NOPT@h beq 1f + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_IR /* instruction relocation enabled? */ - beq 3f clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_INST_SEGMENT From 0aa5277c3a6dbe9d5991c490ef23fe900254d931 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:39:25 +0800 Subject: [PATCH 0358/1911] drm/i915/gvt: have more registers with F_CMD_ACCESS flags set those registers are render registers, should have F_CMD_ACCESS flag set v4: rebase to lastest code base v3: per zhenyu's comments, move newly added registers to a separate patch v2: per Kevin's comments, move newly added registers to the tails of lists. 
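The idea is that F_CMD_ACCESS marks MMIO offsets that guest-submitted command buffers are allowed to touch, so render registers written via commands need the flag or the access gets rejected. A schematic sketch of that flag check follows; the structure and function names are invented for illustration and do not match the gvt sources:

  /* Illustrative bit value only; not the flag's real definition. */
  #define F_CMD_ACCESS  (1u << 3)

  struct mmio_attr {
          unsigned int offset;    /* register offset */
          unsigned int flags;     /* F_* attribute bits */
  };

  /* Return nonzero when a scanned command may touch this register. */
  static int cmd_may_access(const struct mmio_attr *attr)
  {
          if (!attr)
                  return 0;       /* untracked register: reject */
          return (attr->flags & F_CMD_ACCESS) != 0;
  }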
Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 223 +++++++++++++++------------- 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 57b4d538f37022..37956eee342c29 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1523,6 +1523,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_GM(reg, d, r, w) \ MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w) +#define MMIO_GM_RDR(reg, d, r, w) \ + MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w) + #define MMIO_RO(reg, d, f, rm, r, w) \ MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w) @@ -1542,6 +1545,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_RING_GM(prefix, d, r, w) \ MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w) +#define MMIO_RING_GM_RDR(prefix, d, r, w) \ + MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w) + #define MMIO_RING_RO(prefix, d, f, rm, r, w) \ MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w) @@ -1550,45 +1556,48 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); + MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler); MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_D(RING_HWSTAM, D_ALL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); #define RING_REG(base) (base + 0x28) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x134) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG - MMIO_GM(0x2148, D_ALL, NULL, NULL); - MMIO_GM(CCID, D_ALL, NULL, NULL); - MMIO_GM(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_D(RING_TAIL, D_ALL); - MMIO_RING_D(RING_HEAD, D_ALL); - MMIO_RING_D(RING_CTL, D_ALL); - MMIO_RING_D(RING_ACTHD, D_ALL); - MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) (base + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write); + MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); #undef RING_REG - MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); 
MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, @@ -1596,28 +1605,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); - - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_D(GAM_ECOCHK, D_ALL); - MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x9030, D_ALL); - MMIO_D(0x20a0, D_ALL); - MMIO_D(0x2420, D_ALL); - MMIO_D(0x2430, D_ALL); - MMIO_D(0x2434, D_ALL); - MMIO_D(0x2438, D_ALL); - MMIO_D(0x243c, D_ALL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2148,8 +2159,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(FORCEWAKE_ACK, D_ALL); MMIO_D(GEN6_GT_CORE_STATUS, D_ALL); MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL); - MMIO_D(GTFIFODBG, D_ALL); - MMIO_D(GTFIFOCTL, D_ALL); + MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL); MMIO_D(ECOBUS, D_ALL); @@ -2291,29 +2302,29 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL); - MMIO_D(GEN7_OACONTROL, D_HSW); + MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL); + MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL); 
MMIO_D(0x2b00, D_BDW_PLUS); MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(BCS_SWCTRL, D_ALL); - - MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); @@ -2329,7 +2340,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, + MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); @@ -2394,24 +2405,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(0x1c134, D_BDW_PLUS); - - MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); + MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); 
+ MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); + MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); + MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, @@ -2428,13 +2446,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + NULL, NULL); + MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, + ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) - MMIO_RING_D(RING_REG, D_BDW_PLUS); - MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS); + MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) @@ -2468,8 +2489,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2490,15 +2511,17 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); MMIO_D(0xfdc, D_BDW_PLUS); - MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); - MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); + MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb1f0, D_BDW); - MMIO_D(0xb1c0, D_BDW); + MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb100, D_BDW); - MMIO_D(0xb10c, D_BDW); + MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0xb110, D_BDW); MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, @@ -2508,10 +2531,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44484, D_BDW_PLUS); MMIO_D(0x4448c, D_BDW_PLUS); - 
MMIO_D(0x83a4, D_BDW); + MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_D(0x8430, D_BDW); + MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x110000, D_BDW_PLUS); @@ -2523,9 +2546,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x2248, D_BDW); + MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2707,15 +2730,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0xd08, D_SKL); MMIO_D(0x20e0, D_SKL); - MMIO_D(0x20ec, D_SKL); + MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_D(0x4de0, D_SKL); - MMIO_D(0x4de4, D_SKL); - MMIO_D(0x4de8, D_SKL); - MMIO_D(0x4dec, D_SKL); - MMIO_D(0x4df0, D_SKL); - MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write); + MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); MMIO_D(0x45008, D_SKL); @@ -2739,7 +2762,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL); MMIO_D(0x320f0, D_SKL); - MMIO_D(_REG_VCS2_EXCC, D_SKL); + MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL); MMIO_D(0x71034, D_SKL); MMIO_D(0x72034, D_SKL); @@ -2752,7 +2775,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); - MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); + MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } From 9112caafdf01439a6e43f4d8c09ceed7dc613d4a Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:40:10 +0800 Subject: [PATCH 0359/1911] drm/i915/gvt: add more registers into handlers list those registers are render registers with F_CMD_ACCESS flag set Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 37956eee342c29..ef17c38e00c44f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2332,6 +2332,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + 
MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2550,6 +2561,15 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2776,6 +2796,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44500, D_SKL); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); return 0; } From 1f58af304cce9e4a25b62b3619e69c586203c8ca Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:41:03 +0800 Subject: [PATCH 0360/1911] drm/i915/gvt: fix an error for one register register 0x20e0 should be mode register v2: rebased to latest code base Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ef17c38e00c44f..548aedfbd40272 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2749,7 +2749,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); MMIO_D(0xd08, D_SKL); - MMIO_D(0x20e0, D_SKL); + MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ From 858b0f571d30916bd69c45922045f24f26d6bfc9 Mon Sep 17 00:00:00 2001 From: Bing Niu Date: Tue, 28 Feb 2017 11:39:48 -0500 Subject: [PATCH 0361/1911] drm/i915/gvt: set SFUSE_STRAP properly for vitual monitor detection update the correct virtual montior connection status to vreg v2: address yulei's comment on commit message Signed-off-by: Bing Niu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 43e02e0383754a..5419ae6ec6339c 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -176,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + } if (IS_SKYLAKE(dev_priv) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { @@ -196,6 +202,8 
@@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) GEN8_PORT_DP_A_HOTPLUG; else vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } } From d1a9ccc4b1374a5a7762031fd8e4e398c68549e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Feb 2017 11:02:48 +0000 Subject: [PATCH 0362/1911] scsi: qedi: fix missing return error code check on call to qedi_setup_int The call to qedi_setup_int is not updating the return code rc yet rc is being checked for an error. Fix this by assigning rc to the return code from the call to qedi_setup_int. Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 5eda21d903e93d..8e3d92807cb803 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) */ qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params); - qedi_setup_int(qedi); + rc = qedi_setup_int(qedi); if (rc) goto stop_iscsi_func; From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: [PATCH 0363/1911] scsi: libiscsi: add lock around task lists to fix list corruption regression There's a rather long standing regression from the commit "libiscsi: Reduce locking contention in fast path" Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modification occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists integrity. Major enterprise supported kernels have been backing out the lock split for while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Cc: # v3.15+ Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- include/scsi/libiscsi.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d506..3fca34a675afc2 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + 
spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0e275de6dec0d..583875ea136ab2 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ From 40496c8ee73a5ca4fa581badf2247418980586b1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:21:18 -0500 Subject: [PATCH 0364/1911] x86: msr-index.h: Define MSR_PKG_CST_CONFIG_CONTROL define MSR_PKG_CST_CONFIG_CONTROL (0xE2), which is the string used by Intel Documentation. We use this MSR in intel_idle and turbostat by a previous name, to be updated in the next patch. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 710273c617b8d3..975f23eefe14a8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -47,6 +47,7 @@ #define MSR_PLATFORM_INFO 0x000000ce #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) From 6cfb2374f83bc70b8acb0ddb989988ba3f140b61 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:23:25 -0500 Subject: [PATCH 0365/1911] intel_idle: use new name for MSR_PKG_CST_CONFIG_CONTROL previously known as MSR_NHM_SNB_PKG_CST_CFG_CTL Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7d8ea3d5fda656..930537da76d492 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -961,9 +961,9 @@ static void auto_demotion_disable(void) { unsigned long long msr_bits; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); msr_bits &= ~(icpu->auto_demotion_disable_flags); - wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } static void c1e_promotion_disable(void) { @@ -1273,7 +1273,7 @@ static void sklh_idle_state_table_update(void) if ((mwait_substates & (0xF << 28)) == 0) return; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); /* PC10 is not enabled in PKG C-state limit */ if ((msr & 0xF) != 8) From 1df2e55abce64e2f3117fac3968a9ac382fbc9c3 Mon Sep 17 
00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:24:57 -0500 Subject: [PATCH 0366/1911] tools/power turbostat: use new name for MSR_PKG_CST_CONFIG_CONTROL Previously called MSR_NHM_SNB_PKG_CST_CFG_CTL Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index aedfaddbad30b4..d56f2b75dc585d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1761,12 +1761,12 @@ dump_nhm_cst_cfg(void) { unsigned long long msr; - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", @@ -2383,7 +2383,7 @@ void check_permissions() * MSR_SMI_COUNT 0x00000034 * * MSR_PLATFORM_INFO 0x000000ce - * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 + * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 * * MSR_MISC_PWR_MGMT 0x000001aa * @@ -2393,7 +2393,7 @@ void check_permissions() * MSR_CORE_C6_RESIDENCY 0x000003fd * * Side effect: - * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL + * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL */ int probe_nhm_msrs(unsigned int family, unsigned int model) { @@ -2462,7 +2462,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); From 419c9e986ea453d07140bffb9708d730b6317e8e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:26:22 -0500 Subject: [PATCH 0367/1911] x86: msr-index.h: Remove unused MSR_NHM_SNB_PKG_CST_CFG_CTL The two users, intel_idle driver and turbostat utility are using the new name, MSR_PKG_CST_CONFIG_CONTROL Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 975f23eefe14a8..12ecc6c1e7f891 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -46,7 +46,6 @@ #define MSR_FSB_FREQ 0x000000cd #define MSR_PLATFORM_INFO 0x000000ce -#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) From 0539ba118fe241b0d03202fda0cd19cb758b7fbd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:27:20 -0500 Subject: [PATCH 0368/1911] tools/power turbostat: Baytrail c-state support The Baytrail SOC, with its Silvermont core, has some unique properties: 1. a hardware CC1 residency counter 2. a module-c6 residency counter 3. a package-c6 counter at traditional package-c7 counter address. The SOC does not support c3, pc3, c7 or pc7 counters. 
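For reference, the two new counters land at MSR 0x3fa (MSR_ATOM_PKG_C6_RESIDENCY, sharing the traditional package-C7 address) and MSR 0x664 (MSR_MODULE_C6_RES_MS), as the msr-index.h hunk below shows. A minimal userspace sketch of the read path turbostat relies on, the /dev/cpu/N/msr character device, follows; it assumes the msr driver is loaded and root privileges, and the rdmsr_on_cpu() helper is illustrative rather than code from this patch.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_ATOM_PKG_C6_RESIDENCY 0x3fa  /* shares the traditional PKG_C7 address */
#define MSR_MODULE_C6_RES_MS      0x664

/* Illustrative helper: the msr driver maps the file offset to the MSR index. */
static int rdmsr_on_cpu(int cpu, uint32_t msr, uint64_t *val)
{
        char path[32];
        int fd;
        ssize_t n;

        snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
        fd = open(path, O_RDONLY);
        if (fd < 0)
                return -1;
        n = pread(fd, val, sizeof(*val), msr);
        close(fd);
        return n == sizeof(*val) ? 0 : -1;
}

int main(void)
{
        uint64_t pc6, mc6;

        if (rdmsr_on_cpu(0, MSR_ATOM_PKG_C6_RESIDENCY, &pc6) == 0 &&
            rdmsr_on_cpu(0, MSR_MODULE_C6_RES_MS, &mc6) == 0)
                printf("pkg C6: %llu  module C6: %llu\n",
                       (unsigned long long)pc6, (unsigned long long)mc6);
        return 0;
}

turbostat itself samples these counters each interval and reports the Mod%c6 delta as a percentage of the TSC delta, as the format_counters() hunk below does.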
Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 2 ++ tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++------ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 12ecc6c1e7f891..079db99f6560e3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -147,6 +147,7 @@ /* C-state Residency Counters */ #define MSR_PKG_C3_RESIDENCY 0x000003f8 #define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa #define MSR_PKG_C7_RESIDENCY 0x000003fa #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd @@ -203,6 +204,7 @@ #define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B #define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d56f2b75dc585d..19802be6dc02c5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -167,6 +167,7 @@ struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; @@ -375,6 +376,7 @@ struct msr_counter bic[] = { { 0x0, "RAM_J" }, { 0x0, "Core" }, { 0x0, "CPU" }, + { 0x0, "Mod%c6" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -412,6 +414,7 @@ struct msr_counter bic[] = { #define BIC_RAM_J (1ULL << 31) #define BIC_Core (1ULL << 32) #define BIC_CPU (1ULL << 33) +#define BIC_Mod_c6 (1ULL << 34) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present; @@ -504,6 +507,8 @@ void print_header(void) if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\tCPU%%c7"); + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\tMod%%c6"); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\tCoreTmp"); @@ -629,6 +634,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); } + outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } if (p) { @@ -774,6 +780,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + /* Mod%c6 */ + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / t->tsc); + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -988,6 +998,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->mc6_us = new->mc6_us - old->mc6_us; for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1109,6 +1120,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->mc6_us = 0; c->core_temp_c = 0; p->pkg_wtd_core_c0 = 0; @@ -1173,6 +1185,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c3 += c->c3; average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); @@ -1246,6 +1259,7 @@ void compute_average(struct thread_data *t, struct 
core_data *c, average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + average.cores.mc6_us /= topo.num_cores; if (do_skl_residency) { average.packages.pkg_wtd_core_c0 /= topo.num_packages; @@ -1376,8 +1390,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; t->smi_count = msr & 0xFFFFFFFF; } - - if (use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } @@ -1409,6 +1422,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + if (DO_BIC(BIC_Mod_c6)) + if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) + return -8; + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; @@ -1437,9 +1454,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) - if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) - return -10; + if (do_pc6) { + if (do_slm_cstates) { + if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } else { + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + } + if (do_pc2) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; @@ -1533,7 +1557,7 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; @@ -3336,8 +3360,6 @@ int has_skl_msrs(unsigned int family, unsigned int model) return 0; } - - int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3731,6 +3753,12 @@ void process_cpuid() do_pc3 = (pkg_cstate_limit >= PCL__3); do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + if (has_slv_msrs(family, model)) { + do_pc2 = do_pc3 = do_pc7 = 0; + do_pc6 = 1; + BIC_PRESENT(BIC_Mod_c6); + use_c1_residency_msr = 1; + } do_c8_c9_c10 = has_hsw_msrs(family, model); do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); @@ -4112,7 +4140,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.17 1 Jan 
2017" + fprintf(outf, "turbostat version 4.17 10 Jan 2017" " - Len Brown \n"); } From 710f273ba96182fd93ee8540ae06583c7d889d7c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 11 Jan 2017 22:12:25 -0500 Subject: [PATCH 0369/1911] tools/power turbostat: add precision to --debug frequency output Add a digit of precision to the --debug output for frequency range. This is useful when BCLK is not an integer. old: 6 * 83 = 500 MHz max efficiency frequency 26 * 83 = 2166 MHz base frequency new: 6 * 83.3 = 499.8 MHz max efficiency frequency 26 * 83.3 = 2165.8 MHz base frequency Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 19802be6dc02c5..762b81497c3bf5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1581,11 +1581,11 @@ dump_nhm_platform_info(void) fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); @@ -1607,12 +1607,12 @@ dump_hsw_turbo_ratio_limits(void) ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1629,42 +1629,42 @@ dump_ivt_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + fprintf(outf, "%d 
* %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1681,42 +1681,42 @@ dump_nhm_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 8 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 7 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 6 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 5 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1776,7 +1776,7 @@ dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) fprintf(outf, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", + "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } From e651262477c6d8cba79dffc1a6039da43d9c96b0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 11 Jan 2017 23:17:24 -0500 Subject: [PATCH 0370/1911] tools/power turbostat: further decode MSR_IA32_MISC_ENABLE Decode MISC_ENABLE.NO_TURBO, also use the #defines in msr-index.h for decoding this register cpu0: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT TURBO) Although it is not architectural, decode also MSR_IA32_MISC_ENABLE.prefetch-disable (bit-9). documented to be present on: Core, P4, Intel-Xeon reserved on: Atom, Silvermont, Nehalem, SNB, PHI ec. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 762b81497c3bf5..a1ec9d816dfa94 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3509,11 +3509,13 @@ void decode_misc_enable_msr(void) unsigned long long msr; if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) - fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", base_cpu, msr, - msr & (1 << 3) ? "TCC" : "", - msr & (1 << 16) ? "EIST" : "", - msr & (1 << 18) ? "MONITOR" : ""); + msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? 
"No-" : "", + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } /* From 8a34fd0226eaae64d61ff9a113d276e28acb6b5c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Jan 2017 23:22:28 -0500 Subject: [PATCH 0371/1911] x86 msr-index.h: Define Atom specific core ratio MSR locations These MSRs are currently used by the intel_pstate driver, using a local definition. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 079db99f6560e3..83bc672c225c57 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -209,6 +209,12 @@ #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 From 92134bdbc6272da6e98e9242cc6f1576cedc0735 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 25 Feb 2017 16:55:17 -0500 Subject: [PATCH 0372/1911] intel_pstate: use MSR_ATOM_RATIOS definitions from msr-index.h Originally, these MSRs were locally defined in this driver. Now the definitions are in msr-index.h -- use them. Signed-off-by: Len Brown --- drivers/cpufreq/intel_pstate.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 50bd6d987fc3a0..6e68b556305a68 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -39,11 +39,6 @@ #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 -#define ATOM_RATIOS 0x66a -#define ATOM_VIDS 0x66b -#define ATOM_TURBO_RATIOS 0x66c -#define ATOM_TURBO_VIDS 0x66d - #ifdef CONFIG_ACPI #include #include @@ -1258,7 +1253,7 @@ static int atom_get_min_pstate(void) { u64 value; - rdmsrl(ATOM_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_RATIOS, value); return (value >> 8) & 0x7F; } @@ -1266,7 +1261,7 @@ static int atom_get_max_pstate(void) { u64 value; - rdmsrl(ATOM_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_RATIOS, value); return (value >> 16) & 0x7F; } @@ -1274,7 +1269,7 @@ static int atom_get_turbo_pstate(void) { u64 value; - rdmsrl(ATOM_TURBO_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value); return value & 0x7F; } @@ -1336,7 +1331,7 @@ static void atom_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(ATOM_VIDS, value); + rdmsrl(MSR_ATOM_CORE_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); cpudata->vid.ratio = div_fp( @@ -1344,7 +1339,7 @@ static void atom_get_vid(struct cpudata *cpudata) int_tofp(cpudata->pstate.max_pstate - cpudata->pstate.min_pstate)); - rdmsrl(ATOM_TURBO_VIDS, value); + rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value); cpudata->vid.turbo = value & 0x7f; } From 0f7887c49b0c454aef9936a6eadabe1c91b5af55 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Jan 2017 23:49:18 -0500 Subject: [PATCH 0373/1911] tools/power turbostat: dump Atom P-states correctly Turbostat dumps MSR_TURBO_RATIO_LIMIT on Core Architecture. But Atom Architecture uses MSR_ATOM_CORE_RATIOS and MSR_ATOM_CORE_TURBO_RATIOS. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 103 ++++++++++++++++++++------ 1 file changed, 82 insertions(+), 21 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a1ec9d816dfa94..da6ec640caf780 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1721,6 +1721,54 @@ dump_nhm_turbo_ratio_limits(void) return; } +static void +dump_atom_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", + ratio, bclk, ratio * bclk); + + get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 24) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", + ratio, bclk, ratio * bclk); +} + static void dump_knl_turbo_ratio_limits(void) { @@ -2496,8 +2544,32 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) has_base_hz = 1; return 1; } +/* + * SLV client has supporet for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_MERRIFIELD: + case INTEL_FAM6_ATOM_MOOREFIELD: + return 1; + } + return 0; +} + int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { + if (has_slv_msrs(family, model)) + return 0; + switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ @@ -2509,6 +2581,13 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) return 1; } } +int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 1; + + return 0; +} int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2606,6 +2685,9 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) if (has_nhm_turbo_ratio_limit(family, model)) dump_nhm_turbo_ratio_limits(); + if (has_atom_turbo_ratio_limit(family, model)) + dump_atom_turbo_ratio_limits(); + if (has_knl_turbo_ratio_limit(family, model)) dump_knl_turbo_ratio_limits(); @@ -3286,27 +3368,6 @@ int has_snb_msrs(unsigned int family, unsigned int model) return 0; } -/* - * SLV client has supporet for unique MSRs: - * - * MSR_CC6_DEMOTION_POLICY_CONFIG - * MSR_MC6_DEMOTION_POLICY_CONFIG - */ - -int 
has_slv_msrs(unsigned int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT1: - case INTEL_FAM6_ATOM_MERRIFIELD: - case INTEL_FAM6_ATOM_MOOREFIELD: - return 1; - } - return 0; -} - /* * HSW adds support for additional MSRs: * From b3a34e9382a4aacfa7c0b24f9548737bbb20338e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 00:50:08 -0500 Subject: [PATCH 0374/1911] tools/power turbostat: decode CPUID(6).TURBO show the CPUID feature for turbo to clarify the case when it may not be shown in MISC_ENABLE CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT TURBO) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index da6ec640caf780..1011b5f7f21f66 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3626,6 +3626,7 @@ void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; unsigned int fms, family, model, stepping; + unsigned int has_turbo; eax = ebx = ecx = edx = 0; @@ -3696,6 +3697,7 @@ void process_cpuid() do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); + has_turbo = eax & (1 << 1); do_ptm = eax & (1 << 6); if (do_ptm) BIC_PRESENT(BIC_PkgTmp); @@ -3707,9 +3709,10 @@ void process_cpuid() has_epb = ecx & (1 << 3); if (debug) - fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", + has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", From 98af74599ea0757098a5776ea29e581b661dcf6f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:15:09 -0500 Subject: [PATCH 0375/1911] x86 msr_index.h: Define MSR_MISC_FEATURE_CONTROL This non-architectural MSR has disable bits for various prefetchers on modern processors. While these bits are generally touched only by the BIOS, say, via BIOS SETUP, it is useful to dump them when examining options that can alter performance. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83bc672c225c57..312fb7e14cdd98 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -467,6 +467,7 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 #define MSR_MISC_PWR_MGMT 0x000001aa #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 From 33148d671cc191fceaca5017e1bb060e9f30fbf7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:26:16 -0500 Subject: [PATCH 0376/1911] tools/power turbostat: decode MSR_MISC_FEATURE_CONTROL useful for observing if the BIOS disabled prefetch Not architectural, but docuemented as present on NHM, SNB and is present on others. 
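A per-bit view of the register makes the output easier to read. The sketch below assumes the commonly documented layout of MSR_MISC_FEATURE_CONTROL (bit 0 = L2 hardware prefetcher disable, bit 1 = L2 adjacent-line prefetcher disable, bit 2 = DCU prefetcher disable, bit 3 = DCU IP prefetcher disable); it is a sketch under that assumption, not the decode the patch below adds.

#include <stdio.h>

/* Sketch only: one line per prefetcher-disable bit, assuming the commonly
 * documented bit 0..3 layout of MSR_MISC_FEATURE_CONTROL (0x1a4).
 * A set bit means the corresponding prefetcher is disabled (e.g. by BIOS). */
static void show_prefetch_disables(unsigned long long msr)
{
        printf("L2-Prefetch:      %s\n", (msr & (1ULL << 0)) ? "disabled" : "enabled");
        printf("L2-Prefetch-pair: %s\n", (msr & (1ULL << 1)) ? "disabled" : "enabled");
        printf("L1-Prefetch:      %s\n", (msr & (1ULL << 2)) ? "disabled" : "enabled");
        printf("L1-IP-Prefetch:   %s\n", (msr & (1ULL << 3)) ? "disabled" : "enabled");
}

int main(void)
{
        show_prefetch_disables(0x0); /* 0x0: all prefetchers enabled */
        return 0;
}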
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1011b5f7f21f66..2f033f3d2a21ac 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -102,6 +102,7 @@ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ +unsigned int has_misc_feature_control; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -2466,6 +2467,7 @@ void check_permissions() * * Side effect: * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL + * sets has_misc_feature_control */ int probe_nhm_msrs(unsigned int family, unsigned int model) { @@ -2496,6 +2498,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_IVYBRIDGE: /* IVB */ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ pkg_cstate_limits = snb_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSX */ @@ -2510,9 +2513,11 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ pkg_cstate_limits = hsw_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_SKYLAKE_X: /* SKX */ pkg_cstate_limits = skx_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ no_MSR_MISC_PWR_MGMT = 1; @@ -3579,6 +3584,21 @@ void decode_misc_enable_msr(void) msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } +void decode_misc_feature_control(void) +{ + unsigned long long msr; + + if (!has_misc_feature_control) + return; + + if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) + fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", + base_cpu, msr, + msr & (0 << 0) ? "No-" : "", + msr & (1 << 0) ? "No-" : "", + msr & (2 << 0) ? "No-" : "", + msr & (3 << 0) ? "No-" : ""); +} /* * Decode MSR_MISC_PWR_MGMT * @@ -3725,6 +3745,7 @@ void process_cpuid() if (debug) decode_misc_enable_msr(); + if (max_level >= 0x7 && debug) { int has_sgx; @@ -3852,6 +3873,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (debug) + decode_misc_feature_control(); + return; } From fee86541d28934da4eb235367f7e2137acb1b359 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:59:12 -0500 Subject: [PATCH 0377/1911] tools/power turbostat: show all columns, independent of --debug Some time ago, turbostat overflowed 80 columns. So on the assumption that a "casual" user would always want topology and frequency columns, we hid the rest of the columns and the system configuration decoding behind the --debug option. Not everybody liked that change -- including me. I use --debug 99% of the time... Well, now we have "-o file" to put turbostat output into a file, so unless you are watching real-time in a small window, column count is less frequently a factor. And more recently, we got the "--hide columnA,columnB" option to specify columns to skip. So now we "un-hide" the rest of the columns from behind --debug, and show them all, by default. 
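The column machinery this relies on is the BIC_* bitmask scheme visible earlier in the series (bic_enabled, bic_present, BIC_PRESENT(), DO_BIC()). A minimal sketch of that pattern follows; the macro bodies and the bit assignments other than BIC_Mod_c6 are assumptions for illustration, not quotes from turbostat.

#include <stdio.h>

/* Minimal sketch of the bitmask-driven column selection behind --hide/--show.
 * BIC_Mod_c6 matches the hunk earlier in this series; the other bit values
 * and the macro bodies are illustrative assumptions. */
#define BIC_IRQ     (1ULL << 0)   /* illustrative */
#define BIC_SMI     (1ULL << 1)   /* illustrative */
#define BIC_Mod_c6  (1ULL << 34)

static unsigned long long bic_enabled = ~0ULL;  /* everything shown by default */
static unsigned long long bic_present;          /* filled in at probe time */

#define BIC_PRESENT(x) (bic_present |= (x))
#define DO_BIC(x)      (bic_enabled & bic_present & (x))

int main(void)
{
        BIC_PRESENT(BIC_Mod_c6);   /* probe found the module-C6 counter */
        BIC_PRESENT(BIC_IRQ);
        bic_enabled &= ~BIC_IRQ;   /* as if the user passed --hide IRQ */

        /* a column is printed only when supported and not hidden */
        printf("show Mod%%c6: %s\n", DO_BIC(BIC_Mod_c6) ? "yes" : "no");
        printf("show IRQ:    %s\n", DO_BIC(BIC_IRQ) ? "yes" : "no");
        return 0;
}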
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2f033f3d2a21ac..c50b452a404e09 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -479,9 +479,6 @@ void print_header(void) if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "\tTSC_MHz"); - if (!debug) - goto done; - if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\tIRQ"); if (DO_BIC(BIC_SMI)) @@ -593,7 +590,6 @@ void print_header(void) } } -done: outp += sprintf(outp, "\n"); } @@ -741,9 +737,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); - if (!debug) - goto done; - /* IRQ */ if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\t%d", t->irq_count); From 96e4715857cf184536ef46e6ea92465f58a7b12d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 02:26:00 -0500 Subject: [PATCH 0378/1911] tools/power turbostat: print system config, unless --quiet Some users want turbostat to tell them everything, by default. Some users want turbostat to be quiet, by default. I find that I'm in the 1st camp, and so I've never liked needing to type the --debug parameter to decode the system configuration. So here we change the default and print the system configuration, by default. (The --debug option is now un-documented, though it does still exist for debugging turbostat internals) When you do not want to see the system configuration header, use the new "--quiet" option. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 7 +- tools/power/x86/turbostat/turbostat.c | 105 +++++++++++++------------- 2 files changed, 52 insertions(+), 60 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e8fb1e02d12126..a08de27713e0c8 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -53,8 +53,7 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--Dump\fP displays the raw counter values. .PP -\fB--debug\fP displays additional system configuration information. Invoking this parameter -more than once may also enable internal turbostat debug information. +\fB--quiet\fP Do not decode and print the system configuration header information. .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP @@ -124,7 +123,6 @@ Or a command may be specified as in "FORK EXAMPLE" below. .fi .SH DEBUG EXAMPLE -The "--debug" option prints additional system information before measurements: The first row of statistics is a summary for the entire system. For residency % columns, the summary is a weighted average. @@ -188,9 +186,6 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. -.PP -The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds. -See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered after the command exits. 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c50b452a404e09..6867557596afcc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -49,6 +49,7 @@ FILE *outf; int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; +unsigned int quiet; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; @@ -3114,7 +3115,7 @@ void rapl_probe(unsigned int family, unsigned int model) tdp = get_tdp(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; - if (debug) + if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; @@ -3239,11 +3240,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - if (debug) { - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " - "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, - rapl_power_units, rapl_energy_units, rapl_time_units); - } + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + rapl_power_units, rapl_energy_units, rapl_time_units); + if (do_rapl & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) @@ -3264,7 +3263,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -9; fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "": "UN"); + cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", @@ -3290,40 +3289,34 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } if (do_rapl & RAPL_CORE_POLICY) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POLICY, &msr)) - return -7; + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; - fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); - } + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } if (do_rapl & RAPL_CORES_POWER_LIMIT) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "Cores Limit"); - } + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); } if (do_rapl & RAPL_GFX) { - if (debug) { - if (get_msr(cpu, MSR_PP1_POLICY, &msr)) - return -8; + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; - fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); - if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "GFX Limit"); - } + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } @@ -3469,7 +3462,7 @@ double slm_bclk(void) } freq = slm_freq_table[i]; - if (debug) + if (!quiet) fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; @@ -3533,7 +3526,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; - if (debug) + if (!quiet) fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); @@ -3648,7 +3641,7 @@ void process_cpuid() if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; - if (debug) + if (!quiet) fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); @@ -3659,7 +3652,7 @@ void process_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (debug) { + if (!quiet) { fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", @@ -3721,7 +3714,7 @@ void process_cpuid() has_hwp_pkg = eax & (1 << 11); has_epb = ecx & (1 << 3); - if (debug) + if (!quiet) fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", @@ -3735,11 +3728,11 @@ void process_cpuid() has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); - if (debug) + if (!quiet) decode_misc_enable_msr(); - if (max_level >= 0x7 && debug) { + if (max_level >= 0x7 && !quiet) { int has_sgx; ecx = 0; @@ -3765,7 +3758,7 @@ void process_cpuid() if (ebx_tsc != 0) { - if (debug && (ebx != 0)) + if (!quiet && (ebx != 0)) fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); @@ -3790,7 +3783,7 @@ void process_cpuid() if (crystal_hz) { tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; - if (debug) + if (!quiet) fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } @@ -3805,7 +3798,7 @@ void process_cpuid() base_mhz = max_mhz = bus_mhz = edx = 0; __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); - if (debug) + if (!quiet) fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } @@ -3845,16 +3838,16 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - if (debug) + if (!quiet) decode_misc_pwr_mgmt_msr(); - if (debug && has_slv_msrs(family, model)) + if (!quiet && has_slv_msrs(family, model)) decode_c6_demotion_policy_msr(); rapl_probe(family, model); perf_limit_reasons_probe(family, model); - if (debug) + if (!quiet) dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) @@ -3866,7 +3859,7 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); - if (debug) + if (!quiet) decode_misc_feature_control(); return; @@ -3883,7 +3876,7 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" - "--debug run in \"debug\" mode\n" + "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" "--out file create or truncate \"file\" for all output\n" @@ -4136,24 +4129,24 @@ void turbostat_init() process_cpuid(); - if (debug) + if (!quiet) for_all_cpus(print_hwp, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_epb, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_perf_limit, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_rapl, ODD_COUNTERS); for_all_cpus(set_temperature_target, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_thermal, ODD_COUNTERS); - if (debug && do_irtl_snb) + if (!quiet && do_irtl_snb) print_irtl(); } @@ -4429,7 +4422,7 @@ void cmdline(int argc, char **argv) static struct option long_options[] = { {"add", required_argument, 0, 'a'}, {"Dump", no_argument, 0, 'D'}, - {"debug", no_argument, 0, 'd'}, + {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help @@ -4437,6 +4430,7 @@ void cmdline(int argc, char **argv) {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, + {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, {"TCC", required_argument, 0, 'T'}, @@ -4446,7 +4440,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -4491,6 +4485,9 @@ void cmdline(int argc, char **argv) case 'p': show_core_only++; break; + case 'q': + quiet = 1; + break; case 's': parse_show_hide(optarg, SHOW_LIST); break; @@ -4514,7 +4511,7 @@ int main(int argc, char **argv) cmdline(argc, argv); - if (debug) + if (!quiet) print_version(); turbostat_init(); From 008d396eb219ee5a1c98c9ef01c35752d35f0d6c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:29:51 -0500 Subject: [PATCH 0379/1911] tools/power turbostat: use tsc_tweak everywhere it is needed The CPU ticks at a rate in the "bus clock" domain. eg. 100 MHz * bus_ratio. On newer processors, the TSC has been moved out of this BCLK domain and into a separate crystal-clock domain. While the TSC ticks "close to" the base frequency, those that look closely at the numbers will notice small errors in calculations that mix units of TSC clocks and bus clocks. "tsc_tweak" was introduced to address the most visible mixing -- the %Busy and the Busy_MHz calculations. (A simplification has since removed TSC from the Busy_MHz calculation.) Here we apply the tsc_tweak to every place where BCLK and TSC units are mixed. The result is that on a system which is 100% idle, the sum of the C-states is now much more likely to be close to 100%.
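A minimal sketch of the unit fix, using the variable names from the patch below (format_counters() is the representative case):

	/* scale the measured TSC interval into the BCLK domain once ... */
	tsc = t->tsc * tsc_tweak;

	/* ... and divide every residency percentage by the scaled value, e.g. %Busy: */
	outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf / tsc);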
Reported-by: Travis Downs Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 48 ++++++++++++++------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6867557596afcc..bbdf9ba9d41ba3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -680,7 +680,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - double interval_float; + double interval_float, tsc; char *fmt8; int i; struct msr_counter *mp; @@ -695,6 +695,8 @@ int format_counters(struct thread_data *t, struct core_data *c, interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + tsc = t->tsc * tsc_tweak; + /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (DO_BIC(BIC_Package)) @@ -725,14 +727,14 @@ int format_counters(struct thread_data *t, struct core_data *c, 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); + outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else outp += sprintf(outp, "\t%.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) @@ -748,7 +750,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -760,7 +762,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); } } @@ -769,15 +771,15 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -791,7 +793,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); } } @@ -819,24 +821,24 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * 
p->pkg_any_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); } if (do_pc2) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); if (do_pc3) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); if (do_pc6) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); if (do_pc7) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); if (do_c8_c9_c10) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); } /* @@ -878,7 +880,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); } } @@ -1048,7 +1050,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = old->tsc - old->mperf - core_delta->c3 + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } From 0f47c08d8ccf8252a5c007502919bdc2126ffb1f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 00:50:45 -0500 Subject: [PATCH 0380/1911] tools/power turbostat: bug fixes to --add, --show/--hide features Fix a bug with --add, where the title of the column is un-initialized if not specified by the user. The initial implementation of --show and --hide neglected to handle the pc8/pc9/pc10 counters. 
Fix a bug where "--show Core" only worked with --debug Reported-by: Wendy Wang Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 138 ++++++++++++++------------ 1 file changed, 77 insertions(+), 61 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bbdf9ba9d41ba3..4b04ba86e44fd7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -55,11 +55,6 @@ unsigned int summary_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; -unsigned int do_pc2; -unsigned int do_pc3; -unsigned int do_pc6; -unsigned int do_pc7; -unsigned int do_c8_c9_c10; unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; @@ -365,6 +360,9 @@ struct msr_counter bic[] = { { 0x0, "Pkg%pc3" }, { 0x0, "Pkg%pc6" }, { 0x0, "Pkg%pc7" }, + { 0x0, "Pkg%pc8" }, + { 0x0, "Pkg%pc9" }, + { 0x0, "Pkg%pc10" }, { 0x0, "PkgWatt" }, { 0x0, "CorWatt" }, { 0x0, "GFXWatt" }, @@ -403,26 +401,30 @@ struct msr_counter bic[] = { #define BIC_Pkgpc3 (1ULL << 18) #define BIC_Pkgpc6 (1ULL << 19) #define BIC_Pkgpc7 (1ULL << 20) -#define BIC_PkgWatt (1ULL << 21) -#define BIC_CorWatt (1ULL << 22) -#define BIC_GFXWatt (1ULL << 23) -#define BIC_PkgCnt (1ULL << 24) -#define BIC_RAMWatt (1ULL << 27) -#define BIC_PKG__ (1ULL << 28) -#define BIC_RAM__ (1ULL << 29) -#define BIC_Pkg_J (1ULL << 30) -#define BIC_Cor_J (1ULL << 31) -#define BIC_GFX_J (1ULL << 30) -#define BIC_RAM_J (1ULL << 31) -#define BIC_Core (1ULL << 32) -#define BIC_CPU (1ULL << 33) -#define BIC_Mod_c6 (1ULL << 34) +#define BIC_Pkgpc8 (1ULL << 21) +#define BIC_Pkgpc9 (1ULL << 22) +#define BIC_Pkgpc10 (1ULL << 23) +#define BIC_PkgWatt (1ULL << 24) +#define BIC_CorWatt (1ULL << 25) +#define BIC_GFXWatt (1ULL << 26) +#define BIC_PkgCnt (1ULL << 27) +#define BIC_RAMWatt (1ULL << 28) +#define BIC_PKG__ (1ULL << 29) +#define BIC_RAM__ (1ULL << 30) +#define BIC_Pkg_J (1ULL << 31) +#define BIC_Cor_J (1ULL << 32) +#define BIC_GFX_J (1ULL << 33) +#define BIC_RAM_J (1ULL << 34) +#define BIC_Core (1ULL << 35) +#define BIC_CPU (1ULL << 36) +#define BIC_Mod_c6 (1ULL << 37) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) /* * bic_lookup @@ -539,19 +541,20 @@ void print_header(void) outp += sprintf(outp, "\tCPUGFX%%"); } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "\tPkg%%pc2"); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "\tPkg%%pc3"); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "\tPkg%%pc6"); - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "\tPkg%%pc7"); - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) outp += sprintf(outp, "\tPkg%%pc8"); + if (DO_BIC(BIC_Pkgpc9)) outp += sprintf(outp, "\tPkg%%pc9"); + if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "\tPk%%pc10"); - } if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) @@ -644,11 +647,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); outp += sprintf(outp, "pc2: %016llX\n", p->pc2); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "pc3: %016llX\n", p->pc3); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "pc6: %016llX\n", p->pc6); - if (do_pc7) + 
if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "pc7: %016llX\n", p->pc7); outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); @@ -827,19 +830,20 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + if (DO_BIC(BIC_Pkgpc9)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); - } /* * If measurement interval exceeds minimum RAPL Joule Counter range, @@ -949,11 +953,11 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; } old->pc2 = new->pc2 - old->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) old->pc3 = new->pc3 - old->pc3; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) old->pc6 = new->pc6 - old->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) old->pc7 = new->pc7 - old->pc7; old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; @@ -1126,11 +1130,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pkg_both_core_gfxe_c0 = 0; p->pc2 = 0; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) p->pc3 = 0; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) p->pc6 = 0; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) p->pc7 = 0; p->pc8 = 0; p->pc9 = 0; @@ -1204,11 +1208,11 @@ int sum_counters(struct thread_data *t, struct core_data *c, } average.packages.pc2 += p->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 += p->pc3; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 += p->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 += p->pc7; average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; @@ -1266,11 +1270,11 @@ void compute_average(struct thread_data *t, struct core_data *c, } average.packages.pc2 /= topo.num_packages; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 /= topo.num_packages; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 /= topo.num_packages; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 /= topo.num_packages; average.packages.pc8 /= topo.num_packages; @@ -1448,10 +1452,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) return -13; } - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) { + if (DO_BIC(BIC_Pkgpc6)) { if (do_slm_cstates) { if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) return -10; @@ -1461,20 +1465,22 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) return -13; + if (DO_BIC(BIC_Pkgpc9)) if 
(get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) return -13; + if (DO_BIC(BIC_Pkgpc10)) if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) return -13; - } + if (do_rapl & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; @@ -3824,17 +3830,27 @@ void process_cpuid() BIC_PRESENT(BIC_CPU_c7); do_irtl_snb = has_snb_msrs(family, model); - do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); - do_pc3 = (pkg_cstate_limit >= PCL__3); - do_pc6 = (pkg_cstate_limit >= PCL__6); - do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + if (pkg_cstate_limit >= PCL__3) + BIC_PRESENT(BIC_Pkgpc3); + if (pkg_cstate_limit >= PCL__6) + BIC_PRESENT(BIC_Pkgpc6); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); if (has_slv_msrs(family, model)) { - do_pc2 = do_pc3 = do_pc7 = 0; - do_pc6 = 1; + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } - do_c8_c9_c10 = has_hsw_msrs(family, model); + if (has_hsw_msrs(family, model)) { + BIC_PRESENT(BIC_Pkgpc8); + BIC_PRESENT(BIC_Pkgpc9); + BIC_PRESENT(BIC_Pkgpc10); + } do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); @@ -3981,7 +3997,7 @@ void topology_probe() if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (debug && !summary_only && topo.num_cores_per_pkg > 1) + if (!summary_only && topo.num_cores_per_pkg > 1) BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; @@ -4282,7 +4298,7 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, void parse_add_command(char *add_command) { int msr_num = 0; - char name_buffer[NAME_BYTES]; + char name_buffer[NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; From 311f77708f362c1af0a999ba5ebbbc062fdc5601 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 16 Feb 2017 23:48:50 -0500 Subject: [PATCH 0381/1911] x86: intel-family.h: Add GEMINI_LAKE SOC Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8167fdb67ae846..9814db42b79001 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -59,6 +59,7 @@ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ #define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ /* Xeon Phi */ From ac01ac1371d01beb02827f89fff9aac9d7941e91 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 01:45:35 -0500 Subject: [PATCH 0382/1911] tools/power turbostat: initial Gemini Lake SOC support Gemini Lake is similar to Apollo Lake (Broxton/Goldmont) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4b04ba86e44fd7..819d67fbb6ca75 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2535,6 +2535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) pkg_cstate_limits = 
phi_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ pkg_cstate_limits = bxt_pkg_cstate_limits; break; @@ -3018,6 +3019,7 @@ void rapl_probe(unsigned int family, unsigned int model) } break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; if (rapl_joules) BIC_PRESENT(BIC_Pkg_J); @@ -3361,6 +3363,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ return 1; } @@ -3392,6 +3395,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: return 1; } return 0; @@ -3783,6 +3787,7 @@ void process_cpuid() crystal_hz = 25000000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: crystal_hz = 19200000; /* 19.2 MHz */ break; default: From 7170a374377d7c70d63a2d3eb38f8fe32e6ffadd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 02:13:27 -0500 Subject: [PATCH 0383/1911] tools/power turbostat: Denverton: use HW CC1 counter, skip C3, C7 The CC1 column in turbostat can be computed by subtracting the core c-state residency counters from the total Cx residency. CC1 = (Idle_time_as_measured by MPERF) - (all core C-states with residency counters) However, as the underlying counter reads are not atomic, errors can be noticed in these calculations, especially when the numbers are small. Denverton has a hardware CC1 residency counter to improve the accuracy of the cc1 statistic -- use it. At the same time, Denverton has no concept of CC3, PC3, CC7, PC7, so skip collecting and printing those columns. Finally, a note of clarification. Turbostat prints the standard PC2 residency counter, but on Denverton hardware, that actually means PC1E. Turbostat prints the standard PC6 residency counter, but on Denverton hardware, that actually means PC2. At this point, we document that difference in this commit message, rather than adding a quirk to the software.
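For reference, a sketch of the derived-CC1 arithmetic that the hardware counter replaces -- this mirrors delta_thread() from earlier in this series and is not code added by this patch:

	/* derived CC1: the idle time left over after the counted core C-states */
	old->c1 = (old->tsc * tsc_tweak) - old->mperf
		- core_delta->c3 - core_delta->c6 - core_delta->c7;

	/* Denverton sets use_c1_residency_msr = 1 instead, so CPU%c1 is read from a
	 * hardware residency counter and avoids the noise of these non-atomic reads
	 * when the deltas are small. */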
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 819d67fbb6ca75..1010135ee9733d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2572,6 +2572,18 @@ int has_slv_msrs(unsigned int family, unsigned int model) } return 0; } +int is_dnv(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3851,6 +3863,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_dnv(family, model)) { + BIC_PRESENT(BIC_CPU_c1); + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + use_c1_residency_msr = 1; + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); From 34c7619762f7b4ebbd5157b312e6022b725c031e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 02:36:41 -0500 Subject: [PATCH 0384/1911] tools/power turbostat: skip unused counters on SKX Skip these four counters on SKX, as they are always zero: cc3, pc3 cc7, pc7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1010135ee9733d..8c437115d41b93 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2584,6 +2584,18 @@ int is_dnv(unsigned int family, unsigned int model) } return 0; } +int is_skx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_SKYLAKE_X: + return 1; + } + return 0; +} int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3871,6 +3883,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); use_c1_residency_msr = 1; } + if (is_skx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); From 31e07522be566cd039ff7a770550076cc1707a0c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 31 Jan 2017 23:07:49 -0500 Subject: [PATCH 0385/1911] tools/power turbostat: fix decoding for GLM, DNV, SKX turbo-ratio limits Newer processors do not hard-code the the number of cpus in each bin to {1, 2, 3, 4, 5, 6, 7, 8} Rather, they can specify any number of CPUS in each of the 8 bins: eg. ... 37 * 100.0 = 3600.0 MHz max turbo 4 active cores 38 * 100.0 = 3700.0 MHz max turbo 3 active cores 39 * 100.0 = 3800.0 MHz max turbo 2 active cores 39 * 100.0 = 3900.0 MHz max turbo 1 active cores could now look something like this: ... 
37 * 100.0 = 3600.0 MHz max turbo 16 active cores 38 * 100.0 = 3700.0 MHz max turbo 8 active cores 39 * 100.0 = 3800.0 MHz max turbo 4 active cores 39 * 100.0 = 3900.0 MHz max turbo 2 active cores Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 90 ++++++++++++++++++++------- 1 file changed, 67 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8c437115d41b93..67a275882a8df4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1671,56 +1671,84 @@ dump_ivt_turbo_ratio_limits(void) ratio, bclk, ratio * bclk); return; } +int has_turbo_ratio_group_limits(int family, int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} static void -dump_nhm_turbo_ratio_limits(void) +dump_turbo_ratio_limits(int family, int model) { - unsigned long long msr; - unsigned int ratio; + unsigned long long msr, core_counts; + unsigned int ratio, group_size; get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + if (has_turbo_ratio_group_limits(family, model)) { + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); + } else { + core_counts = 0x0807060504030201; + } + ratio = (msr >> 56) & 0xFF; + group_size = (core_counts >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 8 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 48) & 0xFF; + group_size = (core_counts >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 7 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 40) & 0xFF; + group_size = (core_counts >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 6 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 32) & 0xFF; + group_size = (core_counts >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 5 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 24) & 0xFF; + group_size = (core_counts >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 16) & 0xFF; + group_size = (core_counts >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 8) & 0xFF; + group_size = (core_counts >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 0) & 0xFF; + 
group_size = (core_counts >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); return; } @@ -2597,7 +2625,7 @@ int is_skx(unsigned int family, unsigned int model) return 0; } -int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) +int has_turbo_ratio_limit(unsigned int family, unsigned int model) { if (has_slv_msrs(family, model)) return 0; @@ -2668,6 +2696,22 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + return 1; + default: + return 0; + } +} int has_config_tdp(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2714,8 +2758,8 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) if (has_ivt_turbo_ratio_limit(family, model)) dump_ivt_turbo_ratio_limits(); - if (has_nhm_turbo_ratio_limit(family, model)) - dump_nhm_turbo_ratio_limits(); + if (has_turbo_ratio_limit(family, model)) + dump_turbo_ratio_limits(family, model); if (has_atom_turbo_ratio_limit(family, model)) dump_atom_turbo_ratio_limits(); From ade0ebacdf03591b3dab642e6e92da60c20ebdb3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 01:56:47 -0500 Subject: [PATCH 0386/1911] tools/power turbostat: skip unused counters on BDX Skip these two counters on BDX, as they are always zero: cc7, pc7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 67a275882a8df4..334c4c29d4b5c8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2612,6 +2612,19 @@ int is_dnv(unsigned int family, unsigned int model) } return 0; } +int is_bdx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + return 1; + } + return 0; +} int is_skx(unsigned int family, unsigned int model) { @@ -3933,6 +3946,10 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_CPU_c7); BIC_NOT_PRESENT(BIC_Pkgpc7); } + if (is_bdx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); From 495c7654ccfb771d19ce1b9fbc7be21e45b14636 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 8 Feb 2017 02:41:51 -0500 Subject: [PATCH 0387/1911] tools/power turbostat: extend --add option to accept /sys path Previously, the --add option could specify only an MSR. Here is is extended so an arbitrary /sys attribute, as specified by an absolute file path name. 
sudo ./turbostat --add /sys/devices/system/cpu/cpu0/cpuidle/state5/usage Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 92 ++++++++++++++++++++------- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 334c4c29d4b5c8..841f837e317fc7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -138,6 +138,7 @@ unsigned int has_misc_feature_control; * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters */ #define NAME_BYTES 20 +#define PATH_BYTES 128 int backwards_count; char *progname; @@ -213,6 +214,7 @@ enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; struct msr_counter { unsigned int msr_num; char name[NAME_BYTES]; + char path[PATH_BYTES]; unsigned int width; enum counter_type type; enum counter_format format; @@ -344,7 +346,7 @@ struct msr_counter bic[] = { { 0x0, "Bzy_MHz" }, { 0x0, "TSC_MHz" }, { 0x0, "IRQ" }, - { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, { 0x0, "Busy%" }, { 0x0, "CPU%c1" }, { 0x0, "CPU%c3" }, @@ -1307,6 +1309,51 @@ static unsigned long long rdtsc(void) return low | ((unsigned long long)high) << 32; } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, mode); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} +/* + * snapshot_sysfs_counter() + * + * return snapshot of given counter + */ +unsigned long long snapshot_sysfs_counter(char *path) +{ + FILE *fp; + int retval; + unsigned long long counter; + + fp = fopen_or_die(path, "r"); + + retval = fscanf(fp, "%lld", &counter); + if (retval != 1) + err(1, "snapshot_sysfs_counter(%s)", path); + + fclose(fp); + + return counter; +} + +int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) +{ + if (mp->msr_num != 0) { + if (get_msr(cpu, mp->msr_num, counterp)) + return -1; + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -1397,11 +1444,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &t->counter[i])) + if (get_mp(cpu, mp, &t->counter[i])) return -10; } - /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -1434,7 +1480,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &c->counter[i])) + if (get_mp(cpu, mp, &c->counter[i])) return -10; } @@ -1524,7 +1570,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->gfx_mhz = gfx_cur_mhz; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &p->counter[i])) + if (get_mp(cpu, mp, &p->counter[i])) return -10; } @@ -2013,16 +2059,6 @@ void free_all_buffers(void) free(irqs_per_cpu); } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, mode); - if (!filep) - err(1, "%s: open failed", path); - return filep; -} /* * Parse a file containing a single int. 
@@ -2581,7 +2617,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) return 1; } /* - * SLV client has supporet for unique MSRs: + * SLV client has support for unique MSRs: * * MSR_CC6_DEMOTION_POLICY_CONFIG * MSR_MC6_DEMOTION_POLICY_CONFIG @@ -4342,9 +4378,9 @@ void print_version() { " - Len Brown \n"); } -int add_counter(unsigned int msr_num, char *name, unsigned int width, - enum counter_scope scope, enum counter_type type, - enum counter_format format) +int add_counter(unsigned int msr_num, char *path, char *name, + unsigned int width, enum counter_scope scope, + enum counter_type type, enum counter_format format) { struct msr_counter *msrp; @@ -4356,6 +4392,8 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES); + if (path) + strncpy(msrp->path, path, PATH_BYTES); msrp->width = width; msrp->type = type; msrp->format = format; @@ -4402,6 +4440,7 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, void parse_add_command(char *add_command) { int msr_num = 0; + char *path = NULL; char name_buffer[NAME_BYTES] = ""; int width = 64; int fail = 0; @@ -4417,6 +4456,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + if (*add_command == '/') { + path = add_command; + goto next; + } + if (sscanf(add_command, "u%d", &width) == 1) { if ((width == 32) || (width == 64)) goto next; @@ -4466,12 +4510,14 @@ void parse_add_command(char *add_command) next: add_command = strchr(add_command, ','); - if (add_command) + if (add_command) { + *add_command = '\0'; add_command++; + } } - if (msr_num == 0) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n"); + if ((msr_num == 0) && (path == NULL)) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); fail++; } @@ -4495,7 +4541,7 @@ void parse_add_command(char *add_command) } } - if (add_counter(msr_num, name_buffer, width, scope, type, format)) + if (add_counter(msr_num, path, name_buffer, width, scope, type, format)) fail++; if (fail) { From 41618e63f2a869902f8534f0db337e85d6bd04c8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 9 Feb 2017 18:25:22 -0500 Subject: [PATCH 0388/1911] tools/power turbostat: print sysfs C-state stats When turbostat shows % of time in a CPU idle power state, it has always been showing information from underlying hardware residency counters. While this reflects what the hardware is doing, and is thus useful for understanding the hardware, it doesn't directly tell us what Linux requested -- which is useful for tuning Linux itself. Here we add columns to turbostat to show the Linux cpuidle sub-system statistics: /sys/devices/system/cpu/cpu*/cpuidle/state*/* The first group of columns are the "usage", which is the number of times software requested that C-state in the measurement interval. eg C1 below. The second group of columns are the "time", which is the percentage of the measurement interval time that software has requested the specified C-state. eg C1% below. These software counters can be compared to the underlying hardware residency counters (eg CPU%c1 CPU%c3 CPU%c6 CPU%c7) to compare what sofware requested to what the hardware delivered. These sysfs attributes are discovered when turbostat starts, rather than being "built in". So the --show and --hide parameters do not know about these dynamic column names. However "--show sysfs" and "--hide sysfs" act on the entire group of columns: turbostat --show sysfs ... 
cpu4: POLL: CPUIDLE CORE POLL IDLE cpu4: C1: MWAIT 0x00 cpu4: C1E: MWAIT 0x01 cpu4: C3: MWAIT 0x10 cpu4: C6: MWAIT 0x20 cpu4: C7s: MWAIT 0x32 ... C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% 3 6 5 1 188 0.00 0.02 0.00 0.00 99.93 0 6 5 0 58 0.00 0.16 0.02 0.00 99.70 0 0 0 0 9 0.00 0.00 0.00 0.00 99.96 0 0 0 1 24 0.00 0.00 0.00 0.02 99.93 0 0 0 0 9 0.00 0.00 0.00 0.00 99.97 0 0 0 0 32 0.00 0.00 0.00 0.00 99.96 0 0 0 0 7 0.00 0.00 0.00 0.00 99.98 2 0 0 0 36 0.00 0.00 0.00 0.00 99.97 1 0 0 0 13 0.00 0.00 0.00 0.00 99.98 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 +- tools/power/x86/turbostat/turbostat.c | 161 +++++++++++++++++++++++--- 2 files changed, 147 insertions(+), 18 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a08de27713e0c8..e31b7213fd45b2 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,9 +47,9 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP -\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP -\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. .PP \fB--Dump\fP displays the raw counter values. 
.PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 841f837e317fc7..eb6cc8ccef06e1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -208,7 +208,7 @@ struct pkg_data { #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; -enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS}; +enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC}; enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; struct msr_counter { @@ -222,6 +222,7 @@ struct msr_counter { unsigned int flags; #define FLAGS_HIDE (1 << 0) #define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) }; struct sys_counters { @@ -379,6 +380,7 @@ struct msr_counter bic[] = { { 0x0, "Core" }, { 0x0, "CPU" }, { 0x0, "Mod%c6" }, + { 0x0, "sysfs" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -420,9 +422,10 @@ struct msr_counter bic[] = { #define BIC_Core (1ULL << 35) #define BIC_CPU (1ULL << 36) #define BIC_Mod_c6 (1ULL << 37) +#define BIC_sysfs (1ULL << 38) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; -unsigned long long bic_present; +unsigned long long bic_present = BIC_sysfs; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) @@ -489,9 +492,6 @@ void print_header(void) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\tSMI"); - if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\tCPU%%c1"); - for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -499,10 +499,12 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "\tCPU%%c1"); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\tCPU%%c3"); if (DO_BIC(BIC_CPU_c6)) @@ -523,7 +525,7 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } @@ -592,7 +594,7 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } @@ -753,10 +755,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\t%d", t->smi_count); - /* C1 */ - if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); - /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -767,10 +765,18 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); + if (mp->type == COUNTER_USEC) + outp += sprintf(outp, "\t%.2f", t->counter[i]/interval_float/10000); + else + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); } } + /* C1 */ + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); + + /* print per-core data only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; @@ -1286,6 +1292,8 @@ void compute_average(struct thread_data *t, struct core_data *c, for (i = 0, mp = 
sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->flags & SYSFS_PERCPU && mp->type == COUNTER_ITEMS) + continue; average.threads.counter[i] /= topo.num_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -1348,7 +1356,16 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { - *counterp = snapshot_sysfs_counter(mp->path); + char path[128]; + + if (mp->flags & SYSFS_PERCPU) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", + cpu, mp->path); + + *counterp = snapshot_sysfs_counter(path); + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } } return 0; @@ -2822,6 +2839,48 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) dump_nhm_cst_cfg(); } +static void +dump_sysfs_cstate_config(void) +{ + char path[64]; + char name_buf[16]; + char desc[64]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 0; state < 10; ++state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(desc, sizeof(desc), input); + + fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); + fclose(input); + } +} + /* * print_epb() @@ -4008,6 +4067,9 @@ void process_cpuid() if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + dump_sysfs_cstate_config(); + if (has_skl_msrs(family, model)) calculate_tsc_tweak(); @@ -4380,7 +4442,7 @@ void print_version() { int add_counter(unsigned int msr_num, char *path, char *name, unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format) + enum counter_type type, enum counter_format format, int flags) { struct msr_counter *msrp; @@ -4397,6 +4459,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp->width = width; msrp->type = type; msrp->format = format; + msrp->flags = flags; switch (scope) { @@ -4486,6 +4549,10 @@ void parse_add_command(char *add_command) type = COUNTER_SECONDS; goto next; } + if (!strncmp(add_command, "usec", strlen("usec"))) { + type = COUNTER_USEC; + goto next; + } if (!strncmp(add_command, "raw", strlen("raw"))) { format = FORMAT_RAW; goto next; @@ -4541,7 +4608,7 @@ void parse_add_command(char *add_command) } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format)) + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) fail++; if (fail) { @@ -4549,6 +4616,65 @@ void parse_add_command(char *add_command) exit(1); } } + +void probe_sysfs(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '%'; + *(sp + 1) = '\0'; + + 
fclose(input); + + sprintf(path, "cpuidle/state%d/time", state); + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, + FORMAT_PERCENT, SYSFS_PERCPU); + } + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + fclose(input); + + sprintf(path, "cpuidle/state%d/usage", state); + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, + FORMAT_DELTA, SYSFS_PERCPU); + } + +} + /* * HIDE_LIST - hide this list of counters, show the rest [default] * SHOW_LIST - show this list of counters, hide the rest @@ -4581,6 +4707,7 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) * multiple invocations simply clear more bits in enabled mask */ bic_enabled &= ~bic_lookup(optarg); + } void cmdline(int argc, char **argv) @@ -4682,6 +4809,8 @@ int main(int argc, char **argv) if (!quiet) print_version(); + probe_sysfs(); + turbostat_init(); /* dump counters and exit */ From 1ef7d21afe2197013aefe0e93641aa2c5a9ac3db Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 23:54:15 -0500 Subject: [PATCH 0389/1911] tools/power turbostat: add --cpu parameter With the --cpu parameter, turbostat prints only lines for the specified set of CPUs: sudo ./turbostat --quiet --show Core,CPU --cpu 0,1,3..5,6-7 Core CPU - - 0 0 0 4 1 1 1 5 2 6 3 3 3 7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 + tools/power/x86/turbostat/turbostat.c | 95 ++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e31b7213fd45b2..efe6a7147ff23b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,6 +47,8 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 +.PP \fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP \fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index eb6cc8ccef06e1..8c965bb2f4610b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -143,8 +143,9 @@ unsigned int has_misc_feature_control; int backwards_count; char *progname; -cpu_set_t *cpu_present_set, *cpu_affinity_set; -size_t cpu_present_setsize, cpu_affinity_setsize; +#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... 
*/ +cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 16 struct thread_data { @@ -700,6 +701,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + /*if not summary line and --cpu is used */ + if ((t != &average.threads) && + (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) + return 0; + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -4096,6 +4102,7 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + "--cpu cpu-set limit output to summary plus cpu-set cpu-set\n" "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" @@ -4158,6 +4165,15 @@ void topology_probe() CPU_ZERO_S(cpu_present_setsize, cpu_present_set); for_all_proc_cpus(mark_cpu_present); + /* + * Validate that all cpus in cpu_subset are also in cpu_present_set + */ + for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { + if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) + err(1, "cpu%d not present", i); + } + /* * Allocate and initialize cpu_affinity_set */ @@ -4675,6 +4691,77 @@ void probe_sysfs(void) } + +/* + * parse cpuset with following syntax + * 1,2,4..6,8-10 and set bits in cpu_subset + */ +void parse_cpu_command(char *optarg) +{ + unsigned int start, end; + char *next; + + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); + if (cpu_subset == NULL) + err(3, "CPU_ALLOC"); + cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); + + CPU_ZERO_S(cpu_subset_size, cpu_subset); + + next = optarg; + + while (next && *next) { + + if (*next == '-') /* no negative cpu numbers */ + goto error; + + start = strtoul(next, &next, 10); + + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + + if (*next == '\0') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + goto error; + } + + end = strtoul(next, &next, 10); + if (end <= start) + goto error; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + } + + if (*next == ',') + next += 1; + else if (*next != '\0') + goto error; + } + + return; + +error: + fprintf(stderr, "'--cpu %s' malformed\n", optarg); + exit(-1); +} + /* * HIDE_LIST - hide this list of counters, show the rest [default] * SHOW_LIST - show this list of counters, hide the rest @@ -4716,6 +4803,7 @@ void cmdline(int argc, char **argv) int option_index = 0; static struct option long_options[] = { {"add", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, {"Dump", no_argument, 0, 'D'}, {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"interval", required_argument, 0, 'i'}, @@ -4741,6 +4829,9 @@ void cmdline(int argc, char **argv) case 'a': parse_add_command(optarg); break; + case 'c': + parse_cpu_command(optarg); + break; case 'D': dump_only++; break; From 218f0e8d5c388767be9c78fd2c5bc0a6f416d6d0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 14 Feb 2017 22:07:52 -0500 Subject: [PATCH 0390/1911] tools/power turbostat: fix zero IRQ count 
shown in one-shot command mode The IRQ column has been working for periodic mode, but not in one-shot command mode, it shows only 0. until now. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8c965bb2f4610b..48b540a2fe81e6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -619,9 +619,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c1: %016llX\n", t->c1); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); + outp += sprintf(outp, "IRQ: %d\n", t->irq_count); if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "SMI: %08X\n", t->smi_count); + outp += sprintf(outp, "SMI: %d\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", @@ -2410,8 +2410,9 @@ int snapshot_gfx_mhz(void) */ int snapshot_proc_sysfs_files(void) { - if (snapshot_proc_interrupts()) - return 1; + if (DO_BIC(BIC_IRQ)) + if (snapshot_proc_interrupts()) + return 1; if (DO_BIC(BIC_GFX_rc6)) snapshot_gfx_rc6_ms(); @@ -4391,6 +4392,7 @@ int fork_it(char **argv) pid_t child_pid; int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, EVEN_COUNTERS); if (status) exit(status); @@ -4417,6 +4419,7 @@ int fork_it(char **argv) * n.b. fork_it() does not check for errors from for_all_cpus() * because re-starting is problematic when forking */ + snapshot_proc_sysfs_files(); for_all_cpus(get_counters, ODD_COUNTERS); gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); @@ -4438,6 +4441,7 @@ int get_and_dump_counters(void) { int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, ODD_COUNTERS); if (status) return status; From c8ade3616a1a5cf7767c0338d2b02007796f5d88 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 17:15:11 -0500 Subject: [PATCH 0391/1911] tools/power turbostat: Add --list option to show available header names It is handy to know the list of column header names, so that they can be used with --add and --skip The new --list option shows them: sudo ./turbostat --list --hide sysfs ,Core,CPU,Avg_MHz,Busy%,Bzy_MHz,TSC_MHz,IRQ,SMI,CPU%c1,CPU%c3,CPU%c6,CPU%c7,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 118 ++++++++++++++------------ 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 48b540a2fe81e6..7b02fbb6589316 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,6 +52,7 @@ unsigned int debug; unsigned int quiet; unsigned int rapl_joules; unsigned int summary_only; +unsigned int list_header_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; @@ -469,133 +470,133 @@ unsigned long long bic_lookup(char *name_list) return retval; } -void print_header(void) +void print_header(char *delim) { struct msr_counter *mp; if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "\tPackage"); + outp += sprintf(outp, "%sPackage", delim); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "\tCore"); + outp += sprintf(outp, "%sCore", delim); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\tCPU"); + outp += sprintf(outp, "%sCPU", delim); if (DO_BIC(BIC_Avg_MHz)) - 
outp += sprintf(outp, "\tAvg_MHz"); + outp += sprintf(outp, "%sAvg_MHz", delim); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\tBusy%%"); + outp += sprintf(outp, "%sBusy%%", delim); if (DO_BIC(BIC_Bzy_MHz)) - outp += sprintf(outp, "\tBzy_MHz"); + outp += sprintf(outp, "%sBzy_MHz", delim); if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "\tTSC_MHz"); + outp += sprintf(outp, "%sTSC_MHz", delim); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "\tIRQ"); + outp += sprintf(outp, "%sIRQ", delim); if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "\tSMI"); + outp += sprintf(outp, "%sSMI", delim); for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\tCPU%%c1"); + outp += sprintf(outp, "%sCPU%%c1", delim); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\tCPU%%c3"); + outp += sprintf(outp, "%sCPU%%c3", delim); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\tCPU%%c6"); + outp += sprintf(outp, "%sCPU%%c6", delim); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\tCPU%%c7"); + outp += sprintf(outp, "%sCPU%%c7", delim); if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\tMod%%c6"); + outp += sprintf(outp, "%sMod%%c6", delim); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "\tCoreTmp"); + outp += sprintf(outp, "%sCoreTmp", delim); for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "\tPkgTmp"); + outp += sprintf(outp, "%sPkgTmp", delim); if (DO_BIC(BIC_GFX_rc6)) - outp += sprintf(outp, "\tGFX%%rc6"); + outp += sprintf(outp, "%sGFX%%rc6", delim); if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "\tGFXMHz"); + outp += sprintf(outp, "%sGFXMHz", delim); if (do_skl_residency) { - outp += sprintf(outp, "\tTotl%%C0"); - outp += sprintf(outp, "\tAny%%C0"); - outp += sprintf(outp, "\tGFX%%C0"); - outp += sprintf(outp, "\tCPUGFX%%"); + outp += sprintf(outp, "%sTotl%%C0", delim); + outp += sprintf(outp, "%sAny%%C0", delim); + outp += sprintf(outp, "%sGFX%%C0", delim); + outp += sprintf(outp, "%sCPUGFX%%", delim); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "\tPkg%%pc2"); + outp += sprintf(outp, "%sPkg%%pc2", delim); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "\tPkg%%pc3"); + outp += sprintf(outp, "%sPkg%%pc3", delim); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "\tPkg%%pc6"); + outp += sprintf(outp, "%sPkg%%pc6", delim); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "\tPkg%%pc7"); + outp += sprintf(outp, "%sPkg%%pc7", delim); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "\tPkg%%pc8"); + outp += sprintf(outp, "%sPkg%%pc8", delim); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "\tPkg%%pc9"); + outp += sprintf(outp, "%sPkg%%pc9", delim); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "\tPk%%pc10"); + outp += sprintf(outp, "%sPk%%pc10", delim); if (do_rapl && 
!rapl_joules) { if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, "\tPkgWatt"); + outp += sprintf(outp, "%sPkgWatt", delim); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, "\tCorWatt"); + outp += sprintf(outp, "%sCorWatt", delim); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, "\tGFXWatt"); + outp += sprintf(outp, "%sGFXWatt", delim); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, "\tRAMWatt"); + outp += sprintf(outp, "%sRAMWatt", delim); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "\tPKG_%%"); + outp += sprintf(outp, "%sPKG_%%", delim); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "\tRAM_%%"); + outp += sprintf(outp, "%sRAM_%%", delim); } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, "\tPkg_J"); + outp += sprintf(outp, "%sPkg_J", delim); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, "\tCor_J"); + outp += sprintf(outp, "%sCor_J", delim); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, "\tGFX_J"); + outp += sprintf(outp, "%sGFX_J", delim); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, "\tRAM_J"); + outp += sprintf(outp, "%sRAM_J", delim); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "\tPKG_%%"); + outp += sprintf(outp, "%sPKG_%%", delim); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "\tRAM_%%"); + outp += sprintf(outp, "%sRAM_%%", delim); } for (mp = sys.pp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -933,7 +934,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ static int printed; if (!printed || !summary_only) - print_header(); + print_header("\t"); if (topo.num_cpus > 1) format_counters(&average.threads, &average.cores, @@ -4107,6 +4108,7 @@ void help() "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" + "--list list column headers only\n" "--out file create or truncate \"file\" for all output\n" "--version print version information\n" "\n" @@ -4814,6 +4816,7 @@ void cmdline(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, + {"list", no_argument, 0, 'l'}, {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, @@ -4866,6 +4869,10 @@ void cmdline(int argc, char **argv) case 'J': rapl_joules++; break; + case 'l': + list_header_only++; + quiet++; + break; case 'o': outf = fopen_or_die(optarg, "w"); break; @@ -4912,6 +4919,13 @@ int main(int argc, char **argv) if (dump_only) return get_and_dump_counters(); + /* list header and exit */ + if (list_header_only) { + print_header(","); + flush_output_stdout(); + return 0; + } + /* * if any params left, it must be a command to fork */ From 0de6c0df4ecc32ffaf064fea3a43846ba4474bd0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 21:45:40 -0500 Subject: [PATCH 0392/1911] tools/power turbostat: use wide columns to display large numbers When a counter overlfows 7 columns, it shifts the remaining columns to the right, so they no longer line up under their column header. 
Update turbostat to dectect when it is handling large numbers, and switch to wider columns where, necessary. Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 68 ++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7b02fbb6589316..cafc6bba653953 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -50,6 +50,7 @@ int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int quiet; +unsigned int sums_need_wide_columns; unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; @@ -154,7 +155,7 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; - unsigned int irq_count; + unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; @@ -489,8 +490,13 @@ void print_header(char *delim) if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "%sTSC_MHz", delim); - if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "%sIRQ", delim); + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s IRQ", delim); + else + outp += sprintf(outp, "%sIRQ", delim); + } + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", delim); @@ -501,7 +507,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -527,7 +536,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -596,7 +608,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -620,7 +635,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c1: %016llX\n", t->c1); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "IRQ: %d\n", t->irq_count); + outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); @@ -755,8 +770,12 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); /* IRQ */ - if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "\t%d", t->irq_count); + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", t->irq_count); + else + outp += sprintf(outp, "\t%lld", t->irq_count); + } /* SMI */ if (DO_BIC(BIC_SMI)) @@ -770,7 +789,10 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", t->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%lld", t->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", t->counter[i]); + else + outp += sprintf(outp, "\t%lld", 
t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) outp += sprintf(outp, "\t%.2f", t->counter[i]/interval_float/10000); @@ -809,7 +831,10 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", c->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%lld", c->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", c->counter[i]); + else + outp += sprintf(outp, "\t%lld", c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); } @@ -897,7 +922,10 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", p->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%lld", p->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", p->counter[i]); + else + outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); } @@ -1272,6 +1300,9 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + if (average.threads.irq_count > 9999999) + sums_need_wide_columns = 1; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -1299,18 +1330,29 @@ void compute_average(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; - if (mp->flags & SYSFS_PERCPU && mp->type == COUNTER_ITEMS) + if (mp->type == COUNTER_ITEMS) { + if (average.threads.counter[i] > 9999999) + sums_need_wide_columns = 1; continue; + } average.threads.counter[i] /= topo.num_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.cores.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.cores.counter[i] /= topo.num_cores; } for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.packages.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.packages.counter[i] /= topo.num_packages; } } From 6168c2e0fb5084d187aa8f3ec4093db5e161d4dc Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 16 Feb 2017 23:07:51 -0500 Subject: [PATCH 0393/1911] tools/power turbostat: update --list feature Make it possible to take the entire un-edited output from `turbostat --list` and feed it to "turbostat --show" or "turbostat --hide". To do this, the leading comma was removed (no mater what columns are active) and also they dynamic C-state "C1, C2, C3" etc are replaced by the string "sysfs", which refers to them as a group. 
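For example (illustrative; this assumes a shell with $(...) command substitution and that turbostat is run with sufficient privilege):

  sudo ./turbostat --show "$(sudo ./turbostat --list)" --quiet sleep 10

The string printed by --list is accepted verbatim as the argument to --show, and the same string can be given to --hide instead.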
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 219 +++++++++++++------------- 1 file changed, 113 insertions(+), 106 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cafc6bba653953..851eaf06f3589b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -474,33 +474,38 @@ unsigned long long bic_lookup(char *name_list) void print_header(char *delim) { struct msr_counter *mp; + int printed = 0; if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "%sPackage", delim); + outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "%sCore", delim); + outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "%sCPU", delim); + outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); if (DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "%sAvg_MHz", delim); + outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%sBusy%%", delim); + outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); if (DO_BIC(BIC_Bzy_MHz)) - outp += sprintf(outp, "%sBzy_MHz", delim); + outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "%sTSC_MHz", delim); + outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) - outp += sprintf(outp, "%s IRQ", delim); + outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); else - outp += sprintf(outp, "%sIRQ", delim); + outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); } if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "%sSMI", delim); + outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); for (mp = sys.tp; mp; mp = mp->next) { + if (*delim == ',') { + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), "sysfs"); + break; + } if (mp->format == FORMAT_RAW) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); @@ -515,19 +520,19 @@ void print_header(char *delim) } if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%sCPU%%c1", delim); + outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "%sCPU%%c3", delim); + outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%sCPU%%c6", delim); + outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%sCPU%%c7", delim); + outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "%sMod%%c6", delim); + outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "%sCoreTmp", delim); + outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -544,62 +549,62 @@ void print_header(char *delim) } if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "%sPkgTmp", delim); + outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_rc6)) - outp += sprintf(outp, "%sGFX%%rc6", delim); + outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "%sGFXMHz", delim); + outp += sprintf(outp, "%sGFXMHz", (printed++ ? 
delim : "")); if (do_skl_residency) { - outp += sprintf(outp, "%sTotl%%C0", delim); - outp += sprintf(outp, "%sAny%%C0", delim); - outp += sprintf(outp, "%sGFX%%C0", delim); - outp += sprintf(outp, "%sCPUGFX%%", delim); + outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%sPkg%%pc2", delim); + outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%sPkg%%pc3", delim); + outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%sPkg%%pc6", delim); + outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%sPkg%%pc7", delim); + outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%sPkg%%pc8", delim); + outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%sPkg%%pc9", delim); + outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%sPk%%pc10", delim); + outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, "%sPkgWatt", delim); + outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, "%sCorWatt", delim); + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, "%sGFXWatt", delim); + outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, "%sRAMWatt", delim); + outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "%sPKG_%%", delim); + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "%sRAM_%%", delim); + outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, "%sPkg_J", delim); + outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, "%sCor_J", delim); + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, "%sGFX_J", delim); + outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, "%sRAM_J", delim); + outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "%sPKG_%%", delim); + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "%sRAM_%%", delim); + outp += sprintf(outp, "%sRAM_%%", (printed++ ? 
delim : "")); } for (mp = sys.pp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -708,6 +713,8 @@ int format_counters(struct thread_data *t, struct core_data *c, char *fmt8; int i; struct msr_counter *mp; + char *delim = "\t"; + int printed = 0; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -729,81 +736,81 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } else { if (DO_BIC(BIC_Package)) { if (p) - outp += sprintf(outp, "\t%d", p->package_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } if (DO_BIC(BIC_Core)) { if (c) - outp += sprintf(outp, "\t%d", c->core_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\t%d", t->cpu_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); } if (DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "\t%.0f", + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "\t%.0f", + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); /* IRQ */ if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", t->irq_count); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); else - outp += sprintf(outp, "\t%lld", t->irq_count); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); } /* SMI */ if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "\t%d", t->smi_count); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) t->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", t->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", t->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), t->counter[i]); else - outp += sprintf(outp, "\t%lld", t->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += sprintf(outp, "\t%.2f", t->counter[i]/interval_float/10000); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000); else - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc); /* print per-core data only for 1st thread in core */ @@ -811,32 +818,32 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "\t%d", c->core_temp_c); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) c->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", c->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", c->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); else - outp += sprintf(outp, "\t%lld", c->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc); } } @@ -846,88 +853,88 @@ int format_counters(struct thread_data *t, struct core_data *c, /* PkgTmp */ if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "\t%d", p->pkg_temp_c); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); /* GFXrc6 */ if (DO_BIC(BIC_GFX_rc6)) { if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ - outp += sprintf(outp, "\t**.**"); + outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "\t%.2f", + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } /* GFXMHz */ if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "\t%d", p->gfx_mhz); + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); /* * If measurement interval exceeds minimum RAPL Joule Counter range, * indicate that results are suspect by printing "**" in fraction place. */ if (interval_float < rapl_joule_counter_range) - fmt8 = "\t%.2f"; + fmt8 = "%s%.2f"; else fmt8 = "%6.0f**"; if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_gfx * rapl_energy_units); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) p->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", p->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", p->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); else - outp += sprintf(outp, "\t%lld", p->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc); } } From da67e2b9fd1d846a41978690da0a899d8e4378ec Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 00:30:22 -0500 Subject: [PATCH 0394/1911] tools/power turbostat: turbostat.8 update update examples to show recently updated features. In particular --add --show --hide --cpu --list Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 238 +++++++++++++++----------- 1 file changed, 140 insertions(+), 98 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index efe6a7147ff23b..b1b1ab80102c82 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -16,9 +16,9 @@ idle power-state statistics, temperature and power on X86 processors. There are two ways to invoke turbostat. The first method is to supply a \fBcommand\fP, which is forked and statistics are printed -upon its completion. +in one-shot upon its completion. The second method is to omit the command, -and turbostat displays statistics every 5 seconds. +and turbostat displays statistics every 5 seconds interval. The 5-second interval can be changed using the --interval option. .PP Some information is not available on older processors. @@ -28,9 +28,10 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 + /sys/path... is an absolute path to a sysfs attribute scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. 
@@ -45,6 +46,10 @@ name as necessary to disambiguate it from others is necessary. Note that option 'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. default: delta + + name: "name_string" + Any string that does not match a key-word above is used + as the column header. .fi .PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 @@ -68,6 +73,8 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP +\fB--list\fP display column header names available for use by --show and --hide, then exit. +.PP \fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. .PP \fB--Summary\fP limits output to a 1-line System Summary for each interval. @@ -79,24 +86,25 @@ The file is truncated if it already exists, and it is created if it does not exi The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH DEFAULT FIELD DESCRIPTIONS +.SH ROW DESCRIPTIONS +The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. +.SH COLUMN DESCRIPTIONS .nf +\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. -\fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. -\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). +\fBPackage\fP processor package number -- not present on systems with a single processor package. +\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed. +\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state. +\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. -.fi -.PP -.SH DEBUG FIELD DESCRIPTIONS -.nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. 
-Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. +\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary. +\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. -\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. +\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. \fBPkgWatt\fP Watts consumed by the whole package. \fBCorWatt\fP Watts consumed by the core part of the package. \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. @@ -104,50 +112,110 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi +.SH TOO MUCH INFORMATION EXAMPLE +By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. +This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. .PP -.SH PERIODIC EXAMPLE -Without any parameters, turbostat displays statistics ever 5 seconds. -Periodic output goes to stdout, by default, unless --out is used to specify an output file. -The 5-second interval can be changed with th "-i sec" option. -Or a command may be specified as in "FORK EXAMPLE" below. +When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list. 
+.PP +To find out what columns are available for --show and --hide, the "--list" option is available. Note, however, there is an exception. The C-state columns collected from sysfs "C1,C2,C3,C1%,C2%,C3%" are not built-in counters, but are discovered after --show and --hide are processed. You can use the special counter name "sysfs" to refer to all of them at the same time. +.nf +sudo ./turbostat --show sysfs --quiet sleep 10 +10.003837 sec + C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% + 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93 + 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80 + 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95 + 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21 + 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86 + 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89 + 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96 + 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94 + 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84 +.fi +.PP +.SH ONE SHOT COMMAND EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered after the command exits. +In this case, turbostat output goes to stderr, by default. +Output can instead be saved to a file using the --out option. +In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry. + .nf -[root@hsw]# ./turbostat - CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - - 488 12.51 3898 3498 - 0 0 0.01 3885 3498 - 4 3897 99.99 3898 3498 - 1 0 0.00 3861 3498 - 5 0 0.00 3882 3498 - 2 1 0.02 3894 3498 - 6 2 0.06 3898 3498 - 3 0 0.00 3849 3498 - 7 0 0.00 3877 3498 +[root@hsw]# ./turbostat -o ts.out sleep 10 +[root@hsw]# +.fi +.SH PERIODIC INTERVAL EXAMPLE +Without a command to fork, turbostat displays statistics ever 5 seconds. +Periodic output goes to stdout, by default, unless --out is used to specify an output file. +The 5-second interval can be changed with the "-i sec" option. +.nf +sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98 + 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00 + 0 4 3897 99.99 3900 3498 0.01 + 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98 + 1 5 0 0.00 3861 3498 0.01 + 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95 + 2 6 0 0.00 3863 3498 0.05 + 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97 + 3 7 0 0.00 3878 3498 0.03 + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99 + 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00 + 0 4 3898 99.99 3900 3498 0.01 + 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99 + 1 5 0 0.00 3898 3498 0.01 + 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98 + 2 6 0 0.00 3889 3498 0.02 + 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99 + 3 7 0 0.00 3897 3498 0.01 .fi -.SH DEBUG EXAMPLE +This example also shows the use of the --hide option to skip columns that are not wanted. +Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy. +Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only, +because its cpu4's activity on shared hardware keeps it from entering a deeper C-state. + +.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE -The first row of statistics is a summary for the entire system. 
-For residency % columns, the summary is a weighted average. -For Temperature columns, the summary is the column maximum. -For Watts columns, the summary is a system total. -Subsequent rows show per-CPU statistics. +By default, turbostat always dumps system configuration information +before taking measurements. In the example above, "--quiet" is used +to suppress that output. Here is an example of the configuration information: .nf -turbostat version 4.1 10-Feb, 2015 - Len Brown +turbostat version 2017.02.15 - Len Brown CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) -CPUID(6): APERF, DTS, PTM, EPB +CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM +CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB +cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO) +CPUID(7): No-SGX +cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB) RAPL: 3121 sec. Joule Counter Range, at 84 Watts -cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 -8 * 100 = 800 MHz max efficiency -35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) -cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727 -37 * 100 = 3700 MHz max turbo 4 active cores -38 * 100 = 3800 MHz max turbo 3 active cores -39 * 100 = 3900 MHz max turbo 2 active cores -39 * 100 = 3900 MHz max turbo 1 active cores +cpu4: MSR_PLATFORM_INFO: 0x80838f3012300 +8 * 100.0 = 800.0 MHz max efficiency frequency +35 * 100.0 = 3500.0 MHz base frequency +cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100.0 = 3700.0 MHz max turbo 4 active cores +38 * 100.0 = 3800.0 MHz max turbo 3 active cores +39 * 100.0 = 3900.0 MHz max turbo 2 active cores +39 * 100.0 = 3900.0 MHz max turbo 1 active cores +cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35) +cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 () +cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 () +cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1) +cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0) +cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) +cpu4: POLL: CPUIDLE CORE POLL IDLE +cpu4: C1: MWAIT 0x00 +cpu4: C1E: MWAIT 0x01 +cpu4: C3: MWAIT 0x10 +cpu4: C6: MWAIT 0x20 +cpu4: C7s: MWAIT 0x32 +cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch) cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, ) cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) 
@@ -162,23 +230,14 @@ cpu0: MSR_PP1_POLICY: 0 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) -cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) -cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) - Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt - - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 - 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 - 0 4 3897 99.98 3898 3498 0 0.02 - 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 - 1 5 0 0.00 3885 3498 0 0.21 - 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 - 2 6 2 0.06 3896 3498 0 0.80 - 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 - 3 7 0 0.00 3879 3498 0 0.04 -^C - +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C) +cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1) +cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns) +cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns) +cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns) .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. The \fBTSC frequency\fP is the base @@ -188,39 +247,22 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. -.SH FORK EXAMPLE -If turbostat is invoked with a command, it will fork that command -and output the statistics gathered after the command exits. -In this case, turbostat output goes to stderr, by default. -Output can instead be saved to a file using the --out option. -eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds -until ^C while the other CPUs are mostly idle: - +.SH ADD COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), +labeling it with the column header, "PRF_CTRL", and display it only once, +afte the conclusion of a 0.1 second sleep. .nf -root@hsw: turbostat cat /dev/zero > /dev/null -^C - CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - - 482 12.51 3854 3498 - 0 0 0.01 1960 3498 - 4 0 0.00 2128 3498 - 1 0 0.00 3003 3498 - 5 3854 99.98 3855 3498 - 2 0 0.01 3504 3498 - 6 3 0.08 3884 3498 - 3 0 0.00 2553 3498 - 7 0 0.00 2126 3498 -10.783983 sec +sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 +0.101604 sec +CPU PRF_CTRL +- 0x00000000 +0 0x00000c00 +1 0x00000800 +2 0x00000a00 +3 0x00000800 .fi -Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. -The first row shows the average MHz and Busy% across all the processors in the system. - -Note that the Avg_MHz column reflects the total number of cycles executed -divided by the measurement interval. If the Busy% column is 100%, -then the processor was running at that speed the entire interval. 
-The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- -which is the average frequency while the processor was executing -- -not including any non-busy idle time. .SH NOTES From 4e4e1e7c6eaf387f8ec803f37f154fdd60e303c0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 22:33:42 -0500 Subject: [PATCH 0395/1911] tools/power turbostat: move --Package and --processor into the --cpu option --Package is now "--cpu package", which will display just the 1st CPU in each package --processor is not "--cpu core" which will display just the 1st CPU in each core Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 6 +----- tools/power/x86/turbostat/turbostat.c | 31 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index b1b1ab80102c82..5189d9d982febb 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -52,7 +52,7 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP -\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP @@ -71,12 +71,8 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. -.PP \fB--list\fP display column header names available for use by --show and --hide, then exit. .PP -\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. -.PP \fB--Summary\fP limits output to a 1-line System Summary for each interval. .PP \fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 851eaf06f3589b..c005d905267900 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4153,7 +4153,8 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" - "--cpu cpu-set limit output to summary plus cpu-set cpu-set\n" + "--cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" @@ -4756,6 +4757,21 @@ void parse_cpu_command(char *optarg) unsigned int start, end; char *next; + if (!strcmp(optarg, "core")) { + if (cpu_subset) + goto error; + show_core_only++; + return; + } + if (!strcmp(optarg, "package")) { + if (cpu_subset) + goto error; + show_pkg_only++; + return; + } + if (show_core_only || show_pkg_only) + goto error; + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); if (cpu_subset == NULL) err(3, "CPU_ALLOC"); @@ -4813,7 +4829,8 @@ void parse_cpu_command(char *optarg) return; error: - fprintf(stderr, "'--cpu %s' malformed\n", optarg); + fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); + help(); exit(-1); } @@ -4867,8 +4884,6 @@ void cmdline(int argc, char **argv) {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, {"out", required_argument, 0, 'o'}, - {"Package", no_argument, 0, 'p'}, - {"processor", no_argument, 0, 'p'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, @@ -4879,7 +4894,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -4925,12 +4940,6 @@ void cmdline(int argc, char **argv) case 'o': outf = fopen_or_die(optarg, "w"); break; - case 'P': - show_pkg_only++; - break; - case 'p': - show_core_only++; - break; case 'q': quiet = 1; break; From dd778a5e6bbd0f52f34c61ae9b42b725c5f22398 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 23:21:13 -0500 Subject: [PATCH 0396/1911] tools/power turbostat: support "--hide C1" etc. Originally, the only way to hide the sysfs C-state statistics columns was with "--hide sysfs". This was because we process "--hide" before we probe for those columns. hack --hide to remember deferred hide requests, and apply them when sysfs is probed. "--hide sysfs" is still available as short-hand to refer to the entire group of counters. The down-side of this change is that we no longer error check for bogus --hide column names. But the user will quickly figure that out if a column they mean to hide is still there... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 115 ++++++++++++++++---------- 2 files changed, 73 insertions(+), 44 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 5189d9d982febb..fedca328532621 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -114,7 +114,7 @@ This is ideal for remote debugging, use the "--out" option to save everything to .PP When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. 
If you use the "--hide" option, turbostat will show all columns, except the ones you list. .PP -To find out what columns are available for --show and --hide, the "--list" option is available. Note, however, there is an exception. The C-state columns collected from sysfs "C1,C2,C3,C1%,C2%,C3%" are not built-in counters, but are discovered after --show and --hide are processed. You can use the special counter name "sysfs" to refer to all of them at the same time. +To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once: .nf sudo ./turbostat --show sysfs --quiet sleep 10 10.003837 sec diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c005d905267900..596259f48f5023 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -434,12 +434,45 @@ unsigned long long bic_present = BIC_sysfs; #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define MAX_DEFERRED 16 +char *deferred_skip_names[MAX_DEFERRED]; +int deferred_skip_index; + +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest + */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; + +void help(void) +{ + fprintf(outf, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + "--add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + "--cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + "--quiet skip decoding system configuration header\n" + "--interval sec Override default 5-second measurement interval\n" + "--help print this help message\n" + "--list list column headers only\n" + "--out file create or truncate \"file\" for all output\n" + "--version print version information\n" + "\n" + "For more help, run \"man turbostat\"\n"); +} + /* * bic_lookup * for all the strings in comma separate name_list, * set the approprate bit in return value. */ -unsigned long long bic_lookup(char *name_list) +unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) { int i; unsigned long long retval = 0; @@ -459,8 +492,19 @@ unsigned long long bic_lookup(char *name_list) } } if (i == MAX_BIC) { - fprintf(stderr, "Invalid counter name: %s\n", name_list); - exit(-1); + if (mode == SHOW_LIST) { + fprintf(stderr, "Invalid counter name: %s\n", name_list); + exit(-1); + } + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } } name_list = comma; @@ -471,6 +515,7 @@ unsigned long long bic_lookup(char *name_list) return retval; } + void print_header(char *delim) { struct msr_counter *mp; @@ -502,20 +547,17 @@ void print_header(char *delim) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); for (mp = sys.tp; mp; mp = mp->next) { - if (*delim == ',') { - outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), "sysfs"); - break; - } + if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); else - outp += sprintf(outp, "%s%10.10s", delim, mp->name); + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); } else { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); else - outp += sprintf(outp, "%s%s", delim, mp->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); } } @@ -4142,29 +4184,6 @@ void process_cpuid() return; } -void help() -{ - fprintf(outf, - "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" - "\n" - "Turbostat forks the specified COMMAND and prints statistics\n" - "when COMMAND completes.\n" - "If no COMMAND is specified, turbostat wakes every 5-seconds\n" - "to print statistics, until interrupted.\n" - "--add add a counter\n" - " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" - "--cpu cpu-set limit output to summary plus cpu-set:\n" - " {core | package | j,k,l..m,n-p }\n" - "--quiet skip decoding system configuration header\n" - "--interval sec Override default 5-second measurement interval\n" - "--help print this help message\n" - "--list list column headers only\n" - "--out file create or truncate \"file\" for all output\n" - "--version print version information\n" - "\n" - "For more help, run \"man turbostat\"\n"); -} - /* * in /dev/cpu/ return success for names that are numbers @@ -4689,6 +4708,16 @@ void parse_add_command(char *add_command) } } +int is_deferred_skip(char *name) +{ + int i; + + for (i = 0; i < deferred_skip_index; ++i) + if (!strcmp(name, deferred_skip_names[i])) + return 1; + return 0; +} + void probe_sysfs(void) { char path[64]; @@ -4720,6 +4749,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); + if (is_deferred_skip(name_buf)) + continue; + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); } @@ -4741,6 +4773,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/usage", state); + if (is_deferred_skip(name_buf)) + continue; + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); } @@ -4834,12 +4869,6 @@ void parse_cpu_command(char *optarg) exit(-1); } -/* - * HIDE_LIST - hide this list of counters, show the rest [default] - * SHOW_LIST - show this list of counters, hide the rest - */ -enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; - int shown; /* * parse_show_hide() - process cmdline to set default counter action @@ -4853,9 +4882,9 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) */ if (new_mode == SHOW_LIST) { if (shown == 0) - bic_enabled = bic_lookup(optarg); + bic_enabled = bic_lookup(optarg, new_mode); else - bic_enabled |= bic_lookup(optarg); + bic_enabled |= bic_lookup(optarg, new_mode); shown = 1; return; @@ -4865,7 +4894,7 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) * --hide: do not show those specified * multiple invocations simply clear more bits in enabled mask */ - bic_enabled &= ~bic_lookup(optarg); + bic_enabled &= ~bic_lookup(optarg, new_mode); } From 7da6e3e2125d24040b3648ddc61edf70eb533849 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 23:43:41 -0500 Subject: [PATCH 0397/1911] tools/power turbostat: show package number, even without 
--debug On multi-package systems, the "Package" column was being displayed only if --debug was used. Show it always. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 596259f48f5023..2c674ad5ab8039 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4294,7 +4294,7 @@ void topology_probe() if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (debug && !summary_only && topo.num_packages > 1) + if (!summary_only && topo.num_packages > 1) BIC_PRESENT(BIC_Package); topo.num_threads_per_core = max_siblings; From 7293fccdffdec0ab0c36c4e4cffacb3ff114928e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 22 Feb 2017 00:11:12 -0500 Subject: [PATCH 0398/1911] tools/power turbostat: dump p-state software config cpu1: cpufreq driver: acpi-cpufreq cpu1: cpufreq governor: ondemand cpufreq boost: 1 or cpu0: cpufreq driver: intel_pstate cpu0: cpufreq governor: powersave cpufreq intel_pstate no_turbo: 0 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2c674ad5ab8039..7af5f42a9792f4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2979,6 +2979,54 @@ dump_sysfs_cstate_config(void) fclose(input); } } +static void +dump_sysfs_pstate_config(void) +{ + char path[64]; + char driver_buf[64]; + char governor_buf[64]; + FILE *input; + int turbo; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(driver_buf, sizeof(driver_buf), input); + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(governor_buf, sizeof(governor_buf), input); + fclose(input); + + fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); + fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); + + sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", &turbo); + fprintf(outf, "cpufreq boost: %d\n", turbo); + fclose(input); + } + + sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", &turbo); + fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); + fclose(input); + } +} /* @@ -4168,6 +4216,8 @@ void process_cpuid() if (!quiet) dump_sysfs_cstate_config(); + if (!quiet) + dump_sysfs_pstate_config(); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); From 0815a3d09baf2cd330f75020bdaad0f1adac0ecb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 23 Feb 2017 17:00:51 -0500 Subject: [PATCH 0399/1911] tools/power turbostat: show error on exec When turbostat is run in one-shot command mode, the parent takes the 'before' counter snapshot, fork/exec/wait for the child to exit, takes the 'after' counter snapshot, and prints the results. however, if the child fails to exec the command, it immediately returns, without indicating that anythign was wrong. 
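The surrounding logic is the classic fork/exec/wait sequence; below is a stand-alone user-space sketch of that shape, with the missing error report added directly after execvp() (function names here are illustrative, not turbostat's actual code):

#include <err.h>
#include <errno.h>
#include <sys/wait.h>
#include <unistd.h>

static int run_child(char **argv)
{
        int status;
        pid_t child = fork();

        if (child < 0)
                err(1, "fork");
        if (child == 0) {
                execvp(argv[0], argv);
                /* reached only if the exec itself failed; say why */
                err(errno, "exec %s", argv[0]);
        }
        /* parent: wait for the child and pass on its exit status */
        if (waitpid(child, &status, 0) < 0)
                err(1, "waitpid");
        return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}

int main(int argc, char **argv)
{
        return argc > 1 ? run_child(argv + 1) : 0;
}
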
Add an error message showing that exec failed: sudo turbostat sleeeep 4 ... turbostat: exec sleeeep: No such file or directory ... Note that the parent will still print out the statistics, because it can't tell the difference between the failed exec and a command that is purposefully returning the same status. Unfortunately, this may obscure the error message. However, if the --out parameter is used, the error message is evident on stderr. Reported-by: Wendy Wang Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7af5f42a9792f4..2d758abecd56b6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4525,6 +4525,7 @@ int fork_it(char **argv) if (!child_pid) { /* child */ execvp(argv[0], argv); + err(errno, "exec %s", argv[0]); } else { /* parent */ From 5f3aea57773dc7f788e374994636ffc0234a355f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 23 Feb 2017 18:10:27 -0500 Subject: [PATCH 0400/1911] tools/power turbostat: bugfix: --add u32 was printed as u64 When the "u32" keyword is used with --add, it means that the output should be truncated to 32-bits. This was not happening and all 64-bits were printed. Also, when no column name was used for an added MSR, The default column name was in deximal, eg. MSR16. Users report that they tend to use hex MSR numbers, so print them in hex. To always fit into the columns, use the syntax M0x10. Note that the user can always supply any column header that they want. eg --add msr0x10,MY_TSC Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2d758abecd56b6..5216549957f4b6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -834,7 +834,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) t->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -876,7 +876,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) c->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -967,7 +967,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) p->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? 
delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -4732,22 +4732,10 @@ void parse_add_command(char *add_command) /* generate default column header */ if (*name_buffer == '\0') { - if (format == FORMAT_RAW) { - if (width == 32) - sprintf(name_buffer, "msr%d", msr_num); - else - sprintf(name_buffer, "MSR%d", msr_num); - } else if (format == FORMAT_DELTA) { - if (width == 32) - sprintf(name_buffer, "cnt%d", msr_num); - else - sprintf(name_buffer, "CNT%d", msr_num); - } else if (format == FORMAT_PERCENT) { - if (width == 32) - sprintf(name_buffer, "msr%d%%", msr_num); - else - sprintf(name_buffer, "MSR%d%%", msr_num); - } + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); } if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) From e3942ed8c66bcff496abee5182422cd542962d9e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 02:33:23 -0500 Subject: [PATCH 0401/1911] tools/power turbostat: version 17.02.24 The turbostat before this last set of changes is obsolete. This new version can do a lot more, but it also has some different defaults, that might catch some off-guard. So it seems a good time to give a new version number. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5216549957f4b6..828dccd3f01eaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4578,7 +4578,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.17 10 Jan 2017" + fprintf(outf, "turbostat version 17.02.24" " - Len Brown \n"); } From b6b6fbc8310e1b12b05717da9df6953b138f812b Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Wed, 1 Mar 2017 14:34:52 +0800 Subject: [PATCH 0402/1911] drm/i915/gvt: use pfn_valid for better checking Before get the page from pfn, use pfn_valid to check if pfn is able to translate to page structure. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 182914c22ac553..24135489060321 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; - page = pfn_to_page(pfn); - if (is_error_page(page)) + if (unlikely(!pfn_valid(pfn))) return -EFAULT; + page = pfn_to_page(pfn); daddr = dma_map_page(dev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) From df06a2d57711a1472ced72207373eeb6422d4721 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Mar 2017 00:03:37 -0800 Subject: [PATCH 0403/1911] tools/testing/nvdimm: make iset cookie predictable For testing changes to the iset cookie algorithm we need a value that is constant from run-to-run. Stop including dynamic data in the emulated region_offset values. Also, pick values that sort in a different order depending on whether the comparison is a memcmp() of two 8-byte arrays or subtraction of two 64-bit values. 
Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 45be8b55a66345..798f176554338b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -887,7 +887,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 4+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0]; + memdev->region_offset = 1; memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -902,7 +902,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 5+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->region_offset = (1 << 8); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -917,7 +917,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 4+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1]; + memdev->region_offset = (1 << 16); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -932,7 +932,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 5+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->region_offset = (1 << 24); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -947,7 +947,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 6+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 32); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -962,7 +962,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 7+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 40); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -1380,7 +1380,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 11+1; memdev->region_index = 9+1; memdev->region_size = SPA0_SIZE; - memdev->region_offset = t->spa_set_dma[2]; + memdev->region_offset = (1ULL << 48); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; From 87bfbddd0935f7a3acf36b883707aee816d4bb49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Sun, 26 Feb 2017 15:44:42 +0100 Subject: [PATCH 0404/1911] =?UTF-8?q?MAINTAINERS:=20Remove=20Noralf=20Tr?= =?UTF-8?q?=C3=B8nnes=20as=20fbtft=20maintainer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to personal reasons I'm unable to continue as fbtft maintainer. 
Signed-off-by: Noralf Trønnes Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ad4f2ce991ca26..30ff892e1e2f2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4979,7 +4979,6 @@ F: lib/fault-inject.c FBTFT Framebuffer drivers M: Thomas Petazzoni -M: Noralf Trønnes S: Maintained F: drivers/staging/fbtft/ From a45e47f4b342884dcf9e40a530033f64c379ffc7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:42:16 +0100 Subject: [PATCH 0405/1911] staging: fsl-mc: fix warning in DT ranges parser The fsl-mc-bus driver in staging contains a copy of the standard 'ranges' property parsing algorithm with a hack to treat a missing property the same way as an empty one. This code produces false-positive warnings for me in an allmodconfig build: drivers/staging/fsl-mc/bus/fsl-mc-bus.c: In function 'fsl_mc_bus_probe': drivers/staging/fsl-mc/bus/fsl-mc-bus.c:645:6: error: 'mc_size_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:682:8: error: 'mc_addr_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:644:6: note: 'mc_addr_cells' was declared here drivers/staging/fsl-mc/bus/fsl-mc-bus.c:684:8: error: 'paddr_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:643:6: note: 'paddr_cells' was declared here To avoid the warnings, I'm simplifying the argument handling to pass the number of valid ranges in the property as the function return code rather than passing it by reference. With this change, gcc can see that we don't evaluate the cell numbers for an missing ranges property. 
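The underlying trick is generic: when the element count comes back as the function's return value rather than through a pointer, the compiler can see that the out-parameters are only read on paths where something was actually parsed. A condensed user-space sketch of the shape (names are illustrative, not the driver's):

#include <stdio.h>

/* returns the number of ranges parsed, or 0 for a missing/empty
 * property; *cells is written only when at least one range exists */
static int parse_ranges(const int *prop, int len, int *cells)
{
        if (!prop || !len)
                return 0;
        *cells = prop[0];
        return len;
}

int main(void)
{
        int prop[] = { 2, 3 };
        int cells;              /* left uninitialized on purpose */
        int n = parse_ranges(prop, 2, &cells);

        if (n > 0)              /* cells is only used when n > 0 */
                printf("%d ranges, %d address cells\n", n, cells);
        return 0;
}
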
Signed-off-by: Arnd Bergmann Acked-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/fsl-mc-bus.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/staging/fsl-mc/bus/fsl-mc-bus.c b/drivers/staging/fsl-mc/bus/fsl-mc-bus.c index 47acb0a298422a..3be5f25ff11306 100644 --- a/drivers/staging/fsl-mc/bus/fsl-mc-bus.c +++ b/drivers/staging/fsl-mc/bus/fsl-mc-bus.c @@ -588,8 +588,7 @@ static int parse_mc_ranges(struct device *dev, int *paddr_cells, int *mc_addr_cells, int *mc_size_cells, - const __be32 **ranges_start, - u8 *num_ranges) + const __be32 **ranges_start) { const __be32 *prop; int range_tuple_cell_count; @@ -602,8 +601,6 @@ static int parse_mc_ranges(struct device *dev, dev_warn(dev, "missing or empty ranges property for device tree node '%s'\n", mc_node->name); - - *num_ranges = 0; return 0; } @@ -630,8 +627,7 @@ static int parse_mc_ranges(struct device *dev, return -EINVAL; } - *num_ranges = ranges_len / tuple_len; - return 0; + return ranges_len / tuple_len; } static int get_mc_addr_translation_ranges(struct device *dev, @@ -639,7 +635,7 @@ static int get_mc_addr_translation_ranges(struct device *dev, **ranges, u8 *num_ranges) { - int error; + int ret; int paddr_cells; int mc_addr_cells; int mc_size_cells; @@ -647,16 +643,16 @@ static int get_mc_addr_translation_ranges(struct device *dev, const __be32 *ranges_start; const __be32 *cell; - error = parse_mc_ranges(dev, + ret = parse_mc_ranges(dev, &paddr_cells, &mc_addr_cells, &mc_size_cells, - &ranges_start, - num_ranges); - if (error < 0) - return error; + &ranges_start); + if (ret < 0) + return ret; - if (!(*num_ranges)) { + *num_ranges = ret; + if (!ret) { /* * Missing or empty ranges property ("ranges;") for the * 'fsl,qoriq-mc' node. In this case, identity mapping From 9dcfe2c75b51f454f39c2de4756e841228865b47 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Mar 2017 09:25:55 +0100 Subject: [PATCH 0406/1911] locking/refcounts: Change WARN() to WARN_ONCE() Linus noticed that the new refcount.h APIs used WARN(), which would turn into a dmesg DoS if it triggers frequently on some buggy driver. So make sure we only warn once. These warnings are never supposed to happen, so it's typically not a problem to lose subsequent warnings. 
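The behaviour being relied on can be modelled in a few lines of plain C; this is only a sketch of the fire-once idea, not the kernel's WARN_ONCE() implementation:

#include <stdio.h>

#define warn_once(fmt, ...)                                     \
do {                                                            \
        static int warned;                                      \
        if (!warned) {                                          \
                warned = 1;                                     \
                fprintf(stderr, fmt, ##__VA_ARGS__);            \
        }                                                       \
} while (0)

int main(void)
{
        int i;

        /* a buggy caller hitting the path a million times still
         * produces a single line of output */
        for (i = 0; i < 1000000; i++)
                warn_once("refcount_t: saturated; leaking memory.\n");
        return 0;
}
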
Suggested-by: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Elena Reshetova Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CA+55aFzbYUTZ=oqZ2YgDjY0C2_n6ODhTfqj6V+m5xWmDxsuB0w@mail.gmail.com Signed-off-by: Ingo Molnar --- lib/refcount.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/refcount.c b/lib/refcount.c index 1d33366189d10c..aa09ad3c30b0dc 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { - WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_add); @@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero); */ void refcount_inc(refcount_t *r) { - WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_inc); @@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) new = val - i; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return false; } @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test); void refcount_dec(refcount_t *r) { - WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } EXPORT_SYMBOL_GPL(refcount_dec); @@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r) new = val - 1; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return true; } From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Feb 2017 18:32:48 -0800 Subject: [PATCH 0407/1911] nfit, libnvdimm: fix interleave set cookie calculation The interleave-set cookie is a sum that sanity checks the composition of an interleave set has not changed from when the namespace was initially created. The checksum is calculated by sorting the DIMMs by their location in the interleave-set. The comparison for the sort must be 64-bit wide, not byte-by-byte as performed by memcmp() in the broken case. Fix the implementation to accept correct cookie values in addition to the Linux "memcmp" order cookies, but only allow correct cookies to be generated going forward. It does mean that namespaces created by third-party-tooling, or created by newer kernels with this fix, will not validate on older kernels. However, there are a couple mitigating conditions: 1/ platforms with namespace-label capable NVDIMMs are not widely available. 2/ interleave-sets with a single-dimm are by definition not affected (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case. 
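The compatibility strategy, reduced to its skeleton, is "accept either checksum, emit only the correct one". A schematic user-space sketch of that shape (simplified types, not the libnvdimm code):

#include <stdint.h>
#include <stdio.h>

struct iset {
        uint64_t cookie;        /* checksum over the correct sort order */
        uint64_t altcookie;     /* checksum over the old memcmp() order */
};

/* existing labels validate against either value ... */
static int label_cookie_valid(const struct iset *set, uint64_t label_cookie)
{
        return label_cookie == set->cookie ||
               label_cookie == set->altcookie;
}

/* ... but anything newly written gets only the correct cookie */
static uint64_t label_cookie_for_write(const struct iset *set)
{
        return set->cookie;
}

int main(void)
{
        struct iset set = { .cookie = 0x1111, .altcookie = 0x2222 };

        printf("old-style label accepted: %d\n",
               label_cookie_valid(&set, 0x2222));
        printf("cookie written to new labels: 0x%llx\n",
               (unsigned long long)label_cookie_for_write(&set));
        return 0;
}
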
The cookie stored in the namespace label will be fixed by any write the namespace label, the most straightforward way to achieve this is to write to the "alt_name" attribute of a namespace in sysfs. Cc: Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure") Reported-by: Nicholas Moulin Tested-by: Nicholas Moulin Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 16 +++++++++++++++- drivers/nvdimm/namespace_devs.c | 18 ++++++++++++++---- drivers/nvdimm/nd.h | 1 + drivers/nvdimm/region_devs.c | 9 +++++++++ include/linux/libnvdimm.h | 2 ++ 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 7361d00818e2bb..662036bdc65eca 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1603,7 +1603,7 @@ static size_t sizeof_nfit_set_info(int num_mappings) + num_mappings * sizeof(struct nfit_set_info_map); } -static int cmp_map(const void *m0, const void *m1) +static int cmp_map_compat(const void *m0, const void *m1) { const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; @@ -1612,6 +1612,14 @@ static int cmp_map(const void *m0, const void *m1) sizeof(u64)); } +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return map0->region_offset - map1->region_offset; +} + /* Retrieve the nth entry referencing this spa */ static struct acpi_nfit_memory_map *memdev_from_spa( struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) @@ -1667,6 +1675,12 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), cmp_map, NULL); nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + + /* support namespaces created with the wrong sort order */ + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map_compat, NULL); + nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; devm_kfree(dev, info); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index ce3e8dfa10ad5c..1b481a5fb9667d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1700,6 +1700,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) struct device *create_namespace_pmem(struct nd_region *nd_region, struct nd_namespace_label *nd_label) { + u64 altcookie = nd_region_interleave_set_altcookie(nd_region); u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nd_label_ent *label_ent; struct nd_namespace_pmem *nspm; @@ -1718,7 +1719,11 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, if (__le64_to_cpu(nd_label->isetcookie) != cookie) { dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n", nd_label->uuid); - return ERR_PTR(-EAGAIN); + if (__le64_to_cpu(nd_label->isetcookie) != altcookie) + return ERR_PTR(-EAGAIN); + + dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n", + nd_label->uuid); } nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); @@ -1733,9 +1738,14 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, res->name = dev_name(&nd_region->dev); res->flags = IORESOURCE_MEM; - for (i = 0; i < nd_region->ndr_mappings; i++) - if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) - break; + for (i = 0; i < nd_region->ndr_mappings; i++) { + if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) + continue; + if 
(has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i)) + continue; + break; + } + if (i < nd_region->ndr_mappings) { struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 35dd75057e1697..2a99c83aa19f08 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -328,6 +328,7 @@ struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 7cd705f3247c34..b7cb5066d9613e 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -505,6 +505,15 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) return 0; } +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->altcookie; + return 0; +} + void nd_mapping_free_labels(struct nd_mapping *nd_mapping) { struct nd_label_ent *label_ent, *e; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e562e..77e7af32543f69 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Mar 2017 01:23:24 +0900 Subject: [PATCH 0408/1911] kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed Fix to the exception table entry check by using probed address instead of the address of copied instruction. This bug may cause unexpected kernel panic if user probe an address where an exception can happen which should be fixup by __ex_table (e.g. copy_from_user.) Unless user puts a kprobe on such address, this doesn't cause any problem. This bug has been introduced years ago, by commit: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently"). 
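The point of the fix: an exception table maps addresses of instructions in kernel text to their fixup handlers, so the lookup is only meaningful on the probed address itself, never on the address of the copied instruction in the kprobe slot. A toy user-space model of such a lookup (illustrative only, not the kernel's __ex_table format):

#include <stdint.h>
#include <stdio.h>

struct ex_entry {
        uintptr_t insn;         /* text address that may fault */
        uintptr_t fixup;        /* where execution resumes if it does */
};

static const struct ex_entry table[] = {
        { 0x1000, 0x2000 },
        { 0x1008, 0x2010 },
};

static const struct ex_entry *search_ex_table(uintptr_t addr)
{
        size_t i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                if (table[i].insn == addr)
                        return &table[i];
        return NULL;
}

int main(void)
{
        uintptr_t probed = 0x1008;      /* address in "kernel text"   */
        uintptr_t copy   = 0x9000;      /* private instruction buffer */

        printf("probed address has fixup: %d\n",
               search_ex_table(probed) != NULL);
        printf("copied-insn address has fixup: %d\n",
               search_ex_table(copy) != NULL);
        return 0;
}
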
Signed-off-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently") Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 2 +- arch/x86/kernel/kprobes/core.c | 6 +++--- arch/x86/kernel/kprobes/opt.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab72..d688826e5736a1 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe164026..88b3c942473ded 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a728f..3e7c6e5a08ffde 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Feb 2017 23:50:58 +0100 Subject: [PATCH 0409/1911] x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/ ... since this is all x86-specific data and it makes sense to have it under x86/ logically instead in the toplevel debugfs dir. 
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d8976..38b64587b31be5 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:05:59 +0100 Subject: [PATCH 0410/1911] s390/crypt: fix missing unlock in ctr_paes_crypt on error path The ctr mode of protected key aes uses the ctrblk page if the ctrblk_lock could be acquired. If the protected key has to be reestablished and this operation fails the unlock for the ctrblk_lock is missing. Add it. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/paes_s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d748..716b17238599f6 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:42:01 +0100 Subject: [PATCH 0411/1911] s390: fix in-kernel program checks A program check inside the kernel takes a slightly different path in entry.S compare to a normal user fault. A recent change moved the store of the breaking event address into the path taken for in-kernel program checks as well, but %r14 has not been setup to point to the correct location. A wild store is the consequence. Move the store of the breaking event address to the code path for user space faults. 
Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks") Reported-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7eb..6a7d737d514c4c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -490,7 +490,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +499,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +509,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:16:03 +0100 Subject: [PATCH 0412/1911] s390/cputime: remove last traces of cputime_t The cputime_t type is a thing of the past, replace the last occurences of the type in the s390 code with a simple u64. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 14 ++------------ arch/s390/kernel/vtime.c | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf7032d..e5672d6276a60d 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,32 +8,22 @@ #define _S390_CPUTIME_H #include -#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* * Convert cputime to microseconds and back. 
*/ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } - u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e8116775..8f5f59a151b41d 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:18:34 +0100 Subject: [PATCH 0413/1911] s390/cputime: reset all accounting fields on fork copy_thread has to reset all cputime related field in the task struct, not only user_timer and system_timer. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582cb1..249deafaa6ee8f 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:21:10 +0100 Subject: [PATCH 0414/1911] s390/cputime: provide archicture specific cputime_to_nsecs The generic cputime_to_nsecs function first converts the cputime to micro-seconds and then multiplies the result with 1000. This looses some bits of accuracy, provide our own version of cputime_to_nsecs that does not loose precision. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index e5672d6276a60d..9072bf63a84614 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,6 +8,7 @@ #define _S390_CPUTIME_H #include +#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) @@ -17,13 +18,18 @@ #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ static inline u64 cputime_to_usecs(const u64 cputime) { return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) + u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:47:57 +0100 Subject: [PATCH 0415/1911] s390/timex: micro optimization for tod_to_ns The conversion of a TOD value to nano-seconds currently uses a 32/32 bit split with the calculation for "nsecs = (TOD * 125) >> 9". Using a 55/9 bit split saves an instruction. 
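Both splits are exact; they differ only in how the multiplication is broken up. The identity is easy to check against a wide multiply. A stand-alone user-space sketch of that check, using a 128-bit intermediate (a gcc extension) as the reference:

#include <stdint.h>
#include <stdio.h>

/* old 32/32 split */
static uint64_t tod_to_ns_32_32(uint64_t tod)
{
        return ((tod >> 32) << 23) * 125 + (((tod & 0xffffffff) * 125) >> 9);
}

/* new 55/9 split */
static uint64_t tod_to_ns_55_9(uint64_t tod)
{
        return ((tod >> 9) * 125) + (((tod & 0x1ff) * 125) >> 9);
}

int main(void)
{
        uint64_t samples[] = { 0, 1, 511, 512, 0xffffffffULL,
                               0x123456789abcdef0ULL };
        size_t i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                uint64_t tod = samples[i];
                uint64_t ref = (uint64_t)(((unsigned __int128)tod * 125) >> 9);

                printf("%#018llx: 32/32=%llu 55/9=%llu ref=%llu\n",
                       (unsigned long long)tod,
                       (unsigned long long)tod_to_ns_32_32(tod),
                       (unsigned long long)tod_to_ns_55_9(tod),
                       (unsigned long long)ref);
        }
        return 0;
}
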
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc19898..118535123f346d 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif From b28ace12661fbcfd90959c1e84ff5a85113a82a1 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: [PATCH 0416/1911] irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Cc: marc.zyngier@arm.com Cc: jason@lakedaemon.net Link: http://lkml.kernel.org/r/20170223094855.6546-1-fdemathieu@gmail.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1fbf..05bbf171df3772 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 28 Feb 2017 13:50:52 +0800 Subject: [PATCH 0417/1911] x86/apic: Fix a warning message in logical CPU IDs allocation The current warning message in allocate_logical_cpuid() is somewhat confusing: Only 1 processors supported.Processor 2/0x2 and the rest are ignored. As it might imply that there's only one CPU in the system - while what we ran into here is a kernel limitation. Fix the warning message to clarify all that: APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored. 
( Also update the error return from -1 to -EINVAL, which is the more canonical return value. ) Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad99d..11088b86e5c716 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 23 Feb 2017 17:16:41 +0800 Subject: [PATCH 0418/1911] x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR() The following commit: 2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled") ... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this check is also duplicated in try_to_enable_IR() - and it will never succeed in calling irq_remapping_enable(). Remove the whole irq_remapping_enable() complication: if the IO-APIC is disabled we cannot enable IRQ remapping. Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11088b86e5c716..aee7deddabd089 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); From 2a4d0c627f5374f365a873dea4e10ae0bb437680 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 13 Feb 2017 13:13:36 +0300 Subject: [PATCH 0419/1911] x86/selftests: Add clobbers for int80 on x86_64 Kernel erases R8..R11 registers prior returning to userspace from 
int80: https://lkml.org/lkml/2009/10/1/164 GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall: https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2 By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- .../testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634ca1..b4967d87523655 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf0430..f6121612e769f5 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. 
*/ @@ -588,7 +594,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +663,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +694,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +727,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +780,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116b1..eaea9243970840 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7ec..a48da95c18fdf1 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Feb 2017 17:00:50 +1100 Subject: [PATCH 0420/1911] perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS. Consistently use the plurals. Signed-off-by: Anton Blanchard Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: davem@davemloft.net Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 2 +- Documentation/trace/uprobetracer.txt | 2 +- arch/powerpc/configs/85xx/kmp204x_defconfig | 2 +- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- arch/s390/defconfig | 2 +- kernel/trace/Kconfig | 6 +++--- kernel/trace/Makefile | 4 ++-- kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_probe.h | 4 ++-- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index e4991fb1eedcd4..41ef9d8efe9517 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes functions). Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. -To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y. Similar to the events tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index fa7b680ee8a005..bf526a7c5559a8 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -7,7 +7,7 @@ Overview -------- Uprobe based trace events are similar to kprobe based trace events. -To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y. Similar to the kprobe-event tracer, this doesn't need to be activated via current_tracer. 
Instead of that, add probe points via diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index aaaaa609cd240a..34a4da23f000fd 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b81849..4b176fe83da4c6 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e10872a..0de46cc397f6fe 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5efcf..e167557b434c20 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02ec..97189dbaf34b2a 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5038005eb5dc0..d4a06e714645df 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config KPROBE_EVENT +config KPROBE_EVENTS depends on KPROBES depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" @@ -447,7 +447,7 @@ config KPROBE_EVENT This option is also required by perf-probe subcommand of perf tools. If you want to use perf tools, this option is strongly recommended. 
-config UPROBE_EVENT +config UPROBE_EVENTS bool "Enable uprobes-based dynamic events" depends on ARCH_SUPPORTS_UPROBES depends on MMU @@ -466,7 +466,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS + depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS bool default y help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e5798084554911..90f2701d92a7ee 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o -obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o +obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o @@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o -obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 707445ceb7efd4..f35109514a015c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4341,22 +4341,22 @@ static const char readme_msg[] = "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" "\t Format: p|r[:[/]] []\n" "\t -:[/]\n" -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS "\t place: :\n" #endif "\t args: =fetcharg[:type]\n" diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0c0ae54d44c616..903273c93e6167 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); @@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } -#endif /* CONFIG_KPROBE_EVENT */ +#endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { struct fetch_param fetch; From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: [PATCH 0421/1911] x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. 
Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1d9..f25982cdff9006 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 19 Feb 2017 19:32:48 +0100 Subject: [PATCH 0422/1911] x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451aa..01c9cda3e1b77d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:17:17 +0100 Subject: [PATCH 0423/1911] x86/hyperv: Hide unused label This new 32-bit warning just showed up: arch/x86/hyperv/hv_init.c: In function 'hyperv_init': arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label] The easiest solution is to move the label up into the existing #ifdef that has the goto. Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Signed-off-by: Arnd Bergmann Acked-by: Stephen Hemminger Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: devel@linuxdriverproject.org Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500b4..8bef70e7f3cc6d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. 
*/ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 21:29:22 +0900 Subject: [PATCH 0424/1911] x86/vmware: Remove duplicate inclusion of asm/timer.h Signed-off-by: Masanari Iida Cc: akataria@vmware.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c49..22403a28caf522 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include #include #include -#include #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 22:07:03 +0900 Subject: [PATCH 0425/1911] x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h Signed-off-by: Masanari Iida Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb964..759577d9d16658 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:28 +0800 Subject: [PATCH 0426/1911] x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The problem is that IRQs are allocated and freed in pci_enable_device() and pci_disable_device(). But there are some drivers which don't call pci_disable_device(), and they have good reasons not calling it, so if they're using IOAPIC their IRQs won't have a chance to be released from the IOAPIC. When this happens IOAPIC hot-removal fails with a kernel stack dump and an error message like this: [149335.697989] pin16 on IOAPIC2 is still in use. It turns out that we can fix it in a different way without moving IRQ allocation into pcibios_alloc_irq(), thus avoiding the regression of 991de2e59090. We can keep the allocation and freeing of IRQs as is within pci_enable_device()/pci_disable_device(), without breaking any previous assumption of the rest of the system, keeping compatibility with both the legacy and the modern drivers. We can accomplish this by implementing the existing __weak hook of pcibios_release_device() thus when a pci device is about to be deleted we get notified in the hook and take the chance to release its IRQ, if any, from the IOAPIC. Implement pcibios_release_device() for x86 to release any IRQ not released by the driver. 
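[ Editor's note: the hook above hinges on the enable_cnt bookkeeping -- pci_enable_device() increments the counter and pci_disable_device() decrements it, so a driver that never calls pci_disable_device() leaves it positive when the device is deleted. The user-space sketch below is an illustration only; the struct and function names are made up, not kernel API. It models why the hook's test in the diff that follows is written as 'if (atomic_dec_return(&dev->enable_cnt) >= 0) pcibios_disable_device(dev);'. ]

#include <stdio.h>

/* Made-up stand-in for struct pci_dev; only the counter matters here. */
struct fake_pci_dev {
        int enable_cnt;
};

/*
 * Models the release hook: one extra decrement at release time. A device
 * whose driver balanced enable/disable is already at 0 and drops to -1, so
 * nothing more is done; a device left enabled is still at 1, drops to 0,
 * and we know we still owe it a disable (which is where the IOAPIC IRQ
 * finally gets released).
 */
static void fake_release_device(struct fake_pci_dev *dev)
{
        if (--dev->enable_cnt >= 0)
                printf("still enabled at release: disable it, free its IRQ\n");
        else
                printf("already disabled by its driver: nothing to do\n");
}

int main(void)
{
        struct fake_pci_dev balanced = { .enable_cnt = 0 }; /* enable + disable */
        struct fake_pci_dev leaky    = { .enable_cnt = 1 }; /* enable only */

        fake_release_device(&balanced);
        fake_release_device(&leaky);
        return 0;
}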
Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f075..190e718694b172 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) From f2ae5da726172fcf82f7be801489dd585f6a38eb Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:29 +0800 Subject: [PATCH 0427/1911] x86/ioapic: Split IOAPIC hot-removal into two steps The hot removal of IOAPIC is handling PCI and ACPI removal in one go. That only works when the PCI drivers released the interrupt resources, but breaks when a IOAPIC interrupt is still associated to a PCI device. The new pcibios_release_device() callback allows to solve that problem by splitting the removal into two steps: 1) PCI removal: Release all PCI resources including eventually not yet released IOAPIC interrupts via the new pcibios_release_device() callback. 2) ACPI removal: After release of all PCI resources the ACPI resources can be released without issue. [ tglx: Rewrote changelog ] Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- drivers/acpi/internal.h | 2 ++ drivers/acpi/ioapic.c | 22 ++++++++++++++++------ drivers/acpi/pci_root.c | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219b90bc092297..f15900132912a4 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pci_ioapic_remove(struct acpi_pci_root *root); int acpi_ioapic_remove(struct acpi_pci_root *root); #else +static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; } static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; } #endif #ifdef CONFIG_ACPI_DOCK diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 6d7ce6e12aaa66..1120dfd625b8a1 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -206,24 +206,34 @@ int acpi_ioapic_add(acpi_handle root_handle) return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 
0 : -ENODEV; } -int acpi_ioapic_remove(struct acpi_pci_root *root) +void pci_ioapic_remove(struct acpi_pci_root *root) { - int retval = 0; struct acpi_pci_ioapic *ioapic, *tmp; mutex_lock(&ioapic_list_lock); list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { if (root->device->handle != ioapic->root_handle) continue; - - if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) - retval = -EBUSY; - if (ioapic->pdev) { pci_release_region(ioapic->pdev, 0); pci_disable_device(ioapic->pdev); pci_dev_put(ioapic->pdev); } + } + mutex_unlock(&ioapic_list_lock); +} + +int acpi_ioapic_remove(struct acpi_pci_root *root) +{ + int retval = 0; + struct acpi_pci_ioapic *ioapic, *tmp; + + mutex_lock(&ioapic_list_lock); + list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { + if (root->device->handle != ioapic->root_handle) + continue; + if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) + retval = -EBUSY; if (ioapic->res.flags && ioapic->res.parent) release_resource(&ioapic->res); list_del(&ioapic->list); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index bf601d4df8cfcb..919be0aa257876 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device) pci_stop_root_bus(root->bus); - WARN_ON(acpi_ioapic_remove(root)); - + pci_ioapic_remove(root); device_set_run_wake(root->bus->bridge, false); pci_acpi_remove_bus_pm_notifier(device); pci_remove_root_bus(root->bus); + WARN_ON(acpi_ioapic_remove(root)); dmar_device_remove(device->handle); From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: [PATCH 0428/1911] x86/pkeys: Check against max pkey to avoid overflows Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899ad..b3b09b98896d52 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). 
+ */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Feb 2017 12:31:40 +0100 Subject: [PATCH 0429/1911] x86/events: Remove last remnants of old filenames Update to the new file paths, remove them from introductory comments. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/events/intel/uncore.h | 6 +++--- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63caeb0..c84584bb940280 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d4021..238ae3248ba559 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. 
* Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca4902651..9d05c7e67f6073 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bccd..df5989f27b1b65 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c04..4f3c758d875d6c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -31,7 +31,7 @@ #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:35 +1100 Subject: [PATCH 0430/1911] x86/purgatory: Make functions and variables static Sparse emits several 'symbol not declared' warnings for various functions and variables. Add static keyword to functions and variables which have file scope only. Signed-off-by: Tobin C. 
Harding Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba338214..2a5f4373c797a4 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -18,11 +18,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +39,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:36 +1100 Subject: [PATCH 0431/1911] x86/purgatory: Fix sparse warning, symbol not declared Sparse emits warning, 'symbol not declared' for a function that has neither file scope nor a forward declaration. The functions only call site is an ASM file. Add a header file with the function declaration. Include the header file in the C source file defining the function in order to fix the sparse warning. Include the header file in ASM file containing the call site to document the usage. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 1 + arch/x86/purgatory/purgatory.h | 8 ++++++++ arch/x86/purgatory/setup-x86_64.S | 1 + 3 files changed, 10 insertions(+) create mode 100644 arch/x86/purgatory/purgatory.h diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2a5f4373c797a4..b6d5c8946e664a 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 00000000000000..e2e365a6c192a1 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0c6..f90e9dfa90bb92 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: [PATCH 0432/1911] x86/boot: Correct setup_header.start_sys name It is called start_sys_seg elsewhere so rename it to that. It is an obsolete field so we could just as well directly call it __u16 __pad... No functional change. 
Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/bootparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb836..07244ea16765a6 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; From 1fb1683cb343d80736625f3048de2107cf5bbf79 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2017 14:36:56 +0000 Subject: [PATCH 0433/1911] crypto: arm/crc32 - fix build error with outdated binutils Annotate a vmov instruction with an explicit element size of 32 bits. This is inferred by recent toolchains, but apparently, older versions need some help figuring this out. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/crc32-ce-core.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S index e63d400dc5c14b..5cbd4a6fedad7c 100644 --- a/arch/arm/crypto/crc32-ce-core.S +++ b/arch/arm/crypto/crc32-ce-core.S @@ -135,7 +135,7 @@ ENTRY(crc32c_pmull_le) vld1.8 {q3-q4}, [BUF, :128]! vmov.i8 qzr, #0 vmov.i8 qCONSTANT, #0 - vmov dCONSTANTl[0], CRC + vmov.32 dCONSTANTl[0], CRC veor.8 d2, d2, dCONSTANTl sub LEN, LEN, #0x40 cmp LEN, #0x40 From efa7cebdbfde8506b54acd8947822394768cd476 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2017 14:36:57 +0000 Subject: [PATCH 0434/1911] crypto: arm/crc32 - add build time test for CRC instruction support The accelerated CRC32 module for ARM may use either the scalar CRC32 instructions, the NEON 64x64 to 128 bit polynomial multiplication (vmull.p64) instruction, or both, depending on what the current CPU supports. However, this also requires support in binutils, and as it turns out, versions of binutils exist that support the vmull.p64 instruction but not the crc32 instructions. So refactor the Makefile logic so that this module only gets built if binutils has support for both. 
Signed-off-by: Ard Biesheuvel Acked-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Herbert Xu --- arch/arm/crypto/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 1822c4697278cb..f2215fbeed138f 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -15,7 +15,17 @@ ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o +crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o + +ifneq ($(crc-obj-y)$(crc-obj-m),) +ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) +ce-obj-y += $(crc-obj-y) +ce-obj-m += $(crc-obj-m) +else +$(warning These CRC Extensions modules need binutils 2.23 or higher) +$(warning $(crc-obj-y) $(crc-obj-m)) +endif +endif ifneq ($(ce-obj-y)$(ce-obj-m),) ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) From 1c68bb0f62bf8de8bb30123ea840d5168f25abea Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 28 Feb 2017 14:07:25 -0800 Subject: [PATCH 0435/1911] crypto: testmgr - Pad aes_ccm_enc_tv_template vector Running with KASAN and crypto tests currently gives BUG: KASAN: global-out-of-bounds in __test_aead+0x9d9/0x2200 at addr ffffffff8212fca0 Read of size 16 by task cryptomgr_test/1107 Address belongs to variable 0xffffffff8212fca0 CPU: 0 PID: 1107 Comm: cryptomgr_test Not tainted 4.10.0+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Call Trace: dump_stack+0x63/0x8a kasan_report.part.1+0x4a7/0x4e0 ? __test_aead+0x9d9/0x2200 ? crypto_ccm_init_crypt+0x218/0x3c0 [ccm] kasan_report+0x20/0x30 check_memory_region+0x13c/0x1a0 memcpy+0x23/0x50 __test_aead+0x9d9/0x2200 ? kasan_unpoison_shadow+0x35/0x50 ? alg_test_akcipher+0xf0/0xf0 ? crypto_skcipher_init_tfm+0x2e3/0x310 ? crypto_spawn_tfm2+0x37/0x60 ? crypto_ccm_init_tfm+0xa9/0xd0 [ccm] ? crypto_aead_init_tfm+0x7b/0x90 ? crypto_alloc_tfm+0xc4/0x190 test_aead+0x28/0xc0 alg_test_aead+0x54/0xd0 alg_test+0x1eb/0x3d0 ? alg_find_test+0x90/0x90 ? __sched_text_start+0x8/0x8 ? __wake_up_common+0x70/0xb0 cryptomgr_test+0x4d/0x60 kthread+0x173/0x1c0 ? crypto_acomp_scomp_free_ctx+0x60/0x60 ? kthread_create_on_node+0xa0/0xa0 ret_from_fork+0x2c/0x40 Memory state around the buggy address: ffffffff8212fb80: 00 00 00 00 01 fa fa fa fa fa fa fa 00 00 00 00 ffffffff8212fc00: 00 01 fa fa fa fa fa fa 00 00 00 00 01 fa fa fa >ffffffff8212fc80: fa fa fa fa 00 05 fa fa fa fa fa fa 00 00 00 00 ^ ffffffff8212fd00: 01 fa fa fa fa fa fa fa 00 00 00 00 01 fa fa fa ffffffff8212fd80: fa fa fa fa 00 00 00 00 00 05 fa fa fa fa fa fa This always happens on the same IV which is less than 16 bytes. Per Ard, "CCM IVs are 16 bytes, but due to the way they are constructed internally, the final couple of bytes of input IV are dont-cares. Apparently, we do read all 16 bytes, which triggers the KASAN errors." Fix this by padding the IV with null bytes to be at least 16 bytes. 
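[ Editor's note: the out-of-bounds read above comes down to C string-literal sizing -- a literal with 12 explicit bytes occupies 13 bytes of storage (the explicit bytes plus the implicit terminating NUL), so copying a full 16-byte CCM IV out of it walks past the end of the global, which is exactly what KASAN flags. The user-space sketch below is an illustration only, not part of the patch; it just shows how the added "\0\0\0\0" padding grows the backing array beyond 16 bytes. ]

#include <stdio.h>
#include <string.h>

/* 12 explicit bytes + implicit NUL = 13 bytes of storage: too small to
 * source a 16-byte CCM IV from. */
static const char iv_short[]  = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d"
                                "\x43\xf6\x1e\x50";

/* Same 12 IV bytes padded as in the patch: 17 bytes of storage, so a
 * 16-byte read stays in bounds. */
static const char iv_padded[] = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d"
                                "\x43\xf6\x1e\x50\0\0\0\0";

int main(void)
{
        char iv[16];

        printf("sizeof(iv_short)  = %zu\n", sizeof(iv_short));  /* 13 */
        printf("sizeof(iv_padded) = %zu\n", sizeof(iv_padded)); /* 17 */

        memcpy(iv, iv_padded, sizeof(iv)); /* in bounds only for the padded literal */
        return 0;
}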
Cc: stable@vger.kernel.org Fixes: 0bc5a6c5c79a ("crypto: testmgr - Disable rfc4309 test and convert test vectors") Acked-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu --- crypto/testmgr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index f85e51cf7dcca6..663f034c89b935 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -22691,7 +22691,7 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = { "\x09\x75\x9a\x9b\x3c\x9b\x27\x39", .klen = 32, .iv = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d" - "\x43\xf6\x1e\x50", + "\x43\xf6\x1e\x50\0\0\0\0", .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" "\x13\x02\x01\x0c\x83\x4c\x96\x35" "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" From 25b68a8f0ab13a98de02650208ec927796659898 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 17 Feb 2017 10:13:59 -0500 Subject: [PATCH 0436/1911] timerfd: Only check CAP_WAKE_ALARM when it is needed timerfd_create() and do_timerfd_settime() evaluate capable(CAP_WAKE_ALARM) unconditionally although CAP_WAKE_ALARM is only required for CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. This can cause extraneous audit messages when using a LSM such as SELinux, incorrectly causes PF_SUPERPRIV to be set even when no privilege was exercised, and is inefficient. Flip the order of the tests in both functions so that we only call capable() if the capability is truly required for the operation. Signed-off-by: Stephen Smalley Cc: linux-security-module@vger.kernel.org Cc: selinux@tycho.nsa.gov Link: http://lkml.kernel.org/r/1487344439-22293-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a56334..c543cdb5f8ed9b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; - if (!capable(CAP_WAKE_ALARM) && - (clockid == CLOCK_REALTIME_ALARM || - clockid == CLOCK_BOOTTIME_ALARM)) + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; - if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { fdput(f); return -EPERM; } From bba82fd75694832b59433bf4e9d7ede8de1dfd42 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 1 Feb 2017 17:06:11 +1300 Subject: [PATCH 0437/1911] KVM: x86: never specify a sample period for virtualized in_tx_cp counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pmc_reprogram_counter() always sets a sample period based on the value of pmc->counter. However, hsw_hw_config() rejects sample periods less than 2^31 - 1. So for example, if a KVM guest does struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_RAW; attr.size = sizeof(attr); attr.config = 0x2005101c4; // conditional branches retired IN_TXCP attr.sample_period = 0; int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); the guest kernel counts some conditional branch events, then updates the virtual PMU register with a nonzero count. 
The host reaches pmc_reprogram_counter() with nonzero pmc->counter, triggers EOPNOTSUPP in hsw_hw_config(), prints "kvm_pmu: event creation failed" in pmc_reprogram_counter(), and silently (from the guest's point of view) stops counting events. We fix event counting by forcing attr.sample_period to always be zero for in_tx_cp counters. Sampling doesn't work, but it already didn't work and can't be fixed without major changes to the approach in hsw_hw_config(). Signed-off-by: Robert O'Callahan Signed-off-by: Radim Krčmář --- arch/x86/kvm/pmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 06ce377dcbc9ff..026db42a86c323 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, .config = config, }; + attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + if (in_tx) attr.config |= HSW_IN_TX; - if (in_tx_cp) + if (in_tx_cp) { + /* + * HSW_IN_TX_CHECKPOINTED is not supported with nonzero + * period. Just clear the sample period so at least + * allocating the counter doesn't fail. + */ + attr.sample_period = 0; attr.config |= HSW_IN_TX_CHECKPOINTED; - - attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + } event = perf_event_create_kernel_counter(&attr, -1, current, intr ? kvm_perf_overflow_intr : From d0e32fba85d5b7902db2715a9d5540fb0efdba83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:18 +0100 Subject: [PATCH 0438/1911] watchdog: wm831x watchdog really needs mfd The wm831x watchdog driver can now be built without the wm831x mfd driver, which results in a link error: (.text+0x1a95c): undefined reference to `wm831x_set_bits' (.text+0x1a95c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_set_bits' (.text+0x1a968): undefined reference to `wm831x_reg_lock' (.text+0x1a968): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_reg_lock' (.text+0x1a9dc): undefined reference to `wm831x_reg_unlock' (.text+0x1a9dc): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_reg_unlock' This adds back the dependency that was removed. We can still build test this driver on all architectures by enabling the MFD driver for it first. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c831b7967bf95f..fda6f3d24780ca 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -176,7 +176,7 @@ config WDAT_WDT config WM831X_WATCHDOG tristate "WM831x watchdog" - depends on MFD_WM831X || COMPILE_TEST + depends on MFD_WM831X select WATCHDOG_CORE help Support for the watchdog in the WM831x AudioPlus PMICs. 
When From 0369fdf2e1169896529bd53c3b5fb15511768276 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:19 +0100 Subject: [PATCH 0439/1911] watchdog: geode: restore hard CS5535_MFGPT dependency Wtihout CONFIG_CS5535_MFGPT, the driver does not link right: drivers/watchdog/built-in.o: In function `geodewdt_probe': geodewdt.c:(.init.text+0xca3): undefined reference to `cs5535_mfgpt_alloc_timer' geodewdt.c:(.init.text+0xcd4): undefined reference to `cs5535_mfgpt_write' geodewdt.c:(.init.text+0xcef): undefined reference to `cs5535_mfgpt_toggle_event' This adds back the dependency on this base driver. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index fda6f3d24780ca..5b4bb1b6dd68e0 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -851,7 +851,7 @@ config SP5100_TCO config GEODE_WDT tristate "AMD Geode CS5535/CS5536 Watchdog" - depends on CS5535_MFGPT || (X86 && COMPILE_TEST) + depends on CS5535_MFGPT help This driver enables a watchdog capability built into the CS5535/CS5536 companion chips for the AMD Geode GX and LX From 6fb303a81a0b106e7f5522df929bbf4596780c48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:20 +0100 Subject: [PATCH 0440/1911] watchdog: menf21bmc: add I2C dependency This driver fails to link when CONFIG_I2C is disabled or a loadable module while the watchdog is built-in: drivers/watchdog/built-in.o: In function `menf21bmc_wdt_shutdown': menf21bmc_wdt.c:(.text+0x9b44): undefined reference to `i2c_smbus_write_word_data' menf21bmc_wdt.c:(.text+0x9b44): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `i2c_smbus_write_word_data' This adds a Kconfig dependency for it, to enforce a valid configuration. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 5b4bb1b6dd68e0..4d2c336daa394c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -142,6 +142,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL config MENF21BMC_WATCHDOG tristate "MEN 14F021P00 BMC Watchdog" depends on MFD_MENF21BMC || COMPILE_TEST + depends on I2C select WATCHDOG_CORE help Say Y here to include support for the MEN 14F021P00 BMC Watchdog. From 3eafee95643360d22c6725ea2188bd700c09c986 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:21 +0100 Subject: [PATCH 0441/1911] watchdog: sp805: add back AMBA dependency The driver fails to link if ARM_AMBA is disabled: drivers/watchdog/sp805_wdt.o: In function `sp805_wdt_driver_init': sp805_wdt.c:(.init.text+0x4): undefined reference to `amba_driver_register' It seems that the COMPILE_TEST was added in the wrong place, as there is no architecture dependency, but a bus dependency. This moves the dependency accordingly. 
Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4d2c336daa394c..41a75b8ab21304 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -218,7 +218,7 @@ config ZIIRAVE_WATCHDOG config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" - depends on (ARM || ARM64) && (ARM_AMBA || COMPILE_TEST) + depends on (ARM || ARM64 || COMPILE_TEST) && ARM_AMBA select WATCHDOG_CORE help ARM Primecell SP805 Watchdog timer. This will reboot your system when From 2672b7e01abbe147a0a391216bc0dbd656608e0a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:22 +0100 Subject: [PATCH 0442/1911] watchdog: bcm2835: add CONFIG_OF dependency Without CONFIG_OF, the driver fails to link: drivers/watchdog/built-in.o: In function `bcm2835_power_off': bcm2835_wdt.c:(.text+0x1946): undefined reference to `of_find_device_by_node' This adds a new dependency, to allow the COMPILE_TEST check to succeed. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 41a75b8ab21304..13390b00ec470c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1496,7 +1496,7 @@ config BCM63XX_WDT config BCM2835_WDT tristate "Broadcom BCM2835 hardware watchdog" - depends on ARCH_BCM2835 || COMPILE_TEST + depends on ARCH_BCM2835 || (OF && COMPILE_TEST) select WATCHDOG_CORE help Watchdog driver for the built in watchdog hardware in Broadcom From ed4a9eca66720366d117d9b39f2335ca9b0a7aae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:23 +0100 Subject: [PATCH 0443/1911] watchdog: kempld: revert to full dependency The kempld watchdog driver requires the respective MFD driver: drivers/watchdog/built-in.o: In function `kempld_wdt_probe': kempld_wdt.c:(.text+0x5c78): undefined reference to `kempld_get_mutex' kempld_wdt.c:(.text+0x5c84): undefined reference to `kempld_read8' kempld_wdt.c:(.text+0x5c8e): undefined reference to `kempld_release_mutex' kempld_wdt.c:(.text+0x5d1c): undefined reference to `kempld_read8' kempld_wdt.c:(.text+0x5d2c): undefined reference to `kempld_write8' This adds the Kconfig dependency back. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 13390b00ec470c..47dbacf3bfb41c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1064,7 +1064,7 @@ config HP_WATCHDOG config KEMPLD_WDT tristate "Kontron COM Watchdog Timer" - depends on MFD_KEMPLD || COMPILE_TEST + depends on MFD_KEMPLD select WATCHDOG_CORE help Support for the PLD watchdog on some Kontron ETX and COMexpress From 8d5755b3f77b57447ce5de253ef704ad028474d3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 27 Feb 2017 13:49:09 +0100 Subject: [PATCH 0444/1911] watchdog: softdog: fire watchdog even if softirqs do not get to run Checking for timer expiration is done from the softirq TIMER_SOFTIRQ. 
Since commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job"), pending softirqs are no longer always handled immediately, instead, if there are pending softirqs, and ksoftirqd is in state TASK_RUNNING, the handling of the softirqs are deferred, and are instead supposed to be handled by ksoftirqd, when ksoftirqd gets scheduled. If a user space process with a real-time policy starts to misbehave by never relinquishing the CPU while ksoftirqd is in state TASK_RUNNING, what will happen is that all softirqs will get deferred, while ksoftirqd, which is supposed to handle the deferred softirqs, will never get to run. To make sure that the watchdog is able to fire even when we do not get to run softirqs, replace the timers with hrtimers. Signed-off-by: Niklas Cassel Signed-off-by: Guenter Roeck --- drivers/watchdog/softdog.c | 44 +++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index 7983029852ab0d..0607406254856a 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -21,13 +21,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include -#include #include #include #include #include -#include #include #include @@ -54,7 +53,10 @@ module_param(soft_panic, int, 0); MODULE_PARM_DESC(soft_panic, "Softdog action, set to 1 to panic, 0 to reboot (default=0)"); -static void softdog_fire(unsigned long data) +static struct hrtimer softdog_ticktock; +static struct hrtimer softdog_preticktock; + +static enum hrtimer_restart softdog_fire(struct hrtimer *timer) { module_put(THIS_MODULE); if (soft_noboot) { @@ -67,32 +69,33 @@ static void softdog_fire(unsigned long data) emergency_restart(); pr_crit("Reboot didn't ?????\n"); } -} -static struct timer_list softdog_ticktock = - TIMER_INITIALIZER(softdog_fire, 0, 0); + return HRTIMER_NORESTART; +} static struct watchdog_device softdog_dev; -static void softdog_pretimeout(unsigned long data) +static enum hrtimer_restart softdog_pretimeout(struct hrtimer *timer) { watchdog_notify_pretimeout(&softdog_dev); -} -static struct timer_list softdog_preticktock = - TIMER_INITIALIZER(softdog_pretimeout, 0, 0); + return HRTIMER_NORESTART; +} static int softdog_ping(struct watchdog_device *w) { - if (!mod_timer(&softdog_ticktock, jiffies + (w->timeout * HZ))) + if (!hrtimer_active(&softdog_ticktock)) __module_get(THIS_MODULE); + hrtimer_start(&softdog_ticktock, ktime_set(w->timeout, 0), + HRTIMER_MODE_REL); if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) { if (w->pretimeout) - mod_timer(&softdog_preticktock, jiffies + - (w->timeout - w->pretimeout) * HZ); + hrtimer_start(&softdog_preticktock, + ktime_set(w->timeout - w->pretimeout, 0), + HRTIMER_MODE_REL); else - del_timer(&softdog_preticktock); + hrtimer_cancel(&softdog_preticktock); } return 0; @@ -100,11 +103,11 @@ static int softdog_ping(struct watchdog_device *w) static int softdog_stop(struct watchdog_device *w) { - if (del_timer(&softdog_ticktock)) + if (hrtimer_cancel(&softdog_ticktock)) module_put(THIS_MODULE); if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) - del_timer(&softdog_preticktock); + hrtimer_cancel(&softdog_preticktock); return 0; } @@ -136,8 +139,15 @@ static int __init softdog_init(void) watchdog_set_nowayout(&softdog_dev, nowayout); watchdog_stop_on_reboot(&softdog_dev); - if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) + hrtimer_init(&softdog_ticktock, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + softdog_ticktock.function = softdog_fire; + + if 
(IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) { softdog_info.options |= WDIOF_PRETIMEOUT; + hrtimer_init(&softdog_preticktock, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + softdog_preticktock.function = softdog_pretimeout; + } ret = watchdog_register_device(&softdog_dev); if (ret) From 3736d4eb6af37492aeded7fec0072dedd959c842 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:29 +0100 Subject: [PATCH 0445/1911] watchdog: kempld: fix gcc-4.3 build gcc-4.3 can't decide whether the constant value in kempld_prescaler[PRESCALER_21] is built-time constant or not, and gets confused by the logic in do_div(): drivers/watchdog/kempld_wdt.o: In function `kempld_wdt_set_stage_timeout': kempld_wdt.c:(.text.kempld_wdt_set_stage_timeout+0x130): undefined reference to `__aeabi_uldivmod' This adds a call to ACCESS_ONCE() to force it to not consider it to be constant, and leaves the more efficient normal case in place for modern compilers, using an #ifdef to annotate why we do this hack. Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/kempld_wdt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 73c46b3a09ab3e..2f3b049ea3017c 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -140,12 +140,19 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data, unsigned int timeout) { struct kempld_device_data *pld = wdt_data->pld; - u32 prescaler = kempld_prescaler[PRESCALER_21]; + u32 prescaler; u64 stage_timeout64; u32 stage_timeout; u32 remainder; u8 stage_cfg; +#if GCC_VERSION < 40400 + /* work around a bug compiling do_div() */ + prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]); +#else + prescaler = kempld_prescaler[PRESCALER_21]; +#endif + if (!stage) return -EINVAL; From 9297b652bd0a11e29251ffaac93369b8ad45932d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:30 +0100 Subject: [PATCH 0446/1911] watchdog: db8500: add back prmcu dependency When the db8500 watchdog is enabled without the PRCMU, we get a lot of warnings about duplicate or missing helper functions: In file included from drivers/watchdog/ux500_wdt.c:21:0: include/linux/mfd/dbx500-prcmu.h:422:19: error: redefinition of 'prcmu_abb_read' static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) This restores the dependency as it was. 
Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 47dbacf3bfb41c..a199e8536ce49c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -574,7 +574,7 @@ config IMX2_WDT config UX500_WATCHDOG tristate "ST-Ericsson Ux500 watchdog" - depends on MFD_DB8500_PRCMU || (ARM && COMPILE_TEST) + depends on MFD_DB8500_PRCMU select WATCHDOG_CORE default y help From 9ad82f117f057ed04643cc60ce16fbf59e1b167f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:31 +0100 Subject: [PATCH 0447/1911] watchdog: retu: restore MFD dependency The retu watchdog calls into the respective mfd driver, but fails to link if that is diabled: drivers/watchdog/built-in.o: In function `retu_wdt_set_timeout': ziirave_wdt.c:(.text+0x8c88): undefined reference to `retu_write' ziirave_wdt.c:(.text+0x8c88): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `retu_write' drivers/watchdog/built-in.o: In function `retu_wdt_start': ziirave_wdt.c:(.text+0x8cc8): undefined reference to `retu_write' ziirave_wdt.c:(.text+0x8cc8): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `retu_write' This restores the dependency as it was before Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a199e8536ce49c..52a70ee6014fa8 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -586,7 +586,7 @@ config UX500_WATCHDOG config RETU_WATCHDOG tristate "Retu watchdog" - depends on MFD_RETU || COMPILE_TEST + depends on MFD_RETU select WATCHDOG_CORE help Retu watchdog driver for Nokia Internet Tablets (770, N800, From e3736c3eb3a6f7c0966923b629c9f92b558aa9c7 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 20 Feb 2017 13:06:21 +0200 Subject: [PATCH 0448/1911] kvm: convert kvm.users_count from atomic_t to refcount_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
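For readers unfamiliar with the API, the conversion pattern this patch applies looks roughly like the following minimal sketch (the struct "foo" and its helpers are made-up names for illustration, not code taken from the patch):

    #include <linux/refcount.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct foo {                            /* hypothetical refcounted object */
            refcount_t users;               /* was: atomic_t users; */
    };

    static struct foo *foo_create(void)
    {
            struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

            if (f)
                    refcount_set(&f->users, 1);     /* was: atomic_set(..., 1) */
            return f;
    }

    static void foo_get(struct foo *f)
    {
            /* was: atomic_inc(); refcount_inc() warns and saturates on
             * suspicious values instead of silently overflowing */
            refcount_inc(&f->users);
    }

    static void foo_put(struct foo *f)
    {
            if (refcount_dec_and_test(&f->users))   /* was: atomic_dec_and_test() */
                    kfree(f);
    }

    static bool foo_get_unless_zero(struct foo *f)
    {
            /* was: atomic_add_unless(&f->users, 1, 0) */
            return refcount_inc_not_zero(&f->users);
    }

The hunks below apply exactly this substitution to kvm->users_count.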
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d515074838..2c14ad9809da94 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cc4d6e0dd2a233..c6b7aff634bece 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,7 +617,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); + refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); r = kvm_arch_init_vm(kvm, type); @@ -747,13 +747,13 @@ static void kvm_destroy_vm(struct kvm *kvm) void kvm_get_kvm(struct kvm *kvm) { - atomic_inc(&kvm->users_count); + refcount_inc(&kvm->users_count); } EXPORT_SYMBOL_GPL(kvm_get_kvm); void kvm_put_kvm(struct kvm *kvm) { - if (atomic_dec_and_test(&kvm->users_count)) + if (refcount_dec_and_test(&kvm->users_count)) kvm_destroy_vm(kvm); } EXPORT_SYMBOL_GPL(kvm_put_kvm); @@ -3639,7 +3639,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, * To avoid the race between open and the removal of the debugfs * directory we test against the users count. */ - if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; if (simple_attr_open(inode, file, get, set, fmt)) { From b7ceaec112aa35aa287325754d8c52b8304892cd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Feb 2017 07:36:16 -0800 Subject: [PATCH 0449/1911] x86/asm: Tidy up TSS limit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an earlier version of the patch ("x86/kvm/vmx: Defer TR reload after VM exit") that introduced TSS limit validity tracking, I confused which helper was which. On reflection, the names I chose sucked. Rename the helpers to make it more obvious what's going on and add some comments. While I'm at it, clear __tss_limit_invalid when force-reloading as well as when contitionally reloading, since any TR reload fixes the limit. 
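As a quick illustration of how the renamed helpers are meant to be used (a condensed sketch based on the hunks below, not additional kernel code; both helpers live in asm/desc.h as modified by this patch):

    /*
     * A path that has just made the TSS limit matter -- e.g. sys_ioperm()
     * after installing an I/O bitmap -- refreshes it under preemption
     * protection:
     */
    preempt_disable();
    refresh_tss_limit();    /* reloads TR only if the limit was marked invalid */
    preempt_enable();

    /*
     * A path that may leave the limit too small -- e.g. KVM after a VM exit,
     * per the commit referenced above -- calls this instead; it records that
     * the limit may be wrong and defers the LTR until a task that uses an
     * I/O bitmap needs it (or reloads immediately if the current task
     * already has TIF_IO_BITMAP set):
     */
    invalidate_tss_limit();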
Signed-off-by: Andy Lutomirski Signed-off-by: Radim Krčmář --- arch/x86/include/asm/desc.h | 18 +++++++++++------- arch/x86/kernel/ioport.c | 8 +++++++- arch/x86/kernel/process.c | 6 +++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cb8f9149f6c852..1548ca92ad3f62 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void) asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } +DECLARE_PER_CPU(bool, __tss_limit_invalid); + static inline void force_reload_TR(void) { struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); @@ -220,18 +222,20 @@ static inline void force_reload_TR(void) write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); load_TR_desc(); + this_cpu_write(__tss_limit_invalid, false); } -DECLARE_PER_CPU(bool, need_tr_refresh); - -static inline void refresh_TR(void) +/* + * Call this if you need the TSS limit to be correct, which should be the case + * if and only if you have TIF_IO_BITMAP set or you're switching to a task + * with TIF_IO_BITMAP set. + */ +static inline void refresh_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); - if (unlikely(this_cpu_read(need_tr_refresh))) { + if (unlikely(this_cpu_read(__tss_limit_invalid))) force_reload_TR(); - this_cpu_write(need_tr_refresh, false); - } } /* @@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void) if (unlikely(test_thread_flag(TIF_IO_BITMAP))) force_reload_TR(); else - this_cpu_write(need_tr_refresh, true); + this_cpu_write(__tss_limit_invalid, true); } static inline void native_load_gdt(const struct desc_ptr *dtr) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc851745048..875d3d25dd6a68 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -47,8 +47,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_ptr = bitmap; set_thread_flag(TIF_IO_BITMAP); + /* + * Now that we have an IO bitmap, we need our TSS limit to be + * correct. It's fine if we are preempted after doing this: + * with TIF_IO_BITMAP set, context switches will keep our TSS + * limit correct. + */ preempt_disable(); - refresh_TR(); + refresh_tss_limit(); preempt_enable(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b911..0b302591b51f2e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -65,8 +65,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); -DEFINE_PER_CPU(bool, need_tr_refresh); -EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); +DEFINE_PER_CPU(bool, __tss_limit_invalid); +EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* * this gets called so that we can store lazy state into memory and copy the @@ -218,7 +218,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * Make sure that the TSS limit is correct for the CPU * to notice the IO bitmap. 
*/ - refresh_TR(); + refresh_tss_limit(); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: From 0eb1d0fa6a68324b51db4f7ae16d9b042c5b2de4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Feb 2017 07:36:17 -0800 Subject: [PATCH 0450/1911] selftests/x86: Add a basic selftest for ioperm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't fully exercise the interaction between KVM and ioperm(), but it does test basic functionality. Signed-off-by: Andy Lutomirski Signed-off-by: Radim Krčmář --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/ioperm.c | 170 +++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/ioperm.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 83d8b1c6cb0e54..bed3e6086cb186 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ protection_keys test_vdso TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c new file mode 100644 index 00000000000000..b77313ba2ab164 --- /dev/null +++ b/tools/testing/selftests/x86/ioperm.c @@ -0,0 +1,170 @@ +/* + * ioperm.c - Test case for ioperm(2) + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static bool try_outb(unsigned short port) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("outb %%al, %w[port]" + : : [port] "Nd" (port), "a" (0)); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_ok(unsigned short port) +{ + if (!try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx failed\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx worked\n", port); +} + +static void expect_gp(unsigned short port) +{ + if (try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx worked\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx failed\n", port); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + expect_gp(0x80); + expect_gp(0xed); + + /* + * Probe for 
ioperm support. Note that clearing ioperm bits + * works even as nonroot. + */ + printf("[RUN]\tenable 0x80\n"); + if (ioperm(0x80, 1, 1) != 0) { + printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n", + errno); + return 0; + } + expect_ok(0x80); + expect_gp(0xed); + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + expect_gp(0x80); + expect_gp(0xed); + + /* Make sure that fork() preserves ioperm. */ + if (ioperm(0x80, 1, 1) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tchild: check that we inherited permissions\n"); + expect_ok(0x80); + expect_gp(0xed); + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + /* Test the capability checks. */ + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + return 0; + } + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + printf("[OK]\tit worked\n"); + + printf("[RUN]\tenable 0x80 again\n"); + if (ioperm(0x80, 1, 1) == 0) { + printf("[FAIL]\tit succeeded but should have failed.\n"); + return 1; + } + printf("[OK]\tit failed\n"); + return 0; +} From 0fce546f9f07b94ccc9de09cf48d35e18946d2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Sat, 25 Feb 2017 17:46:53 -0500 Subject: [PATCH 0451/1911] x86/kvm/vmx: remove unused variable in segment_base() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer 'struct desc_struct *d' is unused since commit 8c2e41f7ae12 ("x86/kvm/vmx: Simplify segment_base()") so let's remove it. Signed-off-by: Jérémy Lefaure Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef4ba71dbb66a5..764f1f897847b1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - struct desc_struct *d; struct desc_struct *table; unsigned long v; From acc9ab60132739e0c5ab40644444cd7b22d12886 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 27 Feb 2017 04:24:39 -0800 Subject: [PATCH 0452/1911] KVM: nVMX: Fix pending events injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2 fails to boot on a non-APICv box dues to 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' KVM internal error. 
Suberror: 3 extra data[0]: 800000ef extra data[1]: 1 RAX=0000000000000000 RBX=ffffffff81f36140 RCX=0000000000000000 RDX=0000000000000000 RSI=0000000000000000 RDI=0000000000000000 RBP=ffff88007c92fe90 RSP=ffff88007c92fe90 R8 =ffff88007fccdca0 R9 =0000000000000000 R10=00000000fffedb3d R11=0000000000000000 R12=0000000000000003 R13=0000000000000000 R14=0000000000000000 R15=ffff88007c92c000 RIP=ffffffff810645e6 RFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 0000000000000000 ffffffff 00c00000 CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] SS =0000 0000000000000000 ffffffff 00c00000 DS =0000 0000000000000000 ffffffff 00c00000 FS =0000 0000000000000000 ffffffff 00c00000 GS =0000 ffff88007fcc0000 ffffffff 00c00000 LDT=0000 0000000000000000 ffffffff 00c00000 TR =0040 ffff88007fcd4200 00002087 00008b00 DPL=0 TSS64-busy GDT= ffff88007fcc9000 0000007f IDT= ffffffffff578000 00000fff CR0=80050033 CR2=00000000ffffffff CR3=0000000001e0a000 CR4=003406e0 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000fffe0ff0 DR7=0000000000000400 EFER=0000000000000d01 We should try to reinject previous events if any before trying to inject new event if pending. If vmexit is triggered by L2 guest and L0 interested in, we should reinject IDT-vectoring info to L2 through vmcs02 if any, otherwise, we can consider new IRQs/NMIs which can be injected and call nested events callback to switch from L2 to L1 if needed and inject the proper vmexit events. However, 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' results in the handle events order reversely on non-APICv box. This patch fixes it by bailing out for pending events and not consider new events in this scenario. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jan Kiszka Signed-off-by: Wanpeng Li Fixes: 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running") Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 764f1f897847b1..283aa860183350 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10641,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vcpu->arch.exception.pending || + vcpu->arch.nmi_injected || + vcpu->arch.interrupt.pending) + return -EBUSY; + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -10650,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending || - vcpu->arch.interrupt.pending) + if (vmx->nested.nested_run_pending) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | From 540b1c48c37ac0ad66212004db21e1ff7e2d78be Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 27 Feb 2017 15:43:06 +0000 Subject: [PATCH 0453/1911] rxrpc: Fix deadlock between call creation and sendmsg/recvmsg All the routines by which rxrpc is accessed from the outside are serialised by means of the socket lock (sendmsg, recvmsg, bind, rxrpc_kernel_begin_call(), ...) and this presents a problem: (1) If a number of calls on the same socket are in the process of connection to the same peer, a maximum of four concurrent live calls are permitted before further calls need to wait for a slot. 
(2) If a call is waiting for a slot, it is deep inside sendmsg() or rxrpc_kernel_begin_call() and the entry function is holding the socket lock. (3) sendmsg() and recvmsg() or the in-kernel equivalents are prevented from servicing the other calls as they need to take the socket lock to do so. (4) The socket is stuck until a call is aborted and makes its slot available to the waiter. Fix this by: (1) Provide each call with a mutex ('user_mutex') that arbitrates access by the users of rxrpc separately for each specific call. (2) Make rxrpc_sendmsg() and rxrpc_recvmsg() unlock the socket as soon as they've got a call and taken its mutex. Note that I'm returning EWOULDBLOCK from recvmsg() if MSG_DONTWAIT is set but someone else has the lock. Should I instead only return EWOULDBLOCK if there's nothing currently to be done on a socket, and sleep in this particular instance because there is something to be done, but we appear to be blocked by the interrupt handler doing its ping? (3) Make rxrpc_new_client_call() unlock the socket after allocating a new call, locking its user mutex and adding it to the socket's call tree. The call is returned locked so that sendmsg() can add data to it immediately. From the moment the call is in the socket tree, it is subject to access by sendmsg() and recvmsg() - even if it isn't connected yet. (4) Lock new service calls in the UDP data_ready handler (in rxrpc_new_incoming_call()) because they may already be in the socket's tree and the data_ready handler makes them live immediately if a user ID has already been preassigned. Note that the new call is locked before any notifications are sent that it is live, so doing mutex_trylock() *ought* to always succeed. Userspace is prevented from doing sendmsg() on calls that are in a too-early state in rxrpc_do_sendmsg(). (5) Make rxrpc_new_incoming_call() return the call with the user mutex held so that a ping can be scheduled immediately under it. Note that it might be worth moving the ping call into rxrpc_new_incoming_call() and then we can drop the mutex there. (6) Make rxrpc_accept_call() take the lock on the call it is accepting and release the socket after adding the call to the socket's tree. This is slightly tricky as we've dequeued the call by that point and have to requeue it. Note that requeuing emits a trace event. (7) Make rxrpc_kernel_send_data() and rxrpc_kernel_recv_data() take the new mutex immediately and don't bother with the socket mutex at all. This patch has the nice bonus that calls on the same socket are now to some extent parallelisable. Note that we might want to move rxrpc_service_prealloc() calls out from the socket lock and give it its own lock, so that we don't hang progress in other calls because we're waiting for the allocator. We probably also want to avoid calling rxrpc_notify_socket() from within the socket lock (rxrpc_accept_call()). Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. 
Miller --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 12 ++++++-- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_accept.c | 48 ++++++++++++++++++++++++++++++ net/rxrpc/call_object.c | 18 ++++++++++-- net/rxrpc/input.c | 1 + net/rxrpc/recvmsg.c | 39 ++++++++++++++++++++---- net/rxrpc/sendmsg.c | 57 +++++++++++++++++++++++++++++------- 8 files changed, 156 insertions(+), 22 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 593f586545eba9..39123c06a56613 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -119,6 +119,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_full, rxrpc_recvmsg_hole, rxrpc_recvmsg_next, + rxrpc_recvmsg_requeue, rxrpc_recvmsg_return, rxrpc_recvmsg_terminal, rxrpc_recvmsg_to_be_accepted, @@ -277,6 +278,7 @@ enum rxrpc_congest_change { EM(rxrpc_recvmsg_full, "FULL") \ EM(rxrpc_recvmsg_hole, "HOLE") \ EM(rxrpc_recvmsg_next, "NEXT") \ + EM(rxrpc_recvmsg_requeue, "REQU") \ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ EM(rxrpc_recvmsg_to_be_accepted, "TBAC") \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 199b46e93e64ee..7fb59c3f1542af 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + /* The socket has been unlocked. */ if (!IS_ERR(call)) call->notify_rx = notify_rx; - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } @@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + + mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); @@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); - break; + /* The socket has been unlocked */ + goto out; default: ret = -EINVAL; - break; + goto error_unlock; } error_unlock: release_sock(&rx->sk); +out: _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 12be432be9b2fe..26a7b1db1361e5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -467,6 +467,7 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ + struct mutex user_mutex; /* User access mutex */ ktime_t ack_at; /* When deferred ACK needs to happen */ ktime_t resend_at; /* When next resend needs to happen */ ktime_t ping_at; /* When next to send a ping */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 7c4c64ab8da2e2..0ed181f53f32a0 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * * If we want to report an error, we mark the skb with the packet type and * abort code and return NULL. + * + * The call is returned with the user access mutex held. 
*/ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_connection *conn, @@ -371,6 +373,18 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); + /* Lock the call to prevent rxrpc_kernel_send/recv_data() and + * sendmsg()/recvmsg() inconveniently stealing the mutex once the + * notification is generated. + * + * The BUG should never happen because the kernel should be well + * behaved enough not to access the call before the first notification + * event and userspace is prevented from doing so until the state is + * appropriate. + */ + if (!mutex_trylock(&call->user_mutex)) + BUG(); + /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; @@ -429,10 +443,12 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* * handle acceptance of a call by userspace * - assign the user call ID to the call at the front of the queue + * - called with the socket locked. */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID, rxrpc_notify_rx_t notify_rx) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); + release_sock(&rx->sk); kleave(" = -ENODATA [empty]"); return ERR_PTR(-ENODATA); } @@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, */ call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); + write_unlock(&rx->call_lock); + + /* We need to gain the mutex from the interrupt handler without + * upsetting lockdep, so we have to release it there and take it here. + * We are, however, still holding the socket lock, so other accepts + * must wait for us and no one can add the user ID behind our backs. + */ + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + release_sock(&rx->sk); + kleave(" = -ERESTARTSYS"); + return ERR_PTR(-ERESTARTSYS); + } + + write_lock(&rx->call_lock); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); + /* Find the user ID insertion point. 
*/ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + call = rb_entry(parent, struct rxrpc_call, sock_node); + + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + BUG(); + } + write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: @@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxrpc_notify_socket(call); rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %p{%d}", call, call->debug_id); return call; @@ -515,6 +562,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); out: rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %d", ret); return ERR_PTR(ret); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8b94db3c9b2ecb..d79cd36987a95b 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call->rxtx_annotations) goto nomem_2; + mutex_init(&call->user_mutex); setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) } /* - * set up a call for the given data - * - called in process context with IRQs enabled + * Set up a call for the given parameters. + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, unsigned long user_call_ID, gfp_t gfp) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; @@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { + release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); return call; } @@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)user_call_ID); + /* We need to protect a partially set up call against the user as we + * will be acting outside the socket lock. + */ + mutex_lock(&call->user_mutex); + /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock(&rxrpc_call_lock); + /* From this point on, the call is protected by its own lock. */ + release_sock(&rx->sk); + /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. 
*/ @@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ error_dup_user_ID: write_unlock(&rx->call_lock); + release_sock(&rx->sk); ret = -EEXIST; error: @@ -287,6 +300,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), here, ERR_PTR(ret)); rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 78ec33477adf6c..9f4cfa25af7c92 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reject_packet; } rxrpc_send_ping(call, skb, skew); + mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f3a688e108430a..22447dbcc38021 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -487,6 +487,20 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + /* We're going to drop the socket lock, so we need to lock the call + * against interference by sendmsg. + */ + if (!mutex_trylock(&call->user_mutex)) { + ret = -EWOULDBLOCK; + if (flags & MSG_DONTWAIT) + goto error_requeue_call; + ret = -ERESTARTSYS; + if (mutex_lock_interruptible(&call->user_mutex) < 0) + goto error_requeue_call; + } + + release_sock(&rx->sk); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -502,7 +516,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, &call->user_call_ID); } if (ret < 0) - goto error; + goto error_unlock_call; } if (msg->msg_name) { @@ -533,12 +547,12 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } if (ret < 0) - goto error; + goto error_unlock_call; if (call->state == RXRPC_CALL_COMPLETE) { ret = rxrpc_recvmsg_term(call, msg); if (ret < 0) - goto error; + goto error_unlock_call; if (!(flags & MSG_PEEK)) rxrpc_release_call(rx, call); msg->msg_flags |= MSG_EOR; @@ -551,8 +565,21 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_flags &= ~MSG_MORE; ret = copied; -error: +error_unlock_call: + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + return ret; + +error_requeue_call: + if (!(flags & MSG_PEEK)) { + write_lock_bh(&rx->recvmsg_lock); + list_add(&call->recvmsg_link, &rx->recvmsg_q); + write_unlock_bh(&rx->recvmsg_lock); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } error_no_call: release_sock(&rx->sk); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); @@ -609,7 +636,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, iov.iov_len = size - *_offset; iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: @@ -648,7 +675,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8ada..31c1538c1a8de6 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ 
-59,9 +59,12 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } trace_rxrpc_transmit(call, rxrpc_transmit_wait); - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(*timeo); + break; + } } remove_wait_queue(&call->waitq, &myself); @@ -171,7 +174,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, /* * send data through a socket * - must be called in process context - * - caller holds the socket locked + * - The caller holds the call user access mutex, but not the socket lock. */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, @@ -437,10 +440,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * Create a new client call for sendmsg(). + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, unsigned long user_call_ID, bool exclusive) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -450,8 +456,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, _enter(""); - if (!msg->msg_name) + if (!msg->msg_name) { + release_sock(&rx->sk); return ERR_PTR(-EDESTADDRREQ); + } key = rx->key; if (key && !rx->key->payload.data[0]) @@ -464,6 +472,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | exclusive; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + /* The socket is now unlocked */ _leave(" = %p\n", call); return call; @@ -475,6 +484,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + __releases(&rx->sk.sk_lock.slock) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -488,12 +498,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, &exclusive); if (ret < 0) - return ret; + goto error_release_sock; if (cmd == RXRPC_CMD_ACCEPT) { + ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; + goto error_release_sock; call = rxrpc_accept_call(rx, user_call_ID, NULL); + /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call, rxrpc_call_put); @@ -502,12 +514,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) call = rxrpc_find_call_by_user_ID(rx, user_call_ID); if (!call) { + ret = -EBADSLT; if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; + goto error_release_sock; + ret = -EBUSY; + if (call->state == RXRPC_CALL_UNINITIALISED || + call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || + call->state == RXRPC_CALL_SERVER_PREALLOC || + call->state == RXRPC_CALL_SERVER_SECURING || + call->state == RXRPC_CALL_SERVER_ACCEPTING) + goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, exclusive); + /* The socket is now unlocked... */ if (IS_ERR(call)) return PTR_ERR(call); + /* ... and we have the call lock. 
*/ + } else { + ret = mutex_lock_interruptible(&call->user_mutex); + release_sock(&rx->sk); + if (ret < 0) { + ret = -ERESTARTSYS; + goto error_put; + } } _debug("CALL %d USR %lx ST %d on CONN %p", @@ -535,9 +564,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len); } + mutex_unlock(&call->user_mutex); +error_put: rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ret; + +error_release_sock: + release_sock(&rx->sk); + return ret; } /** @@ -562,7 +597,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -577,7 +612,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } @@ -598,12 +633,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, { _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); if (rxrpc_abort_call(why, call, 0, abort_code, error)) rxrpc_send_abort_packet(call); - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(""); } From 5179b26694c92373275e4933f5d0ff32d585c675 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 28 Feb 2017 12:41:29 +0800 Subject: [PATCH 0454/1911] sctp: call rcu_read_lock before checking for duplicate transport nodes Commit cd2b70875058 ("sctp: check duplicate node before inserting a new transport") called rhltable_lookup() to check for the duplicate transport node in transport rhashtable. But rhltable_lookup() doesn't call rcu_read_lock inside, it could cause a use-after-free issue if it tries to dereference the node that another cpu has freed it. Note that sock lock can not avoid this as it is per sock. This patch is to fix it by calling rcu_read_lock before checking for duplicate transport nodes. Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") Reported-by: Andrey Konovalov Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/input.c b/net/sctp/input.c index fc458968fe4bd8..2a28ab20487f03 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t) arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); + rcu_read_lock(); list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(transport, tmp, list, node) if (transport->asoc->ep == t->asoc->ep) { + rcu_read_unlock(); err = -EEXIST; goto out; } + rcu_read_unlock(); err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); From 4f7bfb3982e02aacc5fb6e1a121e5326c1778ac3 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 28 Feb 2017 01:45:40 -0500 Subject: [PATCH 0455/1911] rds: ib: add the static type to the variables The variables rds_ib_mr_1m_pool_size and rds_ib_mr_8k_pool_size are used only in the ib.c file. As such, the static type is added to limit them in this file. Cc: Joe Jin Cc: Junxiao Bi Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/ib.c | 4 ++-- net/rds/ib_mr.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 91fe46f1e4ccf0..0f557b24331121 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -45,8 +45,8 @@ #include "ib.h" #include "ib_mr.h" -unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; -unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; +static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; +static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; module_param(rds_ib_mr_1m_pool_size, int, 0444); diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 24c086db4511d2..5d6e98a79a5e4b 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -107,8 +107,6 @@ struct rds_ib_mr_pool { }; extern struct workqueue_struct *rds_ib_mr_wq; -extern unsigned int rds_ib_mr_1m_pool_size; -extern unsigned int rds_ib_mr_8k_pool_size; extern bool prefer_frmr; struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, From f7df4923fa986247e93ec2cdff5ca168fff14dcf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 28 Feb 2017 08:55:40 +0100 Subject: [PATCH 0456/1911] mlxsw: spectrum_router: Avoid potential packets loss When the structure of the LPM tree changes (f.e., due to the addition of a new prefix), we unbind the old tree and then bind the new one. This may result in temporary packet loss. Instead, overwrite the old binding with the new one. Fixes: 6b75c4807db3 ("mlxsw: spectrum_router: Add virtual router management") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d7ac22d7f94029..bd8de6b9be718f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -441,30 +441,40 @@ static int mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, struct mlxsw_sp_prefix_usage *req_prefix_usage) { - struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; + struct mlxsw_sp_lpm_tree *new_tree; + int err; - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, - &vr->lpm_tree->prefix_usage)) + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) return 0; - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, vr->proto, false); - if (IS_ERR(lpm_tree)) { + if (IS_ERR(new_tree)) { /* We failed to get a tree according to the required * prefix usage. However, the current tree might be still good * for us if our requirement is subset of the prefixes used * in the tree. 
*/ if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &vr->lpm_tree->prefix_usage)) + &lpm_tree->prefix_usage)) return 0; - return PTR_ERR(lpm_tree); + return PTR_ERR(new_tree); } - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + /* Prevent packet loss by overwriting existing binding */ + vr->lpm_tree = new_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + return 0; + +err_tree_bind: vr->lpm_tree = lpm_tree; - return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + return err; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, From 0bf09c397e13b70ab6064eb2af69b8fa8e68a24e Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 28 Feb 2017 10:39:48 +0200 Subject: [PATCH 0457/1911] MAINTAINERS: Orphan usb/net/hso driver The email address of Jan Dumon bounces, and there is not relevant information in the linked website. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 846f97aa350855..5a00239fd7b311 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6012,9 +6012,8 @@ F: include/linux/hsi/ F: include/uapi/linux/hsi/ HSO 3G MODEM DRIVER -M: Jan Dumon -W: http://www.pharscape.org -S: Maintained +L: linux-usb@vger.kernel.org +S: Orphan F: drivers/net/usb/hso.c HSR NETWORK PROTOCOL From 4f3de46f7a57a8ecc16c7ef69c6917b3731a7c5f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Feb 2017 11:58:22 +0000 Subject: [PATCH 0458/1911] net: usb: asix_devices: fix missing return code check on call to asix_write_medium_mode The call to asix_write_medium_mode is not updating the return code ret and yet ret is being checked for an error. Fix this by assigning ret to the return code from the call asix_write_medium_mode. Detected by CoverityScan, CID#1357148 ("Logically Dead Code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6e98ede997d3f0..0dd510604118bc 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -346,7 +346,7 @@ static int ax88772_reset(struct usbnet *dev) if (ret < 0) goto out; - asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); if (ret < 0) goto out; From b2d0fe35471d1a71471f99147ffb5986bd60e744 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Feb 2017 15:02:15 +0300 Subject: [PATCH 0459/1911] net/mlx4: && vs & typo Bitwise & was obviously intended here. Fixes: 745d8ae4622c ("net/mlx4: Spoofcheck and zero MAC can't coexist") Signed-off-by: Dan Carpenter Reviewed-by: Tariq Toukan Signed-off-by: David S. 
Miller --- include/linux/mlx4/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index e965e5090d9622..a858bcb6220b5d 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) int i; for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac && 0xFF; + addr[i - 1] = mac & 0xFF; mac >>= 8; } } From 39e6c8208d7b6fb9d2047850fb3327db567b564b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Feb 2017 10:34:50 -0800 Subject: [PATCH 0460/1911] net: solve a NAPI race While playing with mlx4 hardware timestamping of RX packets, I found that some packets were received by TCP stack with a ~200 ms delay... Since the timestamp was provided by the NIC, and my probe was added in tcp_v4_rcv() while in BH handler, I was confident it was not a sender issue, or a drop in the network. This would happen with a very low probability, but hurting RPC workloads. A NAPI driver normally arms the IRQ after the napi_complete_done(), after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab it. Problem is that if another point in the stack grabs NAPI_STATE_SCHED bit while IRQ are not disabled, we might have later an IRQ firing and finding this bit set, right before napi_complete_done() clears it. This can happen with busy polling users, or if gro_flush_timeout is used. But some other uses of napi_schedule() in drivers can cause this as well. thread 1 thread 2 (could be on same cpu, or not) // busy polling or napi_watchdog() napi_schedule(); ... napi->poll() device polling: read 2 packets from ring buffer Additional 3rd packet is available. device hard irq // does nothing because NAPI_STATE_SCHED bit is owned by thread 1 napi_schedule(); napi_complete_done(napi, 2); rearm_irq(); Note that rearm_irq() will not force the device to send an additional IRQ for the packet it already signaled (3rd packet in my example) This patch adds a new NAPI_STATE_MISSED bit, that napi_schedule_prep() can set if it could not grab NAPI_STATE_SCHED Then napi_complete_done() properly reschedules the napi to make sure we do not miss something. Since we manipulate multiple bits at once, use cmpxchg() like in sk_busy_loop() to provide proper transactions. In v2, I changed napi_watchdog() to use a relaxed variant of napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point. In v3, I added more details in the changelog and clears NAPI_STATE_MISSED in busy_poll_stop() In v4, I added the ideas given by Alexander Duyck in v3 review Signed-off-by: Eric Dumazet Cc: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 29 +++++---------- net/core/dev.c | 76 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 24 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f40f0ab3847a8c..97456b2539e46d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -330,6 +330,7 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ @@ -338,12 +339,13 @@ enum { }; enum { - NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), - NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } -/** - * napi_schedule_prep - check if NAPI can be scheduled - * @n: NAPI context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable to - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline bool napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} +bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll diff --git a/net/core/dev.c b/net/core/dev.c index 304f2deae5f989..e63bf61b19be02 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4883,6 +4883,39 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +/** + * napi_schedule_prep - check if napi can be scheduled + * @n: napi context + * + * Test if NAPI routine is already running, and if not mark + * it as running. This is used as a condition variable + * insure only one NAPI poll instance runs. We also make + * sure there is no pending NAPI disable. 
+ */ +bool napi_schedule_prep(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (unlikely(val & NAPIF_STATE_DISABLE)) + return false; + new = val | NAPIF_STATE_SCHED; + + /* Sets STATE_MISSED bit if STATE_SCHED was already set + * This was suggested by Alexander Duyck, as compiler + * emits better code than : + * if (val & NAPIF_STATE_SCHED) + * new |= NAPIF_STATE_MISSED; + */ + new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * + NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return !(val & NAPIF_STATE_SCHED); +} +EXPORT_SYMBOL(napi_schedule_prep); + /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule @@ -4897,7 +4930,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { - unsigned long flags; + unsigned long flags, val, new; /* * 1) Don't let napi dequeue from the cpu poll list @@ -4927,7 +4960,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); local_irq_restore(flags); } - WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); + + do { + val = READ_ONCE(n->state); + + WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); + + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); + + /* If STATE_MISSED was set, leave STATE_SCHED set, + * because we will call napi->poll() one more time. + * This C code was suggested by Alexander Duyck to help gcc. + */ + new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * + NAPIF_STATE_SCHED; + } while (cmpxchg(&n->state, val, new) != val); + + if (unlikely(val & NAPIF_STATE_MISSED)) { + __napi_schedule(n); + return false; + } + return true; } EXPORT_SYMBOL(napi_complete_done); @@ -4953,6 +5006,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) { int rc; + /* Busy polling means there is a high chance device driver hard irq + * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was + * set in napi_schedule_prep(). + * Since we are about to call napi->poll() once more, we can safely + * clear NAPI_STATE_MISSED. + * + * Note: x86 could use a single "lock and ..." instruction + * to perform these two clear_bit() + */ + clear_bit(NAPI_STATE_MISSED, &napi->state); clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); @@ -5088,8 +5151,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) struct napi_struct *napi; napi = container_of(timer, struct napi_struct, timer); - if (napi->gro_list) - napi_schedule_irqoff(napi); + + /* Note : we use a relaxed variant of napi_schedule_prep() not setting + * NAPI_STATE_MISSED, since we do not react to a device IRQ. + */ + if (napi->gro_list && !napi_disable_pending(napi) && + !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) + __napi_schedule_irqoff(napi); return HRTIMER_NORESTART; } From 56de859e9967c070464a9a9f4f18d73f9447298e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:36 -0800 Subject: [PATCH 0461/1911] vxlan: lock RCU on TX path There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b7911994112aeb..e375560cc74e5f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2105,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); + rcu_read_lock(); if (dst->sa.sa_family == AF_INET) { struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; @@ -2127,7 +2128,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port, vni, &rt->dst, rt->rt_flags); if (err) - return; + goto out_unlock; } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { df = htons(IP_DF); } @@ -2166,7 +2167,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port, vni, ndst, rt6i_flags); if (err) - return; + goto out_unlock; } tos = ip_tunnel_ecn_encap(tos, old_iph, skb); @@ -2183,6 +2184,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, label, src_port, dst_port, !udp_sum); #endif } +out_unlock: + rcu_read_unlock(); return; drop: @@ -2191,6 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; tx_error: + rcu_read_unlock(); if (err == -ELOOP) dev->stats.collisions++; else if (err == -ENETUNREACH) From a717e3f740803cc88bd5c9a70c93504f6a368663 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:37 -0800 Subject: [PATCH 0462/1911] geneve: lock RCU on TX path There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: fceb9c3e3825 ("geneve: avoid using stale geneve socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 45301cb98bc1c2..7074b40ebd7f8e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -881,12 +881,14 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) info = &geneve->info; } + rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if (info->mode & IP_TUNNEL_INFO_IPV6) err = geneve6_xmit_skb(skb, dev, geneve, info); else #endif err = geneve_xmit_skb(skb, dev, geneve, info); + rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; From 8c171d6ca56c6891372a97af26b58b2cfad7fd9a Mon Sep 17 00:00:00 2001 From: Felix Jia Date: Mon, 27 Feb 2017 12:41:23 +1300 Subject: [PATCH 0463/1911] net/ipv6: avoid possible dead locking on addr_gen_mode sysctl The addr_gen_mode variable can be accessed by both sysctl and netlink. Repleacd rtnl_lock() with rtnl_trylock() protect the sysctl operation to avoid the possbile dead lock.` Signed-off-by: Felix Jia Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c33..cfc485a8e1c028 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5692,13 +5692,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; struct net *net = (struct net *)ctl->extra2; + if (!rtnl_trylock()) + return restart_syscall(); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { new_val = *((int *)ctl->data); - if (check_addr_gen_mode(new_val) < 0) - return -EINVAL; + if (check_addr_gen_mode(new_val) < 0) { + ret = -EINVAL; + goto out; + } /* request for default */ if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { @@ -5707,20 +5712,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, /* request for individual net device */ } else { if (!idev) - return ret; + goto out; - if (check_stable_privacy(idev, net, new_val) < 0) - return -EINVAL; + if (check_stable_privacy(idev, net, new_val) < 0) { + ret = -EINVAL; + goto out; + } if (idev->cnf.addr_gen_mode != new_val) { idev->cnf.addr_gen_mode = new_val; - rtnl_lock(); addrconf_dev_config(idev->dev); - rtnl_unlock(); } } } +out: + rtnl_unlock(); + return ret; } From 3b45a4106f146c336cbcaccb9d8d0fa0e5c3dc1d Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 27 Feb 2017 20:59:39 +0800 Subject: [PATCH 0464/1911] net: route: add missing nla_policy entry for RTA_MARK attribute This will add stricter validating for RTA_MARK attribute. Signed-off-by: Liping Zhang Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 1 + net/ipv6/route.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b39a791f6756fc..42bfd08109dd78 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f54f4265b37f29..d94f1dfa54c842 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2891,6 +2891,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, From 4ab18701c66552944188dbcd0ce0012729baab84 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 3 Jan 2017 09:37:34 -0800 Subject: [PATCH 0465/1911] xtensa: move parse_tag_fdt out of #ifdef CONFIG_BLK_DEV_INITRD FDT tag parsing is not related to whether BLK_DEV_INITRD is configured or not, move it out of the corresponding #ifdef/#endif block. This fixes passing external FDT to the kernel configured w/o BLK_DEV_INITRD support. 
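
The shape of the fix is visible in the setup.c hunks below: the initrd guard now closes before the FDT handler, and CONFIG_OF gets its own independent guard. A structural sketch only (handler bodies elided as comments):

#ifdef CONFIG_BLK_DEV_INITRD
	/* parse_tag_initrd() and its __tagtable(BP_TAG_INITRD, ...) entry */
#endif /* CONFIG_BLK_DEV_INITRD */

#ifdef CONFIG_OF
	/* parse_tag_fdt() and its __tagtable(BP_TAG_FDT, ...) entry -
	 * no longer nested under the initrd guard, so BP_TAG_FDT is
	 * honoured even when BLK_DEV_INITRD is disabled */
#endif /* CONFIG_OF */
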
Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504dd..23f2e034ba46cb 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) __tagtable(BP_TAG_INITRD, parse_tag_initrd); +#endif /* CONFIG_BLK_DEV_INITRD */ + #ifdef CONFIG_OF static int __init parse_tag_fdt(const bp_tag_t *tag) @@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); #endif /* CONFIG_OF */ -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init parse_tag_cmdline(const bp_tag_t* tag) { strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); From 9a736fcb096b43b68af8329eb12abc8256dceaba Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 25 Dec 2016 04:58:57 -0800 Subject: [PATCH 0466/1911] xtensa: clean up bootable image build targets Currently xtensa uses 'zImage' as a synonym of 'all', but in fact xtensa supports three targets: 'Image' (ELF image with reset vector), 'zImage' (compressed redboot image) and 'uImage' (U-Boot image). Provide separate 'Image', 'zImage' and 'uImage' make targets that only build corresponding image type. Make 'all' build all images appropriate for a platform. Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 8 +++----- arch/xtensa/boot/Makefile | 23 ++++++++++++++++------- arch/xtensa/boot/boot-elf/Makefile | 2 +- arch/xtensa/boot/boot-redboot/Makefile | 2 +- arch/xtensa/boot/boot-uboot/Makefile | 14 -------------- 5 files changed, 21 insertions(+), 28 deletions(-) delete mode 100644 arch/xtensa/boot/boot-uboot/Makefile diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index e54189427b3151..7ee02fe4a63df7 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -93,11 +93,7 @@ endif boot := arch/xtensa/boot -all: zImage - -bzImage : zImage - -zImage: vmlinux +all Image zImage uImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ %.dtb: @@ -107,6 +103,8 @@ dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts define archhelp + @echo '* Image - Kernel ELF image with reset vector' @echo '* zImage - Compressed kernel image (arch/xtensa/boot/images/zImage.*)' + @echo '* uImage - U-Boot wrapped image' @echo ' dtbs - Build device tree blobs for enabled boards' endef diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index ca20a892021bb6..53e4178711e695 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -21,14 +21,17 @@ subdir-y := lib # Subdirs for the boot loader(s) -bootdir-$(CONFIG_XTENSA_PLATFORM_ISS) += boot-elf -bootdir-$(CONFIG_XTENSA_PLATFORM_XT2000) += boot-redboot boot-elf boot-uboot -bootdir-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += boot-redboot boot-elf boot-uboot +boot-$(CONFIG_XTENSA_PLATFORM_ISS) += Image +boot-$(CONFIG_XTENSA_PLATFORM_XT2000) += Image zImage uImage +boot-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += Image zImage uImage -zImage Image: $(bootdir-y) +all: $(boot-y) +Image: boot-elf +zImage: boot-redboot +uImage: $(obj)/uImage -$(bootdir-y): $(addprefix $(obj)/,$(subdir-y)) \ - $(addprefix $(obj)/,$(host-progs)) +boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) \ + $(addprefix $(obj)/,$(host-progs)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary @@ -41,4 +44,10 @@ vmlinux.bin.gz: vmlinux.bin FORCE boot-elf: vmlinux.bin boot-redboot: vmlinux.bin.gz -boot-uboot: vmlinux.bin.gz + 
+UIMAGE_LOADADDR = $(CONFIG_KERNEL_LOAD_ADDRESS) +UIMAGE_COMPRESSION = gzip + +$(obj)/uImage: vmlinux.bin.gz FORCE + $(call if_changed,uimage) + $(Q)$(kecho) ' Kernel: $@ is ready' diff --git a/arch/xtensa/boot/boot-elf/Makefile b/arch/xtensa/boot/boot-elf/Makefile index 89db089f5a1247..52147198135635 100644 --- a/arch/xtensa/boot/boot-elf/Makefile +++ b/arch/xtensa/boot/boot-elf/Makefile @@ -31,4 +31,4 @@ $(obj)/../Image.elf: $(obj)/Image.o $(obj)/boot.lds -o $@ $(obj)/Image.o $(Q)$(kecho) ' Kernel: $@ is ready' -zImage: $(obj)/../Image.elf +all Image: $(obj)/../Image.elf diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile index 8be8b943698178..8632473ad319ea 100644 --- a/arch/xtensa/boot/boot-redboot/Makefile +++ b/arch/xtensa/boot/boot-redboot/Makefile @@ -32,4 +32,4 @@ $(obj)/../zImage.redboot: $(obj)/zImage.elf $(Q)$(OBJCOPY) -S -O binary $< $@ $(Q)$(kecho) ' Kernel: $@ is ready' -zImage: $(obj)/../zImage.redboot +all zImage: $(obj)/../zImage.redboot diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile deleted file mode 100644 index 0f4c417b4196e9..00000000000000 --- a/arch/xtensa/boot/boot-uboot/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# - -UIMAGE_LOADADDR = $(CONFIG_KERNEL_LOAD_ADDRESS) -UIMAGE_COMPRESSION = gzip - -$(obj)/../uImage: vmlinux.bin.gz FORCE - $(call if_changed,uimage) - $(Q)$(kecho) ' Kernel: $@ is ready' - -zImage: $(obj)/../uImage From b46dcfa378b0cdea1ee832802c9e36750e0fffa9 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 4 Jan 2017 10:40:49 -0800 Subject: [PATCH 0467/1911] xtensa: allow merging vectors into .text section Currently code for exception/IRQ vectors is stored in kernel image as initialization data and is copied to its working addresses during startup. It doesn't always make sense. In many cases vectors location can be automatically decided at kernel link time and code can be placed right there. This is especially useful for XIP kernel. 
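
The key mechanism, picked up in the vmlinux.lds.S hunk below, is that when no vectors offset is configured the linker-script helper simply emits each vector section in place at its fixed address, instead of creating a separately loaded output section that must be copied to its final address at boot; schematically:

/* In-place form used when CONFIG_VECTORS_OFFSET is not set: advance the
 * location counter to the vector's address and pull the input section
 * straight into .text - no RELOCATE_ENTRY, no copy during startup.
 */
#define SECTION_VECTOR(section, addr) \
	. = addr;                     \
	*(section)
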
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/vectors.h | 4 +++ arch/xtensa/kernel/setup.c | 3 +++ arch/xtensa/kernel/vmlinux.lds.S | 41 +++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 77d41cc7a688ac..65d3da9db19beb 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -67,7 +67,11 @@ static inline unsigned long xtensa_get_kio_paddr(void) #endif /* CONFIG_MMU */ #define RESET_VECTOR1_VADDR (XCHAL_RESET_VECTOR1_VADDR) +#ifdef CONFIG_VECTORS_OFFSET #define VECBASE_VADDR (KERNELOFFSET - CONFIG_VECTORS_OFFSET) +#else +#define VECBASE_VADDR _vecbase +#endif #if defined(XCHAL_HAVE_VECBASE) && XCHAL_HAVE_VECBASE diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 23f2e034ba46cb..7fc7bb9f3b8482 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -455,6 +455,7 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(&_stext), __pa(&_end)); +#ifdef CONFIG_VECTORS_OFFSET mem_reserve(__pa(&_WindowVectors_text_start), __pa(&_WindowVectors_text_end)); @@ -491,6 +492,8 @@ void __init setup_arch(char **cmdline_p) __pa(&_Level6InterruptVector_text_end)); #endif +#endif /* CONFIG_VECTORS_OFFSET */ + #ifdef CONFIG_SMP mem_reserve(__pa(&_SecondaryResetVector_text_start), __pa(&_SecondaryResetVector_text_end)); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 31411fc82662c8..30d9fc21e0763c 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -59,6 +59,7 @@ jiffies = jiffies_64; * garbage.) */ +#ifdef CONFIG_VECTORS_OFFSET #define SECTION_VECTOR(sym, section, addr, max_prevsec_size, prevsec) \ section addr : AT((MIN(LOADADDR(prevsec) + max_prevsec_size, \ LOADADDR(prevsec) + SIZEOF(prevsec)) + 3) & ~ 3) \ @@ -68,6 +69,11 @@ jiffies = jiffies_64; *(section) \ sym ## _end = ABSOLUTE(.); \ } +#else +#define SECTION_VECTOR(section, addr) \ + . = addr; \ + *(section) +#endif /* * Mapping of input sections to output sections when linking. @@ -85,6 +91,37 @@ SECTIONS { /* The HEAD_TEXT section must be the first section! */ HEAD_TEXT + +#ifndef CONFIG_VECTORS_OFFSET + . 
= ALIGN(PAGE_SIZE); + _vecbase = .; + + SECTION_VECTOR (.WindowVectors.text, WINDOW_VECTORS_VADDR) +#if XCHAL_EXCM_LEVEL >= 2 + SECTION_VECTOR (.Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 3 + SECTION_VECTOR (.Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 4 + SECTION_VECTOR (.Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 5 + SECTION_VECTOR (.Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 6 + SECTION_VECTOR (.Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR) +#endif + SECTION_VECTOR (.DebugInterruptVector.literal, DEBUG_VECTOR_VADDR - 4) + SECTION_VECTOR (.DebugInterruptVector.text, DEBUG_VECTOR_VADDR) + SECTION_VECTOR (.KernelExceptionVector.literal, KERNEL_VECTOR_VADDR - 4) + SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) + SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4) + SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) + SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48) + SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) +#endif + TEXT_TEXT VMLINUX_SYMBOL(__sched_text_start) = .; *(.sched.literal .sched.text) @@ -132,6 +169,7 @@ SECTIONS . = ALIGN(16); __boot_reloc_table_start = ABSOLUTE(.); +#ifdef CONFIG_VECTORS_OFFSET RELOCATE_ENTRY(_WindowVectors_text, .WindowVectors.text); #if XCHAL_EXCM_LEVEL >= 2 @@ -164,6 +202,7 @@ SECTIONS .DoubleExceptionVector.text); RELOCATE_ENTRY(_DebugInterruptVector_text, .DebugInterruptVector.text); +#endif #if defined(CONFIG_SMP) RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); @@ -186,6 +225,7 @@ SECTIONS . = ALIGN(4); .dummy : { LONG(0) } +#ifdef CONFIG_VECTORS_OFFSET /* The vectors are relocated to the real position at startup time */ SECTION_VECTOR (_WindowVectors_text, @@ -277,6 +317,7 @@ SECTIONS . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; +#endif #if defined(CONFIG_SMP) SECTION_VECTOR (_SecondaryResetVector_text, From 28db0c7b1c7fbca3637994c1ce3c4ffe142e2755 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 1 Mar 2017 16:39:55 +0530 Subject: [PATCH 0468/1911] PM / OPP: Documentation: Fix opp-microvolt in examples The triplet present in "opp-microvolt" property should be in the order , while all the examples have it in the order . Fix it. Luckily all of the users of "opp-microvolt" property have applied brain instead of copying the examples from documentation and none of the actual dts files have it wrong. Reported-by: Rob Herring Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/devicetree/bindings/opp/opp.txt | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9f5ca4457b5f60..7951961ef3563a 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -188,14 +188,14 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. 
opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -267,14 +267,14 @@ independently. opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -343,14 +343,14 @@ DVFS state together. opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -369,7 +369,7 @@ DVFS state together. opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; - opp-microvolt = <1045000 1050000 1055000>; + opp-microvolt = <1050000 1045000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; @@ -382,7 +382,7 @@ DVFS state together. }; opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; - opp-microvolt = <1010000 1100000 1110000>; + opp-microvolt = <1100000 1010000 1110000>; opp-microamp = <95000>; clock-latency-ns = <400000>; turbo-mode; @@ -424,9 +424,9 @@ Example 4: Handling multiple regulators opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>, /* Supply 0 */ - <960000 965000 975000>, /* Supply 1 */ - <960000 965000 975000>; /* Supply 2 */ + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ opp-microamp = <70000>, /* Supply 0 */ <70000>, /* Supply 1 */ <70000>; /* Supply 2 */ @@ -437,9 +437,9 @@ Example 4: Handling multiple regulators opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>, /* Supply 0 */ - <960000 965000 975000>, /* Supply 1 */ - <960000 965000 975000>; /* Supply 2 */ + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ opp-microamp = <70000>, /* Supply 0 */ <0>, /* Supply 1 doesn't need this */ <70000>; /* Supply 2 */ @@ -474,7 +474,7 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <900000 915000 925000>; + opp-microvolt = <915000 900000 925000>; ... }; @@ -487,7 +487,7 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <900000 915000 925000>; + opp-microvolt = <915000 900000 925000>; ... 
}; }; @@ -512,18 +512,18 @@ Example 6: opp-microvolt-, opp-microamp-: opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt-slow = <900000 915000 925000>; - opp-microvolt-fast = <970000 975000 985000>; + opp-microvolt-slow = <915000 900000 925000>; + opp-microvolt-fast = <975000 970000 985000>; opp-microamp-slow = <70000>; opp-microamp-fast = <71000>; }; opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; - opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ - <910000 925000 935000>; /* Supply vcc1 */ - opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */ - <960000 965000 975000>; /* Supply vcc1 */ + opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */ + <925000 910000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */ + <965000 960000 975000>; /* Supply vcc1 */ opp-microamp = <70000>; /* Will be used for both slow/fast */ }; }; From 61cfac6f267dabcf2740a7ec8a0295833b28b5f5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 28 Feb 2017 16:05:19 -0800 Subject: [PATCH 0469/1911] CIFS: Fix possible use after free in demultiplex thread The recent changes that added SMB3 encryption support introduced a possible use after free in the demultiplex thread. When we process an encrypted packed we obtain a pointer to SMB session but do not obtain a reference. This can possibly lead to a situation when this session was freed before we copy a decryption key from there. Fix this by obtaining a copy of the key rather than a pointer to the session under a spinlock. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a44b4dbe4aaec9..d2cdd9c19d1767 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1609,6 +1609,26 @@ static void cifs_crypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } +static int +smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) +{ + struct cifs_ses *ses; + u8 *ses_enc_key; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != ses_id) + continue; + ses_enc_key = enc ? ses->smb3encryptionkey : + ses->smb3decryptionkey; + memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE); + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + spin_unlock(&cifs_tcp_ses_lock); + + return 1; +} /* * Encrypt or decrypt @rqst message. @rqst has the following format: * iov[0] - transform header (associate data), @@ -1622,10 +1642,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24; - struct cifs_ses *ses; int rc = 0; struct scatterlist *sg; u8 sign[SMB2_SIGNATURE_SIZE] = {}; + u8 key[SMB3_SIGN_KEY_SIZE]; struct aead_request *req; char *iv; unsigned int iv_len; @@ -1635,9 +1655,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) init_completion(&result.completion); - ses = smb2_find_smb_ses(server, tr_hdr->SessionId); - if (!ses) { - cifs_dbg(VFS, "%s: Could not find session\n", __func__); + rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key); + if (rc) { + cifs_dbg(VFS, "%s: Could not get %scryption key\n", __func__, + enc ? 
"en" : "de"); return 0; } @@ -1649,8 +1670,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) tfm = enc ? server->secmech.ccmaesencrypt : server->secmech.ccmaesdecrypt; - rc = crypto_aead_setkey(tfm, enc ? ses->smb3encryptionkey : - ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); + rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc); return rc; From df2c43343b47a7138431f431118eb5819e205365 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 1 Mar 2017 16:50:45 +0200 Subject: [PATCH 0470/1911] bridge: Fix error path in nbp_vlan_init Fix error path order in nbp_vlan_init, so if switchdev_port_attr_set call failes, the vlan_hash wouldn't be destroyed before inited. Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") CC: Roopa Prabhu Signed-off-by: Yotam Gigi Acked-by: Roopa Prabhu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 62e68c0dc68740..b838213c408e24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -997,10 +997,10 @@ int nbp_vlan_init(struct net_bridge_port *p) RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); vlan_tunnel_deinit(vg); -err_vlan_enabled: err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: +err_vlan_enabled: kfree(vg); goto out; From eba38a968258b5ad9d70722ab8c584e1753f4b16 Mon Sep 17 00:00:00 2001 From: Gary Lin Date: Wed, 1 Mar 2017 16:25:51 +0800 Subject: [PATCH 0471/1911] bpf: update the comment about the length of analysis Commit 07016151a446 ("bpf, verifier: further improve search pruning") increased the limit of processed instructions from 32k to 64k, but the comment still mentioned the 32k limit. This commit updates the comment to reflect the change. Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Gary Lin Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fc6e39b223e2c..796b68d001198a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -33,7 +33,7 @@ * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. * Since it's analyzing all pathes through the program, the length of the - * analysis is limited to 32k insn, which may be hit even if total number of + * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k * From 449809a66c1d0b1563dee84493e14bf3104d2d7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 08:39:49 -0800 Subject: [PATCH 0472/1911] tcp/dccp: block BH for SYN processing SYN processing really was meant to be handled from BH. When I got rid of BH blocking while processing socket backlog in commit 5413d1babe8f ("net: do not block BH while processing socket backlog"), I forgot that a malicious user could transition to TCP_LISTEN from a state that allowed (SYN) packets to be parked in the socket backlog while socket is owned by the thread doing the listen() call. 
Sure enough syzkaller found this and reported the bug ;) ================================= [ INFO: inconsistent lock state ] 4.10.0+ #60 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. syz-executor0/5090 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 {IN-SOFTIRQ-W} state was registered at: mark_irqflags kernel/locking/lockdep.c:2923 [inline] __lock_acquire+0xbcf/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 tcp_conn_request+0x25cc/0x3310 net/ipv4/tcp_input.c:6399 tcp_v4_conn_request+0x157/0x220 net/ipv4/tcp_ipv4.c:1262 tcp_rcv_state_process+0x802/0x4130 net/ipv4/tcp_input.c:5889 tcp_v4_do_rcv+0x56b/0x940 net/ipv4/tcp_ipv4.c:1433 tcp_v4_rcv+0x2e12/0x3210 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x4ce/0xc40 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:257 [inline] ip_local_deliver+0x1ce/0x710 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:492 [inline] ip_rcv_finish+0xb1d/0x2110 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:257 [inline] ip_rcv+0xd90/0x19c0 net/ipv4/ip_input.c:487 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4179 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4217 netif_receive_skb_internal+0x1d6/0x430 net/core/dev.c:4245 napi_skb_finish net/core/dev.c:4602 [inline] napi_gro_receive+0x4e6/0x680 net/core/dev.c:4636 e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4033 [inline] e1000_clean_rx_irq+0x5e0/0x1490 drivers/net/ethernet/intel/e1000/e1000_main.c:4489 e1000_clean+0xb9a/0x2910 drivers/net/ethernet/intel/e1000/e1000_main.c:3834 napi_poll net/core/dev.c:5171 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5236 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x19e/0x1d0 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:658 [inline] do_IRQ+0x81/0x1a0 arch/x86/kernel/irq.c:250 ret_from_intr+0x0/0x20 native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53 arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline] default_idle+0x8f/0x410 arch/x86/kernel/process.c:271 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:262 default_idle_call+0x36/0x60 kernel/sched/idle.c:96 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x348/0x440 kernel/sched/idle.c:243 cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:345 start_secondary+0x344/0x440 arch/x86/kernel/smpboot.c:272 verify_cpu+0x0/0xfc irq event stamp: 1741 hardirqs last enabled at (1741): [] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] hardirqs last enabled at (1741): [] _raw_spin_unlock_irqrestore+0xf7/0x1a0 kernel/locking/spinlock.c:191 hardirqs last disabled at (1740): [] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] hardirqs last disabled at (1740): [] _raw_spin_lock_irqsave+0xa2/0x110 kernel/locking/spinlock.c:159 softirqs last enabled at (1738): [] __do_softirq+0x7cf/0xb7d 
kernel/softirq.c:310 softirqs last disabled at (1571): [] do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&hashinfo->ehash_locks[i])->rlock); lock(&(&hashinfo->ehash_locks[i])->rlock); *** DEADLOCK *** 1 lock held by syz-executor0/5090: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] sock_setsockopt+0x233/0x1e40 net/core/sock.c:683 stack backtrace: CPU: 1 PID: 5090 Comm: syz-executor0 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 print_usage_bug+0x3ef/0x450 kernel/locking/lockdep.c:2387 valid_state kernel/locking/lockdep.c:2400 [inline] mark_lock_irq kernel/locking/lockdep.c:2602 [inline] mark_lock+0xf30/0x1410 kernel/locking/lockdep.c:3065 mark_irqflags kernel/locking/lockdep.c:2941 [inline] __lock_acquire+0x6dc/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 dccp_v6_conn_request+0xada/0x11b0 net/dccp/ipv6.c:380 dccp_rcv_state_process+0x51e/0x1660 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:896 [inline] __release_sock+0x127/0x3a0 net/core/sock.c:2052 release_sock+0xa5/0x2b0 net/core/sock.c:2539 sock_setsockopt+0x60f/0x1e40 net/core/sock.c:1016 SYSC_setsockopt net/socket.c:1782 [inline] SyS_setsockopt+0x2fb/0x3a0 net/socket.c:1765 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007fe8b26c2b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00000000004458b9 RDX: 000000000000001a RSI: 0000000000000001 RDI: 0000000000000006 RBP: 00000000006e2110 R08: 0000000000000010 R09: 0000000000000000 R10: 00000000208c3000 R11: 0000000000000292 R12: 0000000000708000 R13: 0000000020000000 R14: 0000000000001000 R15: 0000000000000000 Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/dccp/input.c | 10 ++++++++-- net/ipv4/tcp_input.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 8fedc2d497709b..4a05d78768502d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; + bool acceptable; int queued = 0; /* @@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (inet_csk(sk)->icsk_af_ops->conn_request(sk, - skb) < 0) + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. 
+ */ + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) return 1; consume_skb(skb); return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2c0ff327b6dfe6..39c393cc0fd3c1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->syn) { if (th->fin) goto discard; - if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) - return 1; + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) + return 1; consume_skb(skb); return 0; } From 0837e49ab3fa8d903a499984575d71efee8097ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Mar 2017 15:11:23 +0000 Subject: [PATCH 0473/1911] KEYS: Differentiate uses of rcu_dereference_key() and user_key_payload() rcu_dereference_key() and user_key_payload() are currently being used in two different, incompatible ways: (1) As a wrapper to rcu_dereference() - when only the RCU read lock used to protect the key. (2) As a wrapper to rcu_dereference_protected() - when the key semaphor is used to protect the key and the may be being modified. Fix this by splitting both of the key wrappers to produce: (1) RCU accessors for keys when caller has the key semaphore locked: dereference_key_locked() user_key_payload_locked() (2) RCU accessors for keys when caller holds the RCU read lock: dereference_key_rcu() user_key_payload_rcu() This should fix following warning in the NFS idmapper =============================== [ INFO: suspicious RCU usage. ] 4.10.0 #1 Tainted: G W ------------------------------- ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by mount.nfs/5987: #0: (rcu_read_lock){......}, at: [] nfs_idmap_get_key+0x15c/0x420 [nfsv4] stack backtrace: CPU: 1 PID: 5987 Comm: mount.nfs Tainted: G W 4.10.0 #1 Call Trace: dump_stack+0xe8/0x154 (unreliable) lockdep_rcu_suspicious+0x140/0x190 nfs_idmap_get_key+0x380/0x420 [nfsv4] nfs_map_name_to_uid+0x2a0/0x3b0 [nfsv4] decode_getfattr_attrs+0xfac/0x16b0 [nfsv4] decode_getfattr_generic.constprop.106+0xbc/0x150 [nfsv4] nfs4_xdr_dec_lookup_root+0xac/0xb0 [nfsv4] rpcauth_unwrap_resp+0xe8/0x140 [sunrpc] call_decode+0x29c/0x910 [sunrpc] __rpc_execute+0x140/0x8f0 [sunrpc] rpc_run_task+0x170/0x200 [sunrpc] nfs4_call_sync_sequence+0x68/0xa0 [nfsv4] _nfs4_lookup_root.isra.44+0xd0/0xf0 [nfsv4] nfs4_lookup_root+0xe0/0x350 [nfsv4] nfs4_lookup_root_sec+0x70/0xa0 [nfsv4] nfs4_find_root_sec+0xc4/0x100 [nfsv4] nfs4_proc_get_rootfh+0x5c/0xf0 [nfsv4] nfs4_get_rootfh+0x6c/0x190 [nfsv4] nfs4_server_common_setup+0xc4/0x260 [nfsv4] nfs4_create_server+0x278/0x3c0 [nfsv4] nfs4_remote_mount+0x50/0xb0 [nfsv4] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 nfs_do_root_mount+0xb0/0x140 [nfsv4] nfs4_try_mount+0x60/0x100 [nfsv4] nfs_fs_mount+0x5ec/0xda0 [nfs] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 do_mount+0x254/0xf70 SyS_mount+0x94/0x100 system_call+0x38/0xe0 Reported-by: Jan Stancek Signed-off-by: David Howells Tested-by: Jan Stancek Signed-off-by: James Morris --- Documentation/security/keys.txt | 17 +++++++++++++++-- drivers/md/dm-crypt.c | 2 +- fs/cifs/connect.c | 2 +- fs/crypto/keyinfo.c | 2 +- fs/ecryptfs/ecryptfs_kernel.h | 2 +- fs/fscache/object-list.c | 2 +- fs/nfs/nfs4idmap.c | 2 +- include/keys/user-type.h | 9 +++++++-- include/linux/key.h | 5 ++++- lib/digsig.c | 2 +- net/dns_resolver/dns_query.c | 4 ++-- security/keys/dh.c | 2 +- security/keys/encrypted-keys/encrypted.c | 4 ++-- security/keys/trusted.c | 4 ++-- security/keys/user_defined.c | 6 +++--- 15 files changed, 43 insertions(+), 22 deletions(-) diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 3849814bfe6dd2..0e03baf271bdb0 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -1151,8 +1151,21 @@ access the data: usage. This is called key->payload.rcu_data0. The following accessors wrap the RCU calls to this element: - rcu_assign_keypointer(struct key *key, void *data); - void *rcu_dereference_key(struct key *key); + (a) Set or change the first payload pointer: + + rcu_assign_keypointer(struct key *key, void *data); + + (b) Read the first payload pointer with the key semaphore held: + + [const] void *dereference_key_locked([const] struct key *key); + + Note that the return value will inherit its constness from the key + parameter. Static analysis will give an error if it things the lock + isn't held. 
+ + (c) Read the first payload pointer with the RCU read lock held: + + const void *dereference_key_rcu(const struct key *key); =================== diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1cb2ca9dfae36d..389a3637ffcc63 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1536,7 +1536,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string down_read(&key->sem); - ukp = user_key_payload(key); + ukp = user_key_payload_locked(key); if (!ukp) { up_read(&key->sem); key_put(key); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c84..8a3ecef30d3ce5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2455,7 +2455,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } down_read(&key->sem); - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); if (IS_ERR_OR_NULL(upayload)) { rc = upayload ? PTR_ERR(upayload) : -EINVAL; goto out_key_put; diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 02eb6b9e44387d..d5d896fa5a7167 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -103,7 +103,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info, goto out; } down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); + ukp = user_key_payload_locked(keyring_key); if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; up_read(&keyring_key->sem); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 599a29237cfea5..95c1c8d3453922 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -117,7 +117,7 @@ ecryptfs_get_key_payload_data(struct key *key) auth_tok = ecryptfs_get_encrypted_key_payload_data(key); if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload(key)->data; + return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; else return auth_tok; } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 5d5ddaa84b215f..67f940892ef810 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -329,7 +329,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) config = 0; rcu_read_lock(); - confkey = user_key_payload(key); + confkey = user_key_payload_rcu(key); buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b169..835c163f61af53 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = user_key_payload(rkey); + payload = user_key_payload_rcu(rkey); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; diff --git a/include/keys/user-type.h b/include/keys/user-type.h index c56fef40f53efa..e098cbe27db546 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -48,9 +48,14 @@ extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char __user *buffer, size_t buflen); -static inline const struct user_key_payload *user_key_payload(const struct key *key) +static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { - return (struct user_key_payload *)rcu_dereference_key(key); + return (struct user_key_payload *)dereference_key_rcu(key); +} + +static inline struct user_key_payload *user_key_payload_locked(const struct key *key) +{ + return (struct user_key_payload 
*)dereference_key_locked((struct key *)key); } #endif /* CONFIG_KEYS */ diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f3749..e45212f2777e36 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key) !test_bit(KEY_FLAG_NEGATIVE, &key->flags); } -#define rcu_dereference_key(KEY) \ +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/lib/digsig.c b/lib/digsig.c index 55b8b2f41a9e0a..03d7c63837aecb 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -85,7 +85,7 @@ static int digsig_verify_rsa(struct key *key, struct pubkey_hdr *pkh; down_read(&key->sem); - ukp = user_key_payload(key); + ukp = user_key_payload_locked(key); if (ukp->datalen < sizeof(*pkh)) goto err1; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab81..d502c94b1a82bb 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry) { struct key *rkey; - const struct user_key_payload *upayload; + struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -141,7 +141,7 @@ int dns_query(const char *type, const char *name, size_t namelen, if (ret) goto put; - upayload = user_key_payload(rkey); + upayload = user_key_payload_locked(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/security/keys/dh.c b/security/keys/dh.c index 531ed2ec132f4f..893af4c450382a 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -55,7 +55,7 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) if (status == 0) { const struct user_key_payload *payload; - payload = user_key_payload(key); + payload = user_key_payload_locked(key); if (maxlen == 0) { *mpi = NULL; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 4fb315cddf5b00..0010955d7876c2 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -314,7 +314,7 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k goto error; down_read(&ukey->sem); - upayload = user_key_payload(ukey); + upayload = user_key_payload_locked(ukey); *master_key = upayload->data; *master_keylen = upayload->datalen; error: @@ -926,7 +926,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, size_t asciiblob_len; int ret; - epayload = rcu_dereference_key(key); + epayload = dereference_key_locked(key); /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ asciiblob_len = epayload->datablob_len + ivsize + 1 diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 90d61751ff12f3..2ae31c5a87de9e 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1140,12 +1140,12 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) static long trusted_read(const struct key *key, char __user *buffer, size_t buflen) { - struct trusted_key_payload *p; + const struct trusted_key_payload *p; char *ascii_buf; char *bufp; int i; - p = rcu_dereference_key(key); + p = dereference_key_locked(key); if (!p) return -EINVAL; if (!buffer || buflen <= 0) diff --git 
a/security/keys/user_defined.c b/security/keys/user_defined.c index e187c8909d9db1..26605134f17a8a 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -107,7 +107,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) /* attach the new data, displacing the old */ key->expiry = prep->expiry; if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) - zap = rcu_dereference_key(key); + zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(user_update); */ void user_revoke(struct key *key) { - struct user_key_payload *upayload = key->payload.data[0]; + struct user_key_payload *upayload = user_key_payload_locked(key); /* clear the quota */ key_payload_reserve(key, 0); @@ -169,7 +169,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen) const struct user_key_payload *upayload; long ret; - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); ret = upayload->datalen; /* we can return the data as is */ From 2651225b5ebcdde60f684c4db8ec7e9e3800a74f Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 28 Feb 2017 10:35:56 -0500 Subject: [PATCH 0474/1911] selinux: wrap cgroup seclabel support with its own policy capability commit 1ea0ce40690dff38935538e8dab7b12683ded0d3 ("selinux: allow changing labels for cgroupfs") broke the Android init program, which looks up security contexts whenever creating directories and attempts to assign them via setfscreatecon(). When creating subdirectories in cgroup mounts, this would previously be ignored since cgroup did not support userspace setting of security contexts. However, after the commit, SELinux would attempt to honor the requested context on cgroup directories and fail due to permission denial. Avoid breaking existing userspace/policy by wrapping this change with a conditional on a new cgroup_seclabel policy capability. This preserves existing behavior until/unless a new policy explicitly enables this capability. Reported-by: John Stultz Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: James Morris --- security/selinux/hooks.c | 7 ++++--- security/selinux/include/security.h | 2 ++ security/selinux/selinuxfs.c | 3 ++- security/selinux/ss/services.c | 4 ++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7ff..0a4b4b040e0ab0 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -480,12 +480,13 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. 
Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || - !strcmp(sb->s_type->name, "cgroup") || - !strcmp(sb->s_type->name, "cgroup2") || !strcmp(sb->s_type->name, "pstore") || !strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || - !strcmp(sb->s_type->name, "rootfs"); + !strcmp(sb->s_type->name, "rootfs") || + (selinux_policycap_cgroupseclabel && + (!strcmp(sb->s_type->name, "cgroup") || + !strcmp(sb->s_type->name, "cgroup2"))); } static int sb_finish_set_opts(struct super_block *sb) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index beaa14b8b6cf57..f979c35e037ec4 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -71,6 +71,7 @@ enum { POLICYDB_CAPABILITY_OPENPERM, POLICYDB_CAPABILITY_EXTSOCKCLASS, POLICYDB_CAPABILITY_ALWAYSNETWORK, + POLICYDB_CAPABILITY_CGROUPSECLABEL, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) @@ -79,6 +80,7 @@ extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; extern int selinux_policycap_extsockclass; extern int selinux_policycap_alwaysnetwork; +extern int selinux_policycap_cgroupseclabel; /* * type_datum properties diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index c9e8a9898ce481..cb3fd98fb05ae7 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -46,7 +46,8 @@ static char *policycap_names[] = { "network_peer_controls", "open_perms", "extended_socket_class", - "always_check_network" + "always_check_network", + "cgroup_seclabel" }; unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a70fcee9824ba3..b4aa491a0a23d8 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -74,6 +74,7 @@ int selinux_policycap_netpeer; int selinux_policycap_openperm; int selinux_policycap_extsockclass; int selinux_policycap_alwaysnetwork; +int selinux_policycap_cgroupseclabel; static DEFINE_RWLOCK(policy_rwlock); @@ -1993,6 +1994,9 @@ static void security_load_policycaps(void) POLICYDB_CAPABILITY_EXTSOCKCLASS); selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, POLICYDB_CAPABILITY_ALWAYSNETWORK); + selinux_policycap_cgroupseclabel = + ebitmap_get_bit(&policydb.policycaps, + POLICYDB_CAPABILITY_CGROUPSECLABEL); } static int security_preserve_bools(struct policydb *p); From f889491380582b4ba2981cf0b0d7d6a40fb30ab7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 28 Feb 2017 17:56:02 +0800 Subject: [PATCH 0475/1911] vhost: introduce O(1) vq metadata cache When device IOTLB is enabled, all address translations were stored in interval tree. O(lgN) searching time could be slow for virtqueue metadata (avail, used and descriptors) since they were accessed much often than other addresses. So this patch introduces an O(1) array which points to the interval tree nodes that store the translations of vq metadata. Those array were update during vq IOTLB prefetching and were reset during each invalidation and tlb update. Each time we want to access vq metadata, this small array were queried before interval tree. This would be sufficient for static mappings but not dynamic mappings, we could do optimizations on top. 
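
A minimal, self-contained sketch of the pattern (plain userspace C with illustrative names, not the vhost code itself): a tiny per-type pointer cache is consulted before the slower range lookup, remembers the last hit, and is wiped whenever the mappings change - which is why it only pays off for the mostly static virtqueue metadata addresses.

#include <stdio.h>
#include <stddef.h>

/* Stand-in for the interval tree: a linear scan keeps the sketch small;
 * only the caching layer in front of it matters here. */
struct mapping { unsigned long start, size, uaddr; };

static struct mapping mappings[] = {
	{ 0x1000, 0x1000, 0x10000UL },
	{ 0x4000, 0x2000, 0x40000UL },
};

static struct mapping *slow_lookup(unsigned long addr)
{
	for (size_t i = 0; i < sizeof(mappings) / sizeof(mappings[0]); i++)
		if (addr >= mappings[i].start &&
		    addr - mappings[i].start < mappings[i].size)
			return &mappings[i];
	return NULL;
}

enum meta_type { ADDR_DESC, ADDR_AVAIL, ADDR_USED, NUM_ADDRS };

static struct mapping *meta_cache[NUM_ADDRS];	/* the O(1) fast path */

static struct mapping *lookup(unsigned long addr, enum meta_type t)
{
	if (meta_cache[t])			/* cache hit: no tree walk */
		return meta_cache[t];
	meta_cache[t] = slow_lookup(addr);	/* miss: search, then remember */
	return meta_cache[t];
}

static void invalidate_cache(void)		/* on any mapping change */
{
	for (int i = 0; i < NUM_ADDRS; i++)
		meta_cache[i] = NULL;
}

int main(void)
{
	printf("desc -> %#lx\n", lookup(0x1000, ADDR_DESC)->uaddr);
	printf("desc again (cached) -> %#lx\n", lookup(0x1000, ADDR_DESC)->uaddr);
	invalidate_cache();			/* mimics an IOTLB update/invalidate */
	return 0;
}

In the patch itself the cache slots are the meta_iotlb[] array indexed by VHOST_ADDR_DESC/AVAIL/USED, filled during vq_iotlb_prefetch() and cleared by vhost_vq_meta_reset() on every IOTLB update or invalidation.
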
Test were done with l2fwd in guest (2M hugepage): noiommu | before | after tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%) rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%) We can almost reach the same performance as noiommu mode. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++++++++-------- drivers/vhost/vhost.h | 8 +++ 2 files changed, 118 insertions(+), 26 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1f7e4e4e6f8efe..998bed505530df 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_queue); +static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) +{ + int j; + + for (j = 0; j < VHOST_NUM_ADDRS; j++) + vq->meta_iotlb[j] = NULL; +} + +static void vhost_vq_meta_reset(struct vhost_dev *d) +{ + int i; + + for (i = 0; i < d->nvqs; ++i) + __vhost_vq_meta_reset(d->vqs[i]); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -312,6 +328,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + __vhost_vq_meta_reset(vq); } static int vhost_worker(void *data) @@ -691,6 +708,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, return 1; } +static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, + u64 addr, unsigned int size, + int type) +{ + const struct vhost_umem_node *node = vq->meta_iotlb[type]; + + if (!node) + return NULL; + + return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); +} + /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, @@ -733,8 +762,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ struct iov_iter t; + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)to, size, + VHOST_ADDR_DESC); + + if (uaddr) + return __copy_to_user(uaddr, from, size); + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); @@ -762,8 +797,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)from, size, + VHOST_ADDR_DESC); struct iov_iter f; + + if (uaddr) + return __copy_from_user(to, uaddr, size); + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -783,17 +824,12 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, return ret; } -static void __user *__vhost_get_user(struct vhost_virtqueue *vq, - void __user *addr, unsigned size) +static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, + void __user *addr, unsigned int size, + int type) { int ret; - /* This function should be called after iotlb - * prefetch, which means we're sure that vq - * could be access through iotlb. So -EAGAIN should - * not happen in this case. 
- */ - /* TODO: more fast path */ ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -814,14 +850,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, return vq->iotlb_iov[0].iov_base; } -#define vhost_put_user(vq, x, ptr) \ +/* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ +static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned int size, + int type) +{ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)addr, size, type); + if (uaddr) + return uaddr; + + return __vhost_get_user_slow(vq, addr, size, type); +} + +#define vhost_put_user(vq, x, ptr) \ ({ \ int ret = -EFAULT; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ __typeof__(ptr) to = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), VHOST_ADDR_USED); \ if (to != NULL) \ ret = __put_user(x, to); \ else \ @@ -830,14 +884,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) -#define vhost_get_user(vq, x, ptr) \ +#define vhost_get_user(vq, x, ptr, type) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __get_user(x, ptr); \ } else { \ __typeof__(ptr) from = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), \ + type); \ if (from != NULL) \ ret = __get_user(x, from); \ else \ @@ -846,6 +902,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) +#define vhost_get_avail(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) + +#define vhost_get_used(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) + static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; @@ -951,6 +1013,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, ret = -EFAULT; break; } + vhost_vq_meta_reset(dev); if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, msg->iova + msg->size - 1, msg->uaddr, msg->perm)) { @@ -960,6 +1023,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: + vhost_vq_meta_reset(dev); vhost_del_umem_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); break; @@ -1103,12 +1167,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static void vhost_vq_meta_update(struct vhost_virtqueue *vq, + const struct vhost_umem_node *node, + int type) +{ + int access = (type == VHOST_ADDR_USED) ? 
+ VHOST_ACCESS_WO : VHOST_ACCESS_RO; + + if (likely(node->perm & access)) + vq->meta_iotlb[type] = node; +} + static int iotlb_access_ok(struct vhost_virtqueue *vq, - int access, u64 addr, u64 len) + int access, u64 addr, u64 len, int type) { const struct vhost_umem_node *node; struct vhost_umem *umem = vq->iotlb; - u64 s = 0, size; + u64 s = 0, size, orig_addr = addr; + + if (vhost_vq_meta_fetch(vq, addr, len, type)) + return true; while (len > s) { node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, @@ -1125,6 +1203,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq, } size = node->size - addr + node->start; + + if (orig_addr == addr && size >= len) + vhost_vq_meta_update(vq, node, type); + s += size; addr += size; } @@ -1141,13 +1223,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq) return 1; return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, - num * sizeof *vq->desc) && + num * sizeof(*vq->desc), VHOST_ADDR_DESC) && iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, sizeof *vq->avail + - num * sizeof *vq->avail->ring + s) && + num * sizeof(*vq->avail->ring) + s, + VHOST_ADDR_AVAIL) && iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, sizeof *vq->used + - num * sizeof *vq->used->ring + s); + num * sizeof(*vq->used->ring) + s, + VHOST_ADDR_USED); } EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); @@ -1728,7 +1812,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) r = -EFAULT; goto err; } - r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + r = vhost_get_used(vq, last_used_idx, &vq->used->idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); @@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, last_avail_idx = vq->last_avail_idx; if (vq->avail_idx == vq->last_avail_idx) { - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1959,7 +2043,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(vhost_get_user(vq, ring_head, + if (unlikely(vhost_get_avail(vq, ring_head, &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, @@ -2175,7 +2259,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (vhost_get_user(vq, flags, &vq->avail->flags)) { + if (vhost_get_avail(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -2202,7 +2286,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (vhost_get_user(vq, event, vhost_used_event(vq))) { + if (vhost_get_avail(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -2246,7 +2330,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) __virtio16 avail_idx; int r; - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) return false; @@ -2281,7 +2365,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. 
*/ smp_mb(); - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index a9cbbb148f460e..f55671d53f28fe 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -76,6 +76,13 @@ struct vhost_umem { int numem; }; +enum vhost_uaddr_type { + VHOST_ADDR_DESC = 0, + VHOST_ADDR_AVAIL = 1, + VHOST_ADDR_USED = 2, + VHOST_NUM_ADDRS = 3, +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -86,6 +93,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; + const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct file *call; struct file *error; From c4baad50297d84bde1a7ad45e50c73adae4a2192 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Feb 2017 00:02:27 -0800 Subject: [PATCH 0476/1911] virtio-console: avoid DMA from stack put_chars() stuffs the buffer it gets into an sg, but that buffer may be on the stack. This breaks with CONFIG_VMAP_STACK=y (for me, it manifested as printks getting turned into NUL bytes). Signed-off-by: Omar Sandoval Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah --- drivers/char/virtio_console.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6266c0568e1d0a..e9b7e0b3cabe60 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1136,6 +1136,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; struct scatterlist sg[1]; + void *data; + int ret; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1144,8 +1146,14 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - sg_init_one(sg, buf, count); - return __send_to_port(port, sg, 1, count, (void *)buf, false); + data = kmemdup(buf, count, GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sg_init_one(sg, data, count); + ret = __send_to_port(port, sg, 1, count, data, false); + kfree(data); + return ret; } /* From a4b0e8a4e92b1baa860e744847fbdb84a50a5071 Mon Sep 17 00:00:00 2001 From: "Potomski, MichalX" Date: Thu, 23 Feb 2017 09:05:30 +0000 Subject: [PATCH 0477/1911] scsi: ufs: Factor out ufshcd_read_desc_param Since in UFS 2.1 specification some of the descriptor lengths differs from 2.0 specification and some devices, which are reporting spec version 2.0 have different descriptor lengths we can not rely on hardcoded values taken from 2.0 specification. This patch introduces reading these lengths per each device from descriptor headers at probe time to ensure their correctness. Signed-off-by: Michal' Potomski Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufs.h | 22 ++-- drivers/scsi/ufs/ufshcd.c | 231 ++++++++++++++++++++++++++++---------- drivers/scsi/ufs/ufshcd.h | 15 +++ 3 files changed, 196 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 318e4a1f76c92b..54deeb754db5fc 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -146,7 +146,7 @@ enum attr_idn { /* Descriptor idn for Query requests */ enum desc_idn { QUERY_DESC_IDN_DEVICE = 0x0, - QUERY_DESC_IDN_CONFIGURAION = 0x1, + QUERY_DESC_IDN_CONFIGURATION = 0x1, QUERY_DESC_IDN_UNIT = 0x2, QUERY_DESC_IDN_RFU_0 = 0x3, QUERY_DESC_IDN_INTERCONNECT = 0x4, @@ -162,19 +162,13 @@ enum desc_header_offset { QUERY_DESC_DESC_TYPE_OFFSET = 0x01, }; -enum ufs_desc_max_size { - QUERY_DESC_DEVICE_MAX_SIZE = 0x40, - QUERY_DESC_CONFIGURAION_MAX_SIZE = 0x90, - QUERY_DESC_UNIT_MAX_SIZE = 0x23, - QUERY_DESC_INTERCONNECT_MAX_SIZE = 0x06, - /* - * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes - * of descriptor header. - */ - QUERY_DESC_STRING_MAX_SIZE = 0xFE, - QUERY_DESC_GEOMETRY_MAX_SIZE = 0x44, - QUERY_DESC_POWER_MAX_SIZE = 0x62, - QUERY_DESC_RFU_MAX_SIZE = 0x00, +enum ufs_desc_def_size { + QUERY_DESC_DEVICE_DEF_SIZE = 0x40, + QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, + QUERY_DESC_UNIT_DEF_SIZE = 0x23, + QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06, + QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44, + QUERY_DESC_POWER_DEF_SIZE = 0x62, }; /* Unit descriptor parameters offsets in bytes*/ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index dc6efbd1be8ef3..1359913bf840ce 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -100,19 +100,6 @@ #define ufshcd_hex_dump(prefix_str, buf, len) \ print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false) -static u32 ufs_query_desc_max_size[] = { - QUERY_DESC_DEVICE_MAX_SIZE, - QUERY_DESC_CONFIGURAION_MAX_SIZE, - QUERY_DESC_UNIT_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_INTERCONNECT_MAX_SIZE, - QUERY_DESC_STRING_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_GEOMETRY_MAX_SIZE, - QUERY_DESC_POWER_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, -}; - enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, @@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out; } - if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { + if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n", __func__, *buf_len); err = -EINVAL; @@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, return err; } +/** + * ufshcd_read_desc_length - read the specified descriptor length from header + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_index: descriptor index + * @desc_length: pointer to variable to read the length of descriptor + * + * Return 0 in case of success, non-zero otherwise + */ +static int ufshcd_read_desc_length(struct ufs_hba *hba, + enum desc_idn desc_id, + int desc_index, + int *desc_length) +{ + int ret; + u8 header[QUERY_DESC_HDR_SIZE]; + int header_len = QUERY_DESC_HDR_SIZE; + + if (desc_id >= QUERY_DESC_IDN_MAX) + return -EINVAL; + + ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, + desc_id, desc_index, 0, header, + &header_len); + + if (ret) { + dev_err(hba->dev, "%s: Failed to get descriptor header id %d", + __func__, desc_id); + return ret; + } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) { + 
dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch", + __func__, header[QUERY_DESC_DESC_TYPE_OFFSET], + desc_id); + ret = -EINVAL; + } + + *desc_length = header[QUERY_DESC_LENGTH_OFFSET]; + return ret; + +} + +/** + * ufshcd_map_desc_id_to_length - map descriptor IDN to its length + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_len: mapped desc length (out) + * + * Return 0 in case of success, non-zero otherwise + */ +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, + enum desc_idn desc_id, int *desc_len) +{ + switch (desc_id) { + case QUERY_DESC_IDN_DEVICE: + *desc_len = hba->desc_size.dev_desc; + break; + case QUERY_DESC_IDN_POWER: + *desc_len = hba->desc_size.pwr_desc; + break; + case QUERY_DESC_IDN_GEOMETRY: + *desc_len = hba->desc_size.geom_desc; + break; + case QUERY_DESC_IDN_CONFIGURATION: + *desc_len = hba->desc_size.conf_desc; + break; + case QUERY_DESC_IDN_UNIT: + *desc_len = hba->desc_size.unit_desc; + break; + case QUERY_DESC_IDN_INTERCONNECT: + *desc_len = hba->desc_size.interc_desc; + break; + case QUERY_DESC_IDN_STRING: + *desc_len = QUERY_DESC_MAX_SIZE; + break; + case QUERY_DESC_IDN_RFU_0: + case QUERY_DESC_IDN_RFU_1: + *desc_len = 0; + break; + default: + *desc_len = 0; + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(ufshcd_map_desc_id_to_length); + /** * ufshcd_read_desc_param - read the specified descriptor parameter * @hba: Pointer to adapter instance @@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, static int ufshcd_read_desc_param(struct ufs_hba *hba, enum desc_idn desc_id, int desc_index, - u32 param_offset, + u8 param_offset, u8 *param_read_buf, - u32 param_size) + u8 param_size) { int ret; u8 *desc_buf; - u32 buff_len; + int buff_len; bool is_kmalloc = true; - /* safety checks */ - if (desc_id >= QUERY_DESC_IDN_MAX) + /* Safety check */ + if (desc_id >= QUERY_DESC_IDN_MAX || !param_size) return -EINVAL; - buff_len = ufs_query_desc_max_size[desc_id]; - if ((param_offset + param_size) > buff_len) - return -EINVAL; + /* Get the max length of descriptor from structure filled up at probe + * time. + */ + ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len); - if (!param_offset && (param_size == buff_len)) { - /* memory space already available to hold full descriptor */ - desc_buf = param_read_buf; - is_kmalloc = false; - } else { - /* allocate memory to hold full descriptor */ + /* Sanity checks */ + if (ret || !buff_len) { + dev_err(hba->dev, "%s: Failed to get full descriptor length", + __func__); + return ret; + } + + /* Check whether we need temp memory */ + if (param_offset != 0 || param_size < buff_len) { desc_buf = kmalloc(buff_len, GFP_KERNEL); if (!desc_buf) return -ENOMEM; + } else { + desc_buf = param_read_buf; + is_kmalloc = false; } + /* Request for full descriptor */ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, - desc_id, desc_index, 0, desc_buf, - &buff_len); + desc_id, desc_index, 0, + desc_buf, &buff_len); if (ret) { dev_err(hba->dev, "%s: Failed reading descriptor. 
desc_id %d, desc_index %d, param_offset %d, ret %d", __func__, desc_id, desc_index, param_offset, ret); - goto out; } @@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba, goto out; } - /* - * While reading variable size descriptors (like string descriptor), - * some UFS devices may report the "LENGTH" (field in "Transaction - * Specific fields" of Query Response UPIU) same as what was requested - * in Query Request UPIU instead of reporting the actual size of the - * variable size descriptor. - * Although it's safe to ignore the "LENGTH" field for variable size - * descriptors as we can always derive the length of the descriptor from - * the descriptor header fields. Hence this change impose the length - * match check only for fixed size descriptors (for which we always - * request the correct size as part of Query Request UPIU). - */ - if ((desc_id != QUERY_DESC_IDN_STRING) && - (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) { - dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d", - __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]); - ret = -EINVAL; - goto out; - } + /* Check wherher we will not copy more data, than available */ + if (is_kmalloc && param_size > buff_len) + param_size = buff_len; if (is_kmalloc) memcpy(param_read_buf, &desc_buf[param_offset], param_size); @@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level) static void ufshcd_init_icc_levels(struct ufs_hba *hba) { int ret; - int buff_len = QUERY_DESC_POWER_MAX_SIZE; - u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE]; + int buff_len = hba->desc_size.pwr_desc; + u8 desc_buf[hba->desc_size.pwr_desc]; ret = ufshcd_read_power_desc(hba, desc_buf, buff_len); if (ret) { @@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba, { int err; u8 model_index; - u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0}; - u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE]; + u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0}; + u8 desc_buf[hba->desc_size.dev_desc]; - err = ufshcd_read_device_desc(hba, desc_buf, - QUERY_DESC_DEVICE_MAX_SIZE); + err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc); if (err) { dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n", __func__, err); @@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba, model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, str_desc_buf, - QUERY_DESC_STRING_MAX_SIZE, ASCII_STD); + QUERY_DESC_MAX_SIZE, ASCII_STD); if (err) { dev_err(hba->dev, "%s: Failed reading Product Name. 
err = %d\n", __func__, err); goto out; } - str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0'; + str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0'; strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE), min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET], MAX_MODEL_LEN)); @@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) hba->req_abort_count = 0; } +static void ufshcd_init_desc_sizes(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0, + &hba->desc_size.dev_desc); + if (err) + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0, + &hba->desc_size.pwr_desc); + if (err) + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0, + &hba->desc_size.interc_desc); + if (err) + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0, + &hba->desc_size.conf_desc); + if (err) + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0, + &hba->desc_size.unit_desc); + if (err) + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0, + &hba->desc_size.geom_desc); + if (err) + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + +static void ufshcd_def_desc_sizes(struct ufs_hba *hba) +{ + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + /** * ufshcd_probe_hba - probe hba to detect device and initialize * @hba: per-adapter instance @@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) if (ret) goto out; + /* Init check for device descriptor sizes */ + ufshcd_init_desc_sizes(hba); + ret = ufs_get_device_desc(hba, &card); if (ret) { dev_err(hba->dev, "%s: Failed getting device info. 
err = %d\n", @@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) /* set the state as operational after switching to desired gear */ hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; + /* * If we are in error handling context or in power management callbacks * context, no need to scan the host @@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->mmio_base = mmio_base; hba->irq = irq; + /* Set descriptor lengths to specification defaults */ + ufshcd_def_desc_sizes(hba); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 7630600217a2ef..cdc8bd05f7dfcf 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -220,6 +220,15 @@ struct ufs_dev_cmd { struct ufs_query query; }; +struct ufs_desc_size { + int dev_desc; + int pwr_desc; + int geom_desc; + int interc_desc; + int unit_desc; + int conf_desc; +}; + /** * struct ufs_clk_info - UFS clock related info * @list: list headed by hba->clk_list_head @@ -483,6 +492,7 @@ struct ufs_stats { * @clk_list_head: UFS host controller clocks list node head * @pwr_info: holds current power mode * @max_pwr_info: keeps the device max valid pwm + * @desc_size: descriptor sizes reported by device * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. @@ -666,6 +676,7 @@ struct ufs_hba { bool is_urgent_bkops_lvl_checked; struct rw_semaphore clk_scaling_lock; + struct ufs_desc_size desc_size; }; /* Returns true if clocks can be gated. Otherwise false */ @@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, bool *flag_res); int ufshcd_hold(struct ufs_hba *hba, bool async); void ufshcd_release(struct ufs_hba *hba); + +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id, + int *desc_length); + u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba); /* Wrapper functions for safely calling variant operations */ From c46f09175dabd5dd6a1507f36250bfa734a0156e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 1 Mar 2017 17:27:00 +0900 Subject: [PATCH 0478/1911] scsi: sd: Check for unaligned partial completion Commit ("mpt3sas: Force request partial completion alignment") was not considering the case of commands not operating on logical block size units (e.g. REQ_OP_ZONE_REPORT and its 64B aligned partial replies). In this case, forcing alignment of resid to the device logical block size can break the command result, e.g. in the case of REQ_OP_ZONE_REPORT, the exact number of zone reported by the device. Move the partial completion alignement check of mpt3sas to a generic implementation in sd_done(). The check is added within the default section of the initial req_op() switch case so that the report and reset zone commands are ignored. In addition, as sd_done() is not called for passthrough requests, resid corrections are not done as intended by the initial mpt3sas patch. Fixes: f2e767bb5d6e ("mpt3sas: Force request partial completion alignment") Cc: # v4.10 Signed-off-by: Damien Le Moal Acked-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 --------------- drivers/scsi/sd.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 46e866c36c8a88..69c29c560575e1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; - unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); @@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); - - /* In case of bogus fw or device, we could end up having - * unaligned partial completion. We can force alignment here, - * then scsi-ml does not need to handle this misbehavior. - */ - sector_sz = scmd->device->sector_size; - if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz && - xfer_cnt % sector_sz)) { - sdev_printk(KERN_INFO, scmd->device, - "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", - xfer_cnt, sector_sz); - xfer_cnt = round_down(xfer_cnt, sector_sz); - } - scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35ccc4..fb9b4d29af0b03 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); + unsigned int sector_size = SCpnt->device->sector_size; + unsigned int resid; struct scsi_sense_hdr sshdr; struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); struct request *req = SCpnt->request; @@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt) scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; + default: + /* + * In case of bogus fw or device, we could end up having + * an unaligned partial completion. Check this here and force + * alignment. + */ + resid = scsi_get_resid(SCpnt); + if (resid & (sector_size - 1)) { + sd_printk(KERN_INFO, sdkp, + "Unaligned partial completion (resid=%u, sector_sz=%u)\n", + resid, sector_size); + resid = min(scsi_bufflen(SCpnt), + round_up(resid, sector_size)); + scsi_set_resid(SCpnt, resid); + } } if (result) { From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Mar 2017 09:00:36 -0800 Subject: [PATCH 0479/1911] scsi: mpt3sas: Avoid sleeping in interrupt context Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code to SCSI core") can make scsi_internal_device_block() sleep. However, the mpt3sas driver can call this function from an interrupt handler. Hence add a second argument to scsi_internal_device_block() that restores the old behavior of this function for the mpt3sas handler. 
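As a rough illustration (not a hunk from this patch), callers that can run in interrupt context would pass the new argument as false, while process-context callers keep the old sleeping behaviour:

	/* sketch only: the boolean selects between the sleeping and
	 * non-sleeping variants introduced below */
	if (in_interrupt())
		r = scsi_internal_device_block(sdev, false);	/* must not sleep */
	else
		r = scsi_internal_device_block(sdev, true);	/* may quiesce and wait */
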
The call chain that triggered an "IRQ handler enabled interrupts" complaint is as follows: _base_interrupt() -> _base_async_event() -> mpt3sas_scsih_event_callback() -> _scsih_check_topo_delete_events() -> _scsih_block_io_to_children_attached_directly() -> _scsih_block_io_device() -> _scsih_internal_device_block() -> scsi_internal_device_block() Reported-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: # v4.10+ Tested-by: Omar Sandoval Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/scsi_lib.c | 14 ++++++++++---- drivers/scsi/scsi_priv.h | 3 --- include/scsi/scsi_device.h | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 7fe7e6ed595b79..8981806fb13fa7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, u16 handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt3sas_transport_functions; extern struct scsi_transport_template *mpt3sas_transport_template; -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); /* trigger data externs */ void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc, struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 69c29c560575e1..919ba2bb15f110 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r == -EINVAL) sdev_printk(KERN_WARNING, sdev, "device_block failed with return(%d) for handle(0x%04x)\n", @@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev, "performing a block followed by an unblock\n", r, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r) sdev_printk(KERN_WARNING, sdev, "retried device_block " "failed with return(%d) for handle(0x%04x)\n", diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5e45a25248505..f41e6b84a1bd92 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume); /** * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block + * @wait: Whether or not to wait until ongoing .queuecommand() / + * .queue_rq() calls have finished. * * Block request made by scsi lld's to temporarily stop all * scsi commands on the specified device. May sleep. @@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume); * remove the rport mutex lock and unlock calls from srp_queuecommand(). 
*/ int -scsi_internal_device_block(struct scsi_device *sdev) +scsi_internal_device_block(struct scsi_device *sdev, bool wait) { struct request_queue *q = sdev->request_queue; unsigned long flags; @@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. */ if (q->mq_ops) { - blk_mq_quiesce_queue(q); + if (wait) + blk_mq_quiesce_queue(q); + else + blk_mq_stop_hw_queues(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_wait_for_queuecommand(sdev); + if (wait) + scsi_wait_for_queuecommand(sdev); } return 0; @@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); static void device_block(struct scsi_device *sdev, void *data) { - scsi_internal_device_block(sdev); + scsi_internal_device_block(sdev, true); } static int diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99bfc985e1903b..f11bd102d6d5d6 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } */ #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */ -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); #endif /* _SCSI_PRIV_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6f22b39f1b0c3b..080c7ce9bae889 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev) sdev->sdev_state == SDEV_CREATED_BLOCK; } +int scsi_internal_device_block(struct scsi_device *sdev, bool wait); +int scsi_internal_device_unblock(struct scsi_device *sdev, + enum scsi_device_state new_state); + /* accessor functions for the SCSI parameters */ static inline int scsi_device_sync(struct scsi_device *sdev) { From 4ecce920e13ace16a5ba45efe8909946c28fb2ad Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:03:47 +0100 Subject: [PATCH 0480/1911] CIFS: move DFS response parsing out of SMB1 code since the DFS payload is not tied to the SMB version we can: * isolate the DFS payload in its own struct, and include that struct in packet structs * move the function that parses the response to misc.c and make it work on the new DFS payload struct (add payload size and utf16 flag as a result). 
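As a hedged sketch (not part of this patch), an SMB2 response handler could now reuse the relocated parser directly; everything except the parse_dfs_referrals() signature added below is illustrative:

	/* sketch only: dfs_rsp/rsp_size would come from the SMB2 ioctl response */
	rc = parse_dfs_referrals(dfs_rsp, rsp_size,
				 &num_of_nodes, &target_nodes,
				 nls_codepage, remap, search_name,
				 true /* SMB2 referral strings are UTF-16 */);
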
Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 16 +++--- fs/cifs/cifsproto.h | 5 ++ fs/cifs/cifssmb.c | 119 ++------------------------------------------ fs/cifs/misc.c | 105 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 120 deletions(-) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index f5b87303ce46d5..1ce733f3582f66 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2086,17 +2086,21 @@ typedef struct dfs_referral_level_3 { /* version 4 is same, + one flag bit */ __u8 ServiceSiteGuid[16]; /* MBZ, ignored */ } __attribute__((packed)) REFERRAL3; -typedef struct smb_com_transaction_get_dfs_refer_rsp { - struct smb_hdr hdr; /* wct = 10 */ - struct trans2_resp t2; - __u16 ByteCount; - __u8 Pad; +struct get_dfs_referral_rsp { __le16 PathConsumed; __le16 NumberOfReferrals; __le32 DFSFlags; REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */ /* followed by the strings pointed to by the referral structures */ -} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP; +} __packed; + +typedef struct smb_com_transaction_get_dfs_refer_rsp { + struct smb_hdr hdr; /* wct = 10 */ + struct trans2_resp t2; + __u16 ByteCount; + __u8 Pad; + struct get_dfs_referral_rsp dfs_data; +} __packed TRANSACTION2_GET_DFS_REFER_RSP; /* DFS Flags */ #define DFSREF_REFERRAL_SERVER 0x00000001 /* all targets are DFS roots */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 406d2c10ba78d0..c0978304528841 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -284,6 +284,11 @@ extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_codepage, unsigned int *num_referrals, struct dfs_info3_param **referrals, int remap); +extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, + unsigned int *num_of_nodes, + struct dfs_info3_param **target_nodes, + const struct nls_table *nls_codepage, int remap, + const char *searchName, bool is_unicode); extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct smb_vol *vol); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f5099fb8a22f14..5005c7995b6a1a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4786,117 +4786,6 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -/* parses DFS refferal V3 structure - * caller is responsible for freeing target_nodes - * returns: - * on success - 0 - * on failure - errno - */ -static int -parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, - unsigned int *num_of_nodes, - struct dfs_info3_param **target_nodes, - const struct nls_table *nls_codepage, int remap, - const char *searchName) -{ - int i, rc = 0; - char *data_end; - bool is_unicode; - struct dfs_referral_level_3 *ref; - - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - is_unicode = true; - else - is_unicode = false; - *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); - - if (*num_of_nodes < 1) { - cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", - *num_of_nodes); - rc = -EINVAL; - goto parse_DFS_referrals_exit; - } - - ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); - if (ref->VersionNumber != cpu_to_le16(3)) { - cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", - le16_to_cpu(ref->VersionNumber)); - rc = -EINVAL; - goto parse_DFS_referrals_exit; - } - - /* get the upper boundary of the resp buffer */ - data_end 
= (char *)(&(pSMBr->PathConsumed)) + - le16_to_cpu(pSMBr->t2.DataCount); - - cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n", - *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags)); - - *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param), - GFP_KERNEL); - if (*target_nodes == NULL) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - /* collect necessary data from referrals */ - for (i = 0; i < *num_of_nodes; i++) { - char *temp; - int max_len; - struct dfs_info3_param *node = (*target_nodes)+i; - - node->flags = le32_to_cpu(pSMBr->DFSFlags); - if (is_unicode) { - __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, - GFP_KERNEL); - if (tmp == NULL) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - cifsConvertToUTF16((__le16 *) tmp, searchName, - PATH_MAX, nls_codepage, remap); - node->path_consumed = cifs_utf16_bytes(tmp, - le16_to_cpu(pSMBr->PathConsumed), - nls_codepage); - kfree(tmp); - } else - node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); - - node->server_type = le16_to_cpu(ref->ServerType); - node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); - - /* copy DfsPath */ - temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); - max_len = data_end - temp; - node->path_name = cifs_strndup_from_utf16(temp, max_len, - is_unicode, nls_codepage); - if (!node->path_name) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - /* copy link target UNC */ - temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); - max_len = data_end - temp; - node->node_name = cifs_strndup_from_utf16(temp, max_len, - is_unicode, nls_codepage); - if (!node->node_name) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - ref++; - } - -parse_DFS_referrals_exit: - if (rc) { - free_dfs_info_array(*target_nodes, *num_of_nodes); - *target_nodes = NULL; - *num_of_nodes = 0; - } - return rc; -} - int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, const char *search_name, struct dfs_info3_param **target_nodes, @@ -4993,9 +4882,11 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, get_bcc(&pSMBr->hdr), le16_to_cpu(pSMBr->t2.DataOffset)); /* parse returned result into more usable form */ - rc = parse_DFS_referrals(pSMBr, num_of_nodes, - target_nodes, nls_codepage, remap, - search_name); + rc = parse_dfs_referrals(&pSMBr->dfs_data, + le16_to_cpu(pSMBr->t2.DataCount), + num_of_nodes, target_nodes, nls_codepage, + remap, search_name, + pSMBr->hdr.Flags2 & SMBFLG2_UNICODE); GetDFSRefExit: cifs_buf_release(pSMB); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c6729156f9a00c..d3fb11529ed960 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -640,3 +640,108 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, cifs_add_pending_open_locked(fid, tlink, open); spin_unlock(&tlink_tcon(open->tlink)->open_file_lock); } + +/* parses DFS refferal V3 structure + * caller is responsible for freeing target_nodes + * returns: + * - on success - 0 + * - on failure - errno + */ +int +parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, + unsigned int *num_of_nodes, + struct dfs_info3_param **target_nodes, + const struct nls_table *nls_codepage, int remap, + const char *searchName, bool is_unicode) +{ + int i, rc = 0; + char *data_end; + struct dfs_referral_level_3 *ref; + + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); + + if (*num_of_nodes < 1) { + cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", + *num_of_nodes); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + + ref = (struct 
dfs_referral_level_3 *) &(rsp->referrals); + if (ref->VersionNumber != cpu_to_le16(3)) { + cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", + le16_to_cpu(ref->VersionNumber)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + + /* get the upper boundary of the resp buffer */ + data_end = (char *)rsp + rsp_size; + + cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n", + *num_of_nodes, le32_to_cpu(rsp->DFSFlags)); + + *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param), + GFP_KERNEL); + if (*target_nodes == NULL) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + /* collect necessary data from referrals */ + for (i = 0; i < *num_of_nodes; i++) { + char *temp; + int max_len; + struct dfs_info3_param *node = (*target_nodes)+i; + + node->flags = le32_to_cpu(rsp->DFSFlags); + if (is_unicode) { + __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, + GFP_KERNEL); + if (tmp == NULL) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + cifsConvertToUTF16((__le16 *) tmp, searchName, + PATH_MAX, nls_codepage, remap); + node->path_consumed = cifs_utf16_bytes(tmp, + le16_to_cpu(rsp->PathConsumed), + nls_codepage); + kfree(tmp); + } else + node->path_consumed = le16_to_cpu(rsp->PathConsumed); + + node->server_type = le16_to_cpu(ref->ServerType); + node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); + + /* copy DfsPath */ + temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); + max_len = data_end - temp; + node->path_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); + if (!node->path_name) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + /* copy link target UNC */ + temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); + max_len = data_end - temp; + node->node_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); + if (!node->node_name) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + ref++; + } + +parse_DFS_referrals_exit: + if (rc) { + free_dfs_info_array(*target_nodes, *num_of_nodes); + *target_nodes = NULL; + *num_of_nodes = 0; + } + return rc; +} From 268a635d414df45a4a8da699d431da8f8ffcf014 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:14:17 +0100 Subject: [PATCH 0481/1911] CIFS: add build_path_from_dentry_optional_prefix() this function does the same thing as add build_path_from_dentry() but takes a boolean parameter to decide whether or not to prefix the path with the tree name. we cannot rely on tcon->Flags & SMB_SHARE_IS_IN_DFS for SMB2 as smb2 code never sets tcon->Flags but it sets tcon->share_flags and it seems the SMB_SHARE_IS_IN_DFS has different semantics in SMB2: the prefix shouldn't be added everytime it was in SMB1. Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 4 +++- fs/cifs/cifsproto.h | 2 ++ fs/cifs/dir.c | 13 ++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 9156be545b0f10..6b61df117fd48c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -303,7 +303,9 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) * gives us the latter, so we must adjust the result. 
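A short illustrative comparison (not taken from the patch) of the two helpers after this change:

	/* existing helper: prefix decided by tcon->Flags & SMB_SHARE_IS_IN_DFS */
	full_path = build_path_from_dentry(direntry);

	/* new helper: the caller decides; the DFS automount path passes true */
	full_path = build_path_from_dentry_optional_prefix(direntry, true);
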
*/ mnt = ERR_PTR(-ENOMEM); - full_path = build_path_from_dentry(mntpt); + + /* always use tree name prefix */ + full_path = build_path_from_dentry_optional_prefix(mntpt, true); if (full_path == NULL) goto cdda_exit; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c0978304528841..9ee46c1c3ebda3 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -61,6 +61,8 @@ extern void exit_cifs_idmap(void); extern int init_cifs_spnego(void); extern void exit_cifs_spnego(void); extern char *build_path_from_dentry(struct dentry *); +extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry, + bool prefix); extern char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2c227a99f369f0..56366e9840769d 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -80,6 +80,17 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* Note: caller must free return buffer */ char * build_path_from_dentry(struct dentry *direntry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool prefix = tcon->Flags & SMB_SHARE_IS_IN_DFS; + + return build_path_from_dentry_optional_prefix(direntry, + prefix); +} + +char * +build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix) { struct dentry *temp; int namelen; @@ -92,7 +103,7 @@ build_path_from_dentry(struct dentry *direntry) unsigned seq; dirsep = CIFS_DIR_SEP(cifs_sb); - if (tcon->Flags & SMB_SHARE_IS_IN_DFS) + if (prefix) dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); else dfsplen = 0; From 51146625f34b348c15d55c378b5ae5c2fb963fc8 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 28 Feb 2017 15:08:41 +0100 Subject: [PATCH 0482/1911] CIFS: add use_ipc flag to SMB2_ioctl() when set, use the session IPC tree id instead of the tid in the provided tcon. 
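As a hedged example of the intended use (the FSCTL code and buffer variables here are illustrative; only the new use_ipc argument comes from this patch), a caller that must reach the IPC$ share would pass use_ipc=true so the request is sent with ses->ipc_tid:

	rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
			FSCTL_DFS_GET_REFERRALS, true /* is_fsctl */,
			true /* use_ipc: send with ses->ipc_tid */,
			(char *)dfs_req, dfs_req_size,
			(char **)&dfs_rsp, &dfs_rsp_size);
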
Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2file.c | 3 ++- fs/cifs/smb2ops.c | 23 ++++++++++++++++------- fs/cifs/smb2pdu.c | 17 +++++++++++++++-- fs/cifs/smb2proto.h | 3 ++- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b2aff0c6f22c52..b4b1f0305f2994 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -73,7 +73,8 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, - fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, + true /* is_fsctl */, false /* use_ipc */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d2cdd9c19d1767..eba00cd3bd16f3 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -282,6 +282,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, + false /* use_ipc */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); if (rc != 0) @@ -571,6 +572,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, + false /* use_ipc */, NULL, 0 /* no input */, (char **)&res_key, &ret_data_len); @@ -635,7 +637,8 @@ smb2_clone_range(const unsigned int xid, /* Request server copy to target from src identified by key */ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, (char *)pcchunk, + true /* is_fsctl */, false /* use_ipc */, + (char *)pcchunk, sizeof(struct copychunk_ioctl), (char **)&retbuf, &ret_data_len); if (rc == 0) { @@ -787,7 +790,8 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, - true /* is_fctl */, &setsparse, 1, NULL, NULL); + true /* is_fctl */, false /* use_ipc */, + &setsparse, 1, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; cifs_dbg(FYI, "set sparse rc = %d\n", rc); @@ -857,7 +861,8 @@ smb2_duplicate_extents(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, - true /* is_fsctl */, (char *)&dup_ext_buf, + true /* is_fsctl */, false /* use_ipc */, + (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), NULL, &ret_data_len); @@ -891,7 +896,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_INTEGRITY_INFORMATION, - true /* is_fsctl */, (char *)&integr_info, + true /* is_fsctl */, false /* use_ipc */, + (char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), NULL, &ret_data_len); @@ -910,7 +916,8 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, - true /* is_fsctl */, NULL, 0 /* no input data */, + true /* is_fsctl */, false /* use_ipc */, + NULL, 0 /* no input 
data */, (char **)&retbuf, &ret_data_len); cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n", @@ -1220,7 +1227,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, (char *)&fsctl_buf, + true /* is_fctl */, false /* use_ipc */, + (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; @@ -1254,7 +1262,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, (char *)&fsctl_buf, + true /* is_fctl */, false /* use_ipc */, + (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ad83b3db284028..8c4532dc749f72 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -620,6 +620,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, + false /* use_ipc */, (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), (char **)&pneg_rsp, &rsplen); @@ -1683,8 +1684,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, - u32 indatalen, char **out_data, u32 *plen /* returned data len */) + u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc, + char *in_data, u32 indatalen, + char **out_data, u32 *plen /* returned data len */) { struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp; @@ -1721,6 +1723,16 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (rc) return rc; + if (use_ipc) { + if (ses->ipc_tid == 0) { + cifs_small_buf_release(req); + return -ENOTCONN; + } + + cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n", + req->hdr.sync_hdr.TreeId, ses->ipc_tid); + req->hdr.sync_hdr.TreeId = ses->ipc_tid; + } if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1843,6 +1855,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, + false /* use_ipc */, (char *)&fsctl_input /* data input */, 2 /* in data len */, &ret_data /* out data */, NULL); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 85fc7a78933441..11d9f3013db85b 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -121,7 +121,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_err_rsp **err_buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + bool is_fsctl, bool use_ipc, + char *in_data, u32 indatalen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); From 7f9f6d2ec51449a20bc35359c9e190bf861b57e1 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 28 Feb 2017 15:26:30 +0100 Subject: [PATCH 0483/1911] CIFS: let ses->ipc_tid hold smb2 TreeIds the TreeId field went from 2 bytes in CIFS to 4 bytes in SMB2+. 
this commit updates the size of the ipc_tid field of a cifs_ses, which was still using 2 bytes. Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1a90bb3e29866c..af224cda86971b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -822,7 +822,7 @@ struct cifs_ses { int ses_count; /* reference counter */ enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ - __u16 ipc_tid; /* special tid for connection to IPC share */ + __u32 ipc_tid; /* special tid for connection to IPC share */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ From b9043cc5b99e9c93596a28647fd9f526f5bfa22c Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:19:04 +0100 Subject: [PATCH 0484/1911] CIFS: set signing flag in SMB2+ TreeConnect if needed cifs_enable_signing() already sets server->sign according to what the server requires/offers and what mount options allows/forbids, so use that. this is required for IPC tcon that connects to signing-required servers. Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8c4532dc749f72..2fd93eeed15a78 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1168,8 +1168,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, /* since no tcon, smb2_init can not do this, so do here */ req->hdr.sync_hdr.SessionId = ses->Suid; - /* if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED) - req->hdr.Flags |= SMB2_FLAGS_SIGNED; */ + if (ses->server->sign) + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; } else if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; From 8953de2f02ad7b15e4964c82f9afd60f128e4e98 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 1 Mar 2017 09:55:28 +0000 Subject: [PATCH 0485/1911] net: bridge: allow IPv6 when multicast flood is disabled Even with multicast flooding turned off, IPv6 ND should still work so that IPv6 connectivity is provided. Allow this by continuing to flood multicast traffic originated by us. Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag") Cc: Nikolay Aleksandrov Signed-off-by: Mike Manning Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_forward.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6bfac29318f21e..902af6ba481c99 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + /* Do not flood if mc off, except for traffic we originate */ if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD)) + !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; /* Do not flood to ports that enable proxy ARP */ From 540e2894f7905538740aaf122bd8e0548e1c34a4 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 1 Mar 2017 12:57:20 +0100 Subject: [PATCH 0486/1911] net: don't call strlen() on the user buffer in packet_bind_spkt() KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of uninitialized memory in packet_bind_spkt(): Acked-by: Eric Dumazet ================================================================== BUG: KMSAN: use of unitialized memory CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x238/0x290 lib/dump_stack.c:51 [] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 [] __msan_warning+0x5b/0xb0 mm/kmsan/kmsan_instr.c:424 [< inline >] strlen lib/string.c:484 [] strlcpy+0x9d/0x200 lib/string.c:144 [] packet_bind_spkt+0x144/0x230 net/packet/af_packet.c:3132 [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? chained origin: 00000000eba00911 [] save_stack_trace+0x27/0x50 arch/x86/kernel/stacktrace.c:67 [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 [] kmsan_internal_chain_origin+0x118/0x1e0 mm/kmsan/kmsan.c:527 [] __msan_set_alloca_origin4+0xc3/0x130 mm/kmsan/kmsan_instr.c:380 [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? origin description: ----address@SYSC_bind (origin=00000000eb400911) ================================================================== (the line numbers are relative to 4.8-rc6, but the bug persists upstream) , when I run the following program as root: ===================================== #include #include #include #include int main() { struct sockaddr addr; memset(&addr, 0xff, sizeof(addr)); addr.sa_family = AF_PACKET; int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); bind(fd, &addr, sizeof(addr)); return 0; } ===================================== This happens because addr.sa_data copied from the userspace is not zero-terminated, and copying it with strlcpy() in packet_bind_spkt() results in calling strlen() on the kernel copy of that non-terminated buffer. Signed-off-by: Alexander Potapenko Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2bd0d1949312c3..a0dbe7ca8f724c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3103,7 +3103,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3111,7 +3111,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } From 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:28:39 -0800 Subject: [PATCH 0487/1911] net: net_enable_timestamp() can be called from irq contexts It is now very clear that silly TCP listeners might play with enabling/disabling timestamping while new children are added to their accept queue. Meaning net_enable_timestamp() can be called from BH context while current state of the static key is not enabled. Lets play safe and allow all contexts. The work queue is scheduled only under the problematic cases, which are the static key enable/disable transition, to not slow down critical paths. This extends and improves what we did in commit 5fa8bbda38c6 ("net: use a work queue to defer net_disable_timestamp() work") Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e63bf61b19be02..8637b2b71f3d47 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); From 48cac18ecf1de82f76259a54402c3adb7839ad01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:45:06 -0800 Subject: [PATCH 0488/1911] ipv6: orphan skbs in reassembly unit Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. 
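For context, a rough sketch of the write-memory accounting involved (paraphrased, not copied from the tree): skb_set_owner_w() charges skb->truesize against the owning socket and sock_wfree() later releases the same amount, so anything that resizes the skb in between unbalances the accounting:

	/* rough sketch of the two sides of the accounting:
	 * skb_set_owner_w(skb, sk):
	 *	skb->sk = sk;
	 *	skb->destructor = sock_wfree;
	 *	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
	 * sock_wfree(skb):
	 *	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	 * If reassembly changes skb->truesize between the two calls, the
	 * uncharge no longer matches the charge, which is why the skb is
	 * orphaned before entering the frag queue below.
	 */
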
We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 
net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/openvswitch/conntrack.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9948b5ce52dad3..986d4ca38832b1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? 
skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 85cd5952667068..e0a87776a010a3 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -485,7 +485,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) { From 52e51f16407b7b34e26affb500a21e250d9fce0b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:04:35 +0000 Subject: [PATCH 0489/1911] efi/libstub: Treat missing SecureBoot variable as Secure Boot disabled The newly refactored code that infers the firmware's Secure Boot state prints the following error when the EFI variable 'SecureBoot' does not exist: EFI stub: ERROR: Could not determine UEFI Secure Boot status. However, this variable is only guaranteed to be defined on a system that is Secure Boot capable to begin with, and so it is not an error if it is missing. So report Secure Boot as being disabled in this case, without printing any error messages. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395076-29712-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/secureboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 6def402bf5691f..5da36e56b36a1c 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) size = sizeof(secboot); status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, NULL, &size, &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; if (status != EFI_SUCCESS) goto out_efi_err; @@ -78,7 +80,5 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) out_efi_err: pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; return efi_secureboot_mode_unknown; } From d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:05:54 +0000 Subject: [PATCH 0490/1911] efi/arm: Fix boot crash with CONFIG_CPUMASK_OFFSTACK=y On ARM and arm64, we use a dedicated mm_struct to map the UEFI Runtime Services regions, which allows us to map those regions on demand, and in a way that is guaranteed to be compatible with incoming kernels across kexec. As it turns out, we don't fully initialize the mm_struct in the same way as process mm_structs are initialized on fork(), which results in the following crash on ARM if CONFIG_CPUMASK_OFFSTACK=y is enabled: ... EFI Variables Facility v0.08 2004-May-17 Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Process swapper/0 (pid: 1) ... __memzero() check_and_switch_context() virt_efi_get_next_variable() efivar_init() efivars_sysfs_init() do_one_initcall() ... This is due to a missing call to mm_init_cpumask(), so add it. 
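For reference, a condensed sketch of the resulting init sequence (declarations and the actual region-mapping loop are omitted; see the hunk below for the real efi_virtmap_init() context). The comments spell out what each step provides; the crash above suggests the CPU mask is what gets dereferenced while still uninitialized when CONFIG_CPUMASK_OFFSTACK=y.

/* Condensed, illustrative sketch of efi_virtmap_init() after this fix. */
static bool __init example_efi_virtmap_init(void)
{
	efi_mm.pgd = pgd_alloc(&efi_mm);	/* page tables for the runtime regions */
	mm_init_cpumask(&efi_mm);		/* the missing step: set up mm_cpumask() */
	init_new_context(NULL, &efi_mm);	/* arch-specific context (e.g. ASID) */
	/* ... map each EFI_MEMORY_RUNTIME region into efi_mm ... */
	return true;
}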
Signed-off-by: Ard Biesheuvel Cc: # v4.5+ Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395154-29786-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 349dc3e1e52e0a..974c5a31a00598 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void) bool systab_found; efi_mm.pgd = pgd_alloc(&efi_mm); + mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); systab_found = false; From 4ec3dd89052a437304e1451733c989b8cec681af Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Thu, 2 Mar 2017 15:12:47 +0800 Subject: [PATCH 0491/1911] drm/i915/gvt: fix an error for F_RO flag the ro_mask is not stored into each mmio entry Fixes: 12d14cc43b34 ("drm/i915/gvt: Introduce a framework for tracking HW registers.") Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 548aedfbd40272..8e43395c748a15 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt, info->size = size; info->length = (i + 4) < end ? 4 : (end - i); info->addr_mask = addr_mask; + info->ro_mask = ro_mask; info->device = device; info->read = read ? read : intel_vgpu_default_mmio_read; info->write = write ? write : intel_vgpu_default_mmio_write; From eb1e011a14748a1d9df9a7d7df9a5711721a1bdb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Feb 2017 09:49:26 +0100 Subject: [PATCH 0492/1911] average: change to declare precision, not factor Declaring the factor is counter-intuitive, and people are prone to using small(-ish) values even when that makes no sense. Change the DECLARE_EWMA() macro to take the fractional precision, in bits, rather than a factor, and update all users. While at it, add some more documentation. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- drivers/net/virtio_net.c | 2 +- drivers/net/wireless/ath/ath5k/ath5k.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 2 +- include/linux/average.h | 61 ++++++++++++++------- net/batman-adv/types.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/sta_info.h | 2 +- 7 files changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index bf95016f442ace..e9d7e2b70085ee 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -51,7 +51,7 @@ module_param(gso, bool, 0444); * at once, the weight is chosen so that the EWMA will be insensitive to short- * term, transient changes in packet size. */ -DECLARE_EWMA(pkt_len, 1, 64) +DECLARE_EWMA(pkt_len, 0, 64) /* With mergeable buffers we align buffer address and use the low bits to * encode its true size. 
Buffer size is up to 1 page so we need to align to diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index 67fedb61fcc02d..979800c6f57fba 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1252,7 +1252,7 @@ struct ath5k_statistics { #define ATH5K_TXQ_LEN_MAX (ATH_TXBUF / 4) /* bufs per queue */ #define ATH5K_TXQ_LEN_LOW (ATH5K_TXQ_LEN_MAX / 2) /* low mark */ -DECLARE_EWMA(beacon_rssi, 1024, 8) +DECLARE_EWMA(beacon_rssi, 10, 8) /* Driver state associated with an instance of a device */ struct ath5k_hw { diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 26869b3bef45ff..340787894c694a 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -257,7 +257,7 @@ struct link_qual { int tx_failed; }; -DECLARE_EWMA(rssi, 1024, 8) +DECLARE_EWMA(rssi, 10, 8) /* * Antenna settings about the currently active link. diff --git a/include/linux/average.h b/include/linux/average.h index d04aa58280ded5..7ddaf340d2ac98 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,45 +1,66 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H -/* Exponentially weighted moving average (EWMA) */ +/* + * Exponentially weighted moving average (EWMA) + * + * This implements a fixed-precision EWMA algorithm, with both the + * precision and fall-off coefficient determined at compile-time + * and built into the generated helper funtions. + * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the precision, expresses how many bits are + * used for the fractional part of the fixed-precision values. + * + * The third argument, the weight reciprocal, determines how the + * new values will be weighed vs. the old state, new values will + * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note + * that this parameter must be a power of two for efficiency. + */ -#define DECLARE_EWMA(name, _factor, _weight) \ +#define DECLARE_EWMA(name, _precision, _weight_rcp) \ struct ewma_##name { \ unsigned long internal; \ }; \ static inline void ewma_##name##_init(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + /* \ + * Even if you want to feed it just 0/1 you should have \ + * some bits for the non-fractional part... 
\ + */ \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ e->internal = 0; \ } \ static inline unsigned long \ ewma_##name##_read(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ - return e->internal >> ilog2(_factor); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ + return e->internal >> (_precision); \ } \ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ unsigned long internal = ACCESS_ONCE(e->internal); \ - unsigned long weight = ilog2(_weight); \ - unsigned long factor = ilog2(_factor); \ + unsigned long weight_rcp = ilog2(_weight_rcp); \ + unsigned long precision = _precision; \ \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ ACCESS_ONCE(e->internal) = internal ? \ - (((internal << weight) - internal) + \ - (val << factor)) >> weight : \ - (val << factor); \ + (((internal << weight_rcp) - internal) + \ + (val << precision)) >> weight_rcp : \ + (val << precision); \ } #endif /* _LINUX_AVERAGE_H */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8f64a5c013454a..66b25e410a4137 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -402,7 +402,7 @@ struct batadv_gw_node { struct rcu_head rcu; }; -DECLARE_EWMA(throughput, 1024, 8) +DECLARE_EWMA(throughput, 10, 8) /** * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 159a1a73372506..0e718437d080e7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -428,7 +428,7 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; -DECLARE_EWMA(beacon_signal, 16, 4) +DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { struct timer_list timer; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 15599c70a38fc9..e65cda34d2bc00 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -372,7 +372,7 @@ struct mesh_sta { unsigned int fail_avg; }; -DECLARE_EWMA(signal, 1024, 8) +DECLARE_EWMA(signal, 10, 8) struct ieee80211_sta_rx_stats { unsigned long packets; From 9ccd27cc2e8bfbec502d7c64a353dc4489536b81 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:11:09 +0100 Subject: [PATCH 0493/1911] sched/headers: Make all include/linux/sched/*.h headers build standalone Make each header self-sufficient, so that it can be built successfully both in an allnoconfig and allyesconfig kernel. Also standardize the naming of their header guards. 
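As an illustration of the pattern (the header and function names below are invented, not taken from this series), a self-sufficient header pulls in only what it itself needs, forward-declares the rest, and uses the full _LINUX_SCHED_*_H guard naming:

#ifndef _LINUX_SCHED_EXAMPLE_H
#define _LINUX_SCHED_EXAMPLE_H

#include <linux/types.h>	/* only what this header itself uses */

struct task_struct;		/* forward declaration instead of pulling in all of sched.h */

extern bool example_task_is_special(const struct task_struct *p);

#endif /* _LINUX_SCHED_EXAMPLE_H */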
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 8 +++++--- include/linux/sched/prio.h | 6 +++--- include/linux/sched/rt.h | 10 ++++++---- include/linux/sched/sysctl.h | 10 +++++++--- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9089a2ae913ddf..975be862e0835c 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,7 @@ -#ifndef _SCHED_DEADLINE_H -#define _SCHED_DEADLINE_H +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H + +#include /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#endif /* _SCHED_DEADLINE_H */ +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index d9cf5a5762d9d3..2cc450f6ec5423 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,5 +1,5 @@ -#ifndef _SCHED_PRIO_H -#define _SCHED_PRIO_H +#ifndef _LINUX_SCHED_PRIO_H +#define _LINUX_SCHED_PRIO_H #define MAX_NICE 19 #define MIN_NICE -20 @@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio) return (MAX_NICE - prio + 1); } -#endif /* _SCHED_PRIO_H */ +#endif /* _LINUX_SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1a7..3bd668414f61a5 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,7 +1,9 @@ -#ifndef _SCHED_RT_H -#define _SCHED_RT_H +#ifndef _LINUX_SCHED_RT_H +#define _LINUX_SCHED_RT_H -#include +#include + +struct task_struct; static inline int rt_prio(int prio) { @@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void); */ #define RR_TIMESLICE (100 * HZ / 1000) -#endif /* _SCHED_RT_H */ +#endif /* _LINUX_SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 49308e142aaeb1..0f5ecd4d298e2a 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -1,5 +1,9 @@ -#ifndef _SCHED_SYSCTL_H -#define _SCHED_SYSCTL_H +#ifndef _LINUX_SCHED_SYSCTL_H +#define _LINUX_SCHED_SYSCTL_H + +#include + +struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK extern int sysctl_hung_task_check_count; @@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif /* _SCHED_SYSCTL_H */ +#endif /* _LINUX_SCHED_SYSCTL_H */ From c930b2c0de32f45ce8f67affe936ce7a05b07b00 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:22:54 +0100 Subject: [PATCH 0494/1911] sched/core: Convert ___assert_task_state() link time assert to BUILD_BUG_ON() The length of TASK_STATE_TO_CHAR_STR was still checked using the old link-time manual error method - convert it to BUILD_BUG_ON(). This has a couple of advantages: - it's more obvious what's going on - it reduces the size and complexity of - BUILD_BUG_ON() will fail during compilation, with a clearer error message than the link time assert. 
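To make the contrast concrete, here is a generic, self-contained illustration (the EXAMPLE_* names are made up, not the kernel's): the old idiom hides the assertion in an extern array whose size goes negative when the invariant breaks, while BUILD_BUG_ON() sits next to the code that relies on the invariant and fails the build with the offending condition spelled out.

#include <linux/bug.h>

#define EXAMPLE_STATE_STR	"RSDT"	/* one character per state */
#define EXAMPLE_NR_STATES	4

/* Old style: the check is encoded in an array size; when it trips, the
 * resulting error says nothing about which invariant was violated.
 */
extern char __example_assert_states[1 - 2*!!(sizeof(EXAMPLE_STATE_STR) - 1 != EXAMPLE_NR_STATES)];

/* New style: fails during compilation of this unit, naming the condition. */
static inline void example_check_state_str(void)
{
	BUILD_BUG_ON(sizeof(EXAMPLE_STATE_STR) - 1 != EXAMPLE_NR_STATES);
}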
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- kernel/sched/core.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210a1..c204613396cd3c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,9 +223,6 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - /* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b4998..7d76ccb79e91e4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5233,6 +5233,9 @@ void sched_show_task(struct task_struct *p) int ppid; unsigned long state = p->state; + /* Make sure the string lines up properly with the number of task states: */ + BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + if (!try_get_task_stack(p)) return; if (state) From 59ddbcb2f45b958cf1f11f122b666cbcf50cd57b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:37:48 +0100 Subject: [PATCH 0495/1911] sched/core: Move the get_preempt_disable_ip() inline to sched/core.c It's defined in , but nothing outside the scheduler uses it - so move it to the sched/core.c usage site. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 --------- kernel/sched/core.c | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c204613396cd3c..df42cac04243f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3419,15 +3419,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7d76ccb79e91e4..2acdf19c5f7c2a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3211,6 +3211,15 @@ static inline void preempt_latency_start(int val) { } static inline void preempt_latency_stop(int val) { } #endif +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT + return p->preempt_disable_ip; +#else + return 0; +#endif +} + /* * Print scheduling while atomic bug: */ From 0c98d344fe5c27f6e4bce42ac503e9e9a51c7d1d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:38:10 +0100 Subject: [PATCH 0496/1911] sched/core: Remove the tsk_cpus_allowed() wrapper So the original intention of tsk_cpus_allowed() was to 'future-proof' the field - but it's pretty ineffectual at that, because half of the code uses ->cpus_allowed directly ... Also, the wrapper makes the code longer than the original expression! So just get rid of it. This also shrinks a bit. 
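For a representative hunk of the conversion (the pattern is taken from the diff below; this is an excerpt, not a standalone program): the wrapper being removed is a plain macro, so every call site becomes a direct, shorter use of the field.

/* The wrapper being dropped from <linux/sched.h>: */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)

/* so a call site such as: */
if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
	return rq;
/* becomes the equivalent but shorter: */
if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
	return rq;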
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/sparc/kernel/sysfs.c | 2 +- drivers/cpufreq/sparc-us2e-cpufreq.c | 4 ++-- drivers/cpufreq/sparc-us3-cpufreq.c | 4 ++-- drivers/infiniband/hw/hfi1/affinity.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- include/linux/sched.h | 3 --- kernel/sched/core.c | 20 ++++++++--------- kernel/sched/cpudeadline.c | 4 ++-- kernel/sched/cpupri.c | 4 ++-- kernel/sched/deadline.c | 7 +++--- kernel/sched/fair.c | 25 +++++++++++----------- kernel/sched/rt.c | 5 ++--- lib/smp_processor_id.c | 2 +- samples/trace_events/trace-events-sample.c | 2 +- 16 files changed, 42 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 573fb3a461b5d7..21fdf02583fe57 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -795,7 +795,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, tsk_cpus_allowed(current)); + cpumask_copy(old_mask, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 460f5f31d5cb01..9b543df210fb37 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -140,7 +140,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&ctx->cpus_allowed, &current->cpus_allowed); /* Save the current cpu id for spu interrupt routing. 
*/ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 4808b6d234551b..d63fc613e7a9c7 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -106,7 +106,7 @@ static unsigned long run_on_cpu(unsigned long cpu, cpumask_t old_affinity; unsigned long ret; - cpumask_copy(&old_affinity, tsk_cpus_allowed(current)); + cpumask_copy(&old_affinity, &current->cpus_allowed); /* should return -EINVAL to userspace */ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c index b73feeb666f9f9..35ddb6da93aaf8 100644 --- a/drivers/cpufreq/sparc-us2e-cpufreq.c +++ b/drivers/cpufreq/sparc-us2e-cpufreq.c @@ -234,7 +234,7 @@ static unsigned int us2e_freq_get(unsigned int cpu) cpumask_t cpus_allowed; unsigned long clock_tick, estar; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; @@ -252,7 +252,7 @@ static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) unsigned long clock_tick, divisor, old_divisor, estar; cpumask_t cpus_allowed; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c index 9bb42ba50efaf9..a8d86a449ca11f 100644 --- a/drivers/cpufreq/sparc-us3-cpufreq.c +++ b/drivers/cpufreq/sparc-us3-cpufreq.c @@ -82,7 +82,7 @@ static unsigned int us3_freq_get(unsigned int cpu) unsigned long reg; unsigned int ret; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); @@ -99,7 +99,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index) unsigned long new_bits, new_freq, reg; cpumask_t cpus_allowed; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, &current->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 7a3d906b36710f..e2cd2cd3b28a88 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -576,7 +576,7 @@ int hfi1_get_proc_affinity(int node) struct hfi1_affinity_node *entry; cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, - *proc_mask = tsk_cpus_allowed(current); + *proc_mask = &current->cpus_allowed; struct hfi1_affinity_node_list *affinity = &node_affinity; struct cpu_mask_set *set = &affinity->proc; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 1d81cac1fa6c83..5cde1ecda0fea8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -856,7 +856,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; - const struct cpumask *current_mask = tsk_cpus_allowed(current); + const struct cpumask *current_mask = &current->cpus_allowed; unsigned long cpu_id; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index df42cac04243f2..6d1cc20cc477d0 100644 --- 
a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,9 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - static inline int tsk_nr_cpus_allowed(struct task_struct *p) { return p->nr_cpus_allowed; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2acdf19c5f7c2a..ef5bbf760a08b0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -981,7 +981,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_ return rq; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return rq; rq = move_queued_task(rq, p, dest_cpu); @@ -1259,10 +1259,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task))) + if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task))) + if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1303,10 +1303,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task))) + if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) + if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1490,14 +1490,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? 
*/ - for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { + for_each_cpu(dest_cpu, &p->cpus_allowed) { if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) @@ -1552,7 +1552,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) if (tsk_nr_cpus_allowed(p) > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(tsk_cpus_allowed(p)); + cpu = cpumask_any(&p->cpus_allowed); /* * In order not to call set_task_cpu() on a blocking task we need @@ -1564,7 +1564,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -5473,7 +5473,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) return -EINVAL; /* TODO: This is not properly updating schedstats */ diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index e73119013c5318..fba235c7d02679 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) { + cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { best_cpu = cpumask_any(later_mask); goto out; - } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) && + } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { best_cpu = cpudl_maximum(cp); if (later_mask) diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 11e9705bf9378d..981fcd7dc394eb 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27737f34757d38..8e4d6e4e3ccc72 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online cpu. */ - cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); + cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. @@ -1235,7 +1235,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1384,8 +1384,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. 
*/ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce48..3b60d73ab290dd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1551,7 +1551,7 @@ static void task_numa_compare(struct task_numa_env *env, */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur))) + if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) goto unlock; /* @@ -1661,7 +1661,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(env->p))) + if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) continue; env->dst_cpu = cpu; @@ -5458,7 +5458,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - tsk_cpus_allowed(p))) + &p->cpus_allowed)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5578,7 +5578,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) return cpumask_first(sched_group_cpus(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { + for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5717,7 +5717,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p)); + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); for_each_cpu_wrap(core, cpus, target, wrap) { bool idle = true; @@ -5751,7 +5751,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) return cpu; @@ -5803,7 +5803,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t time = local_clock(); for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) break; @@ -5958,7 +5958,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + && cpumask_test_cpu(cpu, &p->cpus_allowed); } rcu_read_lock(); @@ -6698,7 +6698,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { + if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -6718,7 +6718,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, 
env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) { + if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -7252,7 +7252,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to tsk_cpus_allowed() constraints. + * groups is inadequate due to ->cpus_allowed constraints. * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. @@ -8211,8 +8211,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, * if the curr task on busiest cpu can't be * moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - tsk_cpus_allowed(busiest->curr))) { + if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e8836cfc4cdbee..cbd356f6388346 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1726,8 +1726,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 1afec32de6f21c..690d75b132fa7c 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu))) + if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu))) goto out; /* diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 30e282d33d4dc5..bc7fcf010a5b4c 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -33,7 +33,7 @@ static void simple_thread_func(int cnt) /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - tsk_cpus_allowed(current)); + &current->cpus_allowed); trace_foo_with_template_simple("HELLO", cnt); From 4b53a3412d6663214ce9c754eff9373a9cff9dee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:41:03 +0100 Subject: [PATCH 0497/1911] sched/core: Remove the tsk_nr_cpus_allowed() wrapper tsk_nr_cpus_allowed() too is a pretty pointless wrapper that is not used consistently and which makes the code both harder to read and longer as well. So remove it - this also shrinks a bit. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 28 ++++++++++++++-------------- kernel/sched/rt.c | 24 ++++++++++++------------ 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d1cc20cc477d0..e732881517f291 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,11 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ef5bbf760a08b0..2d51ec65dc7349 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1549,7 +1549,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { lockdep_assert_held(&p->pi_lock); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else cpu = cpumask_any(&p->cpus_allowed); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 8e4d6e4e3ccc72..99b2c33a9fbcb4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory++; update_dl_migration(dl_rq); @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory--; update_dl_migration(dl_rq); @@ -958,7 +958,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) enqueue_dl_entity(&p->dl, pi_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1032,9 +1032,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) * try to make it stay here, it might be important. */ if (unlikely(dl_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &curr->dl)) && - (tsk_nr_cpus_allowed(p) > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_later_rq(p); if (target != -1 && @@ -1055,7 +1055,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) return; @@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. 
*/ - if (tsk_nr_cpus_allowed(p) != 1 && + if (p->nr_cpus_allowed != 1 && cpudl_find(&rq->rd->cpudl, p, NULL) != -1) return; @@ -1178,7 +1178,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1) + if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1279,7 +1279,7 @@ static int find_later_rq(struct task_struct *task) if (unlikely(!later_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* @@ -1424,7 +1424,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!dl_task(p)); @@ -1463,7 +1463,7 @@ static int push_dl_task(struct rq *rq) */ if (dl_task(rq->curr) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && - tsk_nr_cpus_allowed(rq->curr) > 1) { + rq->curr->nr_cpus_allowed > 1) { resched_curr(rq); return 0; } @@ -1610,9 +1610,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && dl_task(rq->curr) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &rq->curr->dl))) { push_dl_tasks(rq); } @@ -1726,7 +1726,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded) + if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); #endif if (dl_task(rq->curr)) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index cbd356f6388346..9f3e40226dec87 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -335,7 +335,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -352,7 +352,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { int target = find_lowest_rq(p); @@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) return; @@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. 
*/ - if (tsk_nr_cpus_allowed(p) != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1761,7 +1761,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!rt_task(p)); @@ -2121,9 +2121,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && (dl_task(rq->curr) || rt_task(rq->curr)) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -2196,7 +2196,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) */ if (task_on_rq_queued(p) && rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded) + if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) From f9411ebe3d85cbbea06298241e6053d031d281fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:50:49 +0100 Subject: [PATCH 0498/1911] rcu: Separate the RCU synchronization types and APIs into So rcupdate.h is a pretty complex header, in particular it includes which includes - creating a dependency that includes in , which prevents the isolation of from the derived header. Solve part of the problem by decoupling rcupdate.h from completions: this can be done by separating out the rcu_synchronize types and APIs, and updating their usage sites. Since this is a mostly RCU-internal types this will not just simplify 's dependencies, but will make all the hundreds of .c files that include rcupdate.h but not completions or wait.h build faster. ( For rcutiny this means that two dependent APIs have to be uninlined, but that shouldn't be much of a problem as they are rare variants. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/autofs4/autofs_i.h | 1 + include/linux/dcache.h | 1 + include/linux/rcupdate.h | 40 ---------------------------- include/linux/rcupdate_wait.h | 50 +++++++++++++++++++++++++++++++++++ include/linux/rcutiny.h | 11 ++------ kernel/rcu/srcu.c | 2 +- kernel/rcu/tiny.c | 14 +++++++++- kernel/rcu/tree.c | 2 +- kernel/rcu/update.c | 1 + kernel/sched/core.c | 1 + 10 files changed, 71 insertions(+), 52 deletions(-) create mode 100644 include/linux/rcupdate_wait.h diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c885daae68c8ee..beef981aa54f34 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -14,6 +14,7 @@ #include #include #include +#include /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 591b6c16f9c12e..d2e38dc6172c06 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include #include #include +#include struct path; struct vfsmount; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ade6a52d9d42b..de88b33c097487 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); -/* - * Structure allowing asynchronous waiting on RCU. - */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; -void wakeme_after_rcu(struct rcu_head *head); - -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); - -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ -} while (0) - -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) - -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for the flavors to wait on. - * - * This macro waits concurrently for multiple flavors of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait - * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU - * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called - * is automatically a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h new file mode 100644 index 00000000000000..e774b4f5f220da --- /dev/null +++ b/include/linux/rcupdate_wait.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H +#define _LINUX_SCHED_RCUPDATE_WAIT_H + +/* + * RCU synchronization types and methods: + */ + +#include +#include + +/* + * Structure allowing asynchronous waiting on RCU. 
+ */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + +#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4f9b2fa2173d69..b452953e21c8ae 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate) might_sleep(); } -static inline void rcu_barrier_bh(void) -{ - wait_rcu_gp(call_rcu_bh); -} - -static inline void rcu_barrier_sched(void) -{ - wait_rcu_gp(call_rcu_sched); -} +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); static inline void synchronize_rcu_expedited(void) { diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e773129c8b08d2..ef3bcfb15b39ec 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index fa6a48d3917bf2..6ad330dbbae2ec 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,6 +47,18 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" +void rcu_barrier_bh(void) +{ + wait_rcu_gp(call_rcu_bh); +} +EXPORT_SYMBOL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + wait_rcu_gp(call_rcu_sched); +} +EXPORT_SYMBOL(rcu_barrier_sched); + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d80e0d2f68c675..cb62ce23ffc7da 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9e03db9ea9c09c..a0e90e0afc755e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -49,6 +49,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d51ec65dc7349..1bd317db98108a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include From 780de9dd2720debc14c501dab4dc80d1f75ad50e Mon Sep 17 00:00:00 
2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:50:56 +0100 Subject: [PATCH 0499/1911] sched/headers, cgroups: Remove the threadgroup_change_*() wrappery threadgroup_change_begin()/end() is a pointless wrapper around cgroup_threadgroup_change_begin()/end(), minus a might_sleep() in the !CONFIG_CGROUPS=y case. Remove the wrappery, move the might_sleep() (the down_read() already has a might_sleep() check). This debloats a bit and simplifies this API. Update all call sites. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 6 +++--- include/linux/cgroup-defs.h | 13 ++++++++----- include/linux/sched.h | 28 ---------------------------- kernel/cgroup/pids.c | 2 +- kernel/fork.c | 6 +++--- kernel/signal.c | 6 +++--- 6 files changed, 18 insertions(+), 43 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 698a86094f7672..e595e152958134 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1088,7 +1088,7 @@ static int de_thread(struct task_struct *tsk) struct task_struct *leader = tsk->group_leader; for (;;) { - threadgroup_change_begin(tsk); + cgroup_threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); /* * Do this under tasklist_lock to ensure that @@ -1099,7 +1099,7 @@ static int de_thread(struct task_struct *tsk) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -1157,7 +1157,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); release_task(leader); } diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3c02404cfce9b2..6a3f850cababb6 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. + * Allows cgroup operations to synchronize against threadgroup changes + * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { @@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). + * Counterpart of cgroup_threadcgroup_change_begin(). 
*/ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { @@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #define CGROUP_SUBSYS_COUNT 0 -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + might_sleep(); +} + static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e732881517f291..3f61baac928b03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3162,34 +3162,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -/** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes - * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. - */ -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - might_sleep(); - cgroup_threadgroup_change_begin(tsk); -} - -/** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes - * - * See threadgroup_change_begin(). - */ -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - cgroup_threadgroup_change_end(tsk); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 2bd673783f1a95..e756dae493008e 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -214,7 +214,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) /* * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies - * on threadgroup_change_begin() held by the copy_process(). + * on cgroup_threadgroup_change_begin() held by the copy_process(). */ static int pids_can_fork(struct task_struct *task) { diff --git a/kernel/fork.c b/kernel/fork.c index 246bf9aaf9dfdd..d043fedc03c819 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1746,7 +1746,7 @@ static __latent_entropy struct task_struct *copy_process( INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - threadgroup_change_begin(current); + cgroup_threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. 
It should be noted the the new process's css_set can be changed @@ -1843,7 +1843,7 @@ static __latent_entropy struct task_struct *copy_process( proc_fork_connector(p); cgroup_post_fork(p); - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); @@ -1854,7 +1854,7 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: diff --git a/kernel/signal.c b/kernel/signal.c index 214a8feeb77124..bae358532d0aee 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2395,11 +2395,11 @@ void exit_signals(struct task_struct *tsk) * @tsk is about to have PF_EXITING set - lock out users which * expect stable threadgroup. */ - threadgroup_change_begin(tsk); + cgroup_threadgroup_change_begin(tsk); if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); return; } @@ -2410,7 +2410,7 @@ void exit_signals(struct task_struct *tsk) */ tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); if (!signal_pending(tsk)) goto out; From 314ff7851fc8ea66cbf48eaa93d8ebfb5ca084a9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:31 +0100 Subject: [PATCH 0500/1911] mm/vmacache, sched/headers: Introduce 'struct vmacache' and move it from to The header includes various vmacache related defines, which are arguably misplaced. Move them to mm_types.h and minimize the sched.h impact by putting all task vmacache state into a new 'struct vmacache' structure. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/sched.h | 11 ++++------- include/linux/vmacache.h | 2 +- kernel/debug/debug_core.c | 4 ++-- mm/nommu.c | 2 +- mm/vmacache.c | 10 +++++----- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f6d440ad78551..137797cd7b5052 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -360,6 +360,18 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + struct core_thread { struct task_struct *task; struct core_thread *next; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3f61baac928b03..e87c97e1a94778 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,10 +134,6 @@ struct blk_plug; struct filename; struct nameidata; -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - /* * These are the constant used to fake the fixed-point load-average * counting. 
Some notes: @@ -1550,9 +1546,10 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; + + /* Per-thread vma caching: */ + struct vmacache vmacache; + #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index c3fa0fd4394995..1081db987391d2 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -12,7 +12,7 @@ static inline void vmacache_flush(struct task_struct *tsk) { - memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_flush_all(struct mm_struct *mm); diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 79517e5549f119..a603ef28f70c98 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -232,9 +232,9 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) int i; for (i = 0; i < VMACACHE_SIZE; i++) { - if (!current->vmacache[i]) + if (!current->vmacache.vmas[i]) continue; - flush_cache_range(current->vmacache[i], + flush_cache_range(current->vmacache.vmas[i], addr, addr + BREAK_INSTR_SIZE); } } diff --git a/mm/nommu.c b/mm/nommu.c index fe9f4fa4a7a741..aae06e854552dd 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -757,7 +757,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) mm->map_count--; for (i = 0; i < VMACACHE_SIZE; i++) { /* if the vma is cached, invalidate the entire cache */ - if (curr->vmacache[i] == vma) { + if (curr->vmacache.vmas[i] == vma) { vmacache_invalidate(mm); break; } diff --git a/mm/vmacache.c b/mm/vmacache.c index 035fdeb35b43b9..7c233f8e20eef9 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -60,7 +60,7 @@ static inline bool vmacache_valid_mm(struct mm_struct *mm) void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) { if (vmacache_valid_mm(newvma->vm_mm)) - current->vmacache[VMACACHE_HASH(addr)] = newvma; + current->vmacache.vmas[VMACACHE_HASH(addr)] = newvma; } static bool vmacache_valid(struct mm_struct *mm) @@ -71,12 +71,12 @@ static bool vmacache_valid(struct mm_struct *mm) return false; curr = current; - if (mm->vmacache_seqnum != curr->vmacache_seqnum) { + if (mm->vmacache_seqnum != curr->vmacache.seqnum) { /* * First attempt will always be invalid, initialize * the new cache for this task here. */ - curr->vmacache_seqnum = mm->vmacache_seqnum; + curr->vmacache.seqnum = mm->vmacache_seqnum; vmacache_flush(curr); return false; } @@ -93,7 +93,7 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) return NULL; for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache[i]; + struct vm_area_struct *vma = current->vmacache.vmas[i]; if (!vma) continue; @@ -121,7 +121,7 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, return NULL; for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache[i]; + struct vm_area_struct *vma = current->vmacache.vmas[i]; if (vma && vma->vm_start == start && vma->vm_end == end) { count_vm_vmacache_event(VMACACHE_FIND_HITS); From af8601ad420f6afa6445c927ad9f36d9700d96d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: [PATCH 0501/1911] kasan, sched/headers: Uninline kasan_enable/disable_current() is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. 
But kasan.h has almost no need for sched.h: its only use of scheduler functionality is in two inline functions which are not used very frequently - so uninline kasan_enable_current() and kasan_disable_current(). Also add a dependency to a .c file that depended on kasan.h including it. This paves the way to remove the include from kasan.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 10 ++-------- lib/sbitmap.c | 1 + mm/kasan/kasan.c | 10 ++++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a558..7793036eac8062 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 55e11c4b2f3b8e..60e800e0b5a0d9 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 25f0e6521f36c6..d99312ed7c2292 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -39,6 +39,16 @@ #include "kasan.h" #include "../slab.h" +void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +void kasan_disable_current(void) +{ + current->kasan_depth--; +} + /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. From 105ab3d8ce7269887d24d224054677125e18037c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0502/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
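The placeholder technique is easiest to see in the new file itself. The angle-bracketed header names did not survive extraction above, so the following is only a sketch, under the assumption that the new <linux/sched/topology.h> simply redirects to the monolithic <linux/sched.h> it is being split from:

    /* include/linux/sched/topology.h -- initial placeholder (sketch) */
    #ifndef _LINUX_SCHED_TOPOLOGY_H
    #define _LINUX_SCHED_TOPOLOGY_H

    /*
     * Redirect to the monolithic header for now; the topology and
     * sched_domain bits only move here in a later patch, so every
     * intermediate commit keeps building and the series stays bisectable.
     */
    #include <linux/sched.h>

    #endif /* _LINUX_SCHED_TOPOLOGY_H */

A converted .c file then merely gains one extra include line next to its existing #include <linux/sched.h>, which is a no-op until the real code move lands.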
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 1 + arch/arm64/kernel/topology.c | 1 + arch/powerpc/kernel/smp.c | 1 + arch/s390/kernel/topology.c | 1 + arch/x86/kernel/smpboot.c | 1 + block/blk-mq.c | 1 + block/blk-softirq.c | 1 + include/linux/cpuset.h | 1 + include/linux/sched/topology.h | 6 ++++++ kernel/sched/fair.c | 2 ++ kernel/sched/sched.h | 1 + 11 files changed, 17 insertions(+) create mode 100644 include/linux/sched/topology.h diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index ebf47d91b8041f..f8a3ab82e77f51 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 565dd69888cc57..08243533e5ee62 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 21fdf02583fe57..fce17789c6751a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 2cd5f4f1013c2b..17660e800e74f3 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a0d38685f7dfdd..fe7a66e6b5a0d2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e6b064e533979..746c14e0d157c6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 06cf9807f49a3b..87b7df4851bffd 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "blk.h" diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338ab..c608c39cb16141 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 00000000000000..4f1159f76f9229 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b60d73ab290dd..11e0ab57748ada 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -21,6 +21,8 @@ */ #include +#include + #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 71b10a9b73cfe2..9b9cb260d9cf31 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,6 +1,7 @@ #include #include +#include #include #include #include From 4c822698cba8bdd93724117eded12bf34eb80252 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0503/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , 
which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mwait.h | 1 + arch/x86/kernel/process.c | 1 + drivers/cpuidle/driver.c | 1 + include/linux/sched/idle.h | 6 ++++++ include/linux/sched/topology.h | 2 ++ kernel/sched/idle.c | 1 + kernel/smp.c | 1 + 7 files changed, 13 insertions(+) create mode 100644 include/linux/sched/idle.h diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f37f2d8a2989d0..bda3c27f0da06c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -2,6 +2,7 @@ #define _ASM_X86_MWAIT_H #include +#include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b911..6395e0cd7dd644 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index ab264d39323368..e53fb861beb045 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 00000000000000..a3cf79b86986c1 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4f1159f76f9229..184b56b8aa64ff 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -3,4 +3,6 @@ #include +#include + #endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6a4bae0a649d9a..ac6d5176463dca 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -2,6 +2,7 @@ * Generic entry point for the idle threads */ #include +#include #include #include #include diff --git a/kernel/smp.c b/kernel/smp.c index 77fcdb9f27756f..a817769b53c0e1 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "smpboot.h" From 84f001e15737f8214b0f5f0f7dfec0fb1027938f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0504/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
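As context for the call sites in the diffstat below (futex, mutex/rtmutex/rwsem, the ipc code), here is a minimal sketch of the wake_q pattern they rely on, assuming the DEFINE_WAKE_Q()/wake_q_add()/wake_up_q() API of this kernel; the waiter structure and helper function are invented for illustration:

    #include <linux/sched/wake_q.h>    /* was <linux/sched.h> before this series */
    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_waiter {
        struct list_head list;
        struct task_struct *task;
    };

    /* Collect wakeups under the lock, issue them after dropping it. */
    static void demo_release_waiters(spinlock_t *lock, struct list_head *waiters)
    {
        DEFINE_WAKE_Q(wake_q);
        struct demo_waiter *w, *tmp;

        spin_lock(lock);
        list_for_each_entry_safe(w, tmp, waiters, list) {
            list_del(&w->list);
            wake_q_add(&wake_q, w->task);    /* only queues the task */
        }
        spin_unlock(lock);

        wake_up_q(&wake_q);    /* the real wakeups, with the lock dropped */
    }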
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 6 ++++++ ipc/mqueue.c | 1 + ipc/msg.c | 1 + ipc/sem.c | 1 + kernel/futex.c | 1 + kernel/locking/mutex.c | 1 + kernel/locking/rtmutex.c | 1 + kernel/locking/rtmutex_common.h | 1 + kernel/locking/rwsem-xadd.c | 3 ++- kernel/sched/sched.h | 1 + 10 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/wake_q.h diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h new file mode 100644 index 00000000000000..6383b35e4ebae6 --- /dev/null +++ b/include/linux/sched/wake_q.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_WAKE_Q_H +#define _LINUX_SCHED_WAKE_Q_H + +#include + +#endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 4fdd970314315a..40e448de8a7eb0 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/ipc/msg.c b/ipc/msg.c index e3e52ce01123c5..ecc387e573f6d2 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/ipc/sem.c b/ipc/sem.c index e468cd1c12f0d6..947dc2348271f9 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/futex.c b/kernel/futex.c index b687cb22301ce0..8ddcf9ea953c98 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index ad2d9e22697b92..57f6311e24052f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d340be3a488f7a..d4f79849136192 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 990134617b4c02..856dfff5c33ab5 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -13,6 +13,7 @@ #define __KERNEL_RTMUTEX_COMMON_H #include +#include /* * This is the control structure for tasks blocked on a rt_mutex, diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2ad8d8dc3bb19d..4fe8d8ad439642 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -10,10 +10,11 @@ * and Davidlohr Bueso . Based on mutexes. */ #include -#include #include #include +#include #include +#include #include #include "rwsem.h" diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9b9cb260d9cf31..683570739f468e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0505/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
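What the long list of files below wants from this header is lightweight, nanosecond-resolution timestamping. A minimal sketch; the helper is hypothetical, while sched_clock()/local_clock() are the existing interfaces being rehomed:

    #include <linux/sched/clock.h>    /* sched_clock(), local_clock() */
    #include <linux/printk.h>
    #include <linux/types.h>

    /* Time a callback with the scheduler clock: cheap, per-CPU, in ns. */
    static void demo_time_callback(void (*fn)(void *), void *arg)
    {
        u64 t0 = local_clock();

        fn(arg);

        pr_info("callback took %llu ns\n",
                (unsigned long long)(local_clock() - t0));
    }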
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/mach-bcm/platsmp.c | 1 + arch/arm/mach-omap2/pm-debug.c | 1 + arch/arm/probes/kprobes/test-core.c | 1 + arch/cris/kernel/time.c | 2 +- arch/ia64/kernel/setup.c | 1 + arch/microblaze/kernel/timer.c | 1 + arch/mn10300/kernel/time.c | 1 + arch/parisc/kernel/setup.c | 1 + arch/parisc/kernel/time.c | 1 + arch/powerpc/kernel/time.c | 1 + arch/s390/kernel/time.c | 1 + arch/sparc/kernel/ds.c | 1 + arch/sparc/kernel/viohs.c | 1 + arch/tile/kernel/time.c | 1 + arch/x86/events/amd/ibs.c | 1 + arch/x86/events/core.c | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cyrix.c | 1 + arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/cpu/transmeta.c | 1 + arch/x86/kernel/kvmclock.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/tsc.c | 1 + block/cfq-iosched.c | 1 + drivers/acpi/apei/ghes.c | 1 + drivers/clocksource/arm_arch_timer.c | 1 + drivers/clocksource/pxa_timer.c | 1 + drivers/clocksource/timer-digicolor.c | 1 + drivers/cpuidle/cpuidle.c | 1 + drivers/firmware/tegra/bpmp.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/md/bcache/bset.c | 1 + drivers/md/bcache/btree.c | 1 + drivers/md/bcache/sysfs.c | 1 + drivers/md/bcache/util.c | 1 + drivers/md/bcache/util.h | 1 + drivers/md/bcache/writeback.c | 1 + drivers/misc/cxl/native.c | 1 + drivers/net/irda/pxaficp_ir.c | 1 + drivers/perf/arm_pmu.c | 1 + drivers/vhost/net.c | 1 + include/linux/blkdev.h | 1 + include/linux/sched/clock.h | 6 ++++++ include/linux/skbuff.h | 1 + include/net/busy_poll.h | 1 + kernel/events/core.c | 1 + kernel/locking/lockdep.c | 1 + kernel/locking/qspinlock_stat.h | 1 + kernel/printk/printk.c | 1 + kernel/sched/clock.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 1 + kernel/time/sched_clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/torture.c | 1 + kernel/trace/ring_buffer.c | 1 + kernel/trace/trace_clock.c | 1 + kernel/trace/trace_hwlat.c | 1 + kernel/trace/trace_output.c | 1 + kernel/watchdog.c | 1 + lib/plist.c | 1 + net/ipv4/tcp_cdg.c | 2 ++ 65 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/clock.h diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 582886d0d02f72..9e3f275934eb41 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 003a6cb248bec3..5c46ea6756d7b1 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ffa3..c893726aa52d8d 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -203,6 +203,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 2dda6da7152159..bc562cf511a630 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -29,7 +29,7 @@ #include #include #include -#include /* just for sched_clock() - funny that */ +#include #define D(x) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d68322966f33ac..32a6cc36296f5d 
100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 1d6fad50fa76f6..99906619271553 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 67c6416a58f830..06b83b17c5f197 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 068ed3607bac0c..dee6f9d6a153ce 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 1e22f981cd81fb..89421df7016083 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc84a8d47b9e88..37833c5dc27424 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index de66abb479c9eb..c31da46bc037d3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87a55d7709469..b542cc7c8d94d8 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 526fcb5d8ce95d..b30b30ab3ddde2 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c9357012b1c892..5bd4e88c7c604a 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 496e60391fac68..786fd875de9287 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23a6..2ecc0e97772b13 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95fed..35a5d5dca2fae5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c448..adc0ebd8bed0e1 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 
c64ca5929cb5e0..c7e2ca966386df 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48e3..0a3bc19de0177e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb5536e..fe0a615a051b19 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b99983935..8457b49786686f 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bae6ea6cfb9413..d88967659098b5 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bfe4d6c96fbd8f..d17b1a940add08 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -20,6 +20,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992ef..46bcda4cb1c2f8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 13794477785985..440b95ee593c97 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e53bef6cf53c62..b192b42a835105 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 93aa1364376ac8..7a8a4117f123d6 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 9cae38eebec2c7..1c24de215c142a 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index 10318cc99c0e8f..e9f50d28936290 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 62810ff3b00f33..548b90be768548 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c index 4ff02d310868b6..84e4c9a58a0c74 100644 --- a/drivers/firmware/tegra/bpmp.c +++ b/drivers/firmware/tegra/bpmp.c @@ -19,6 +19,7 @@ #include #include #include +#include 
#include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb727039f..df3fef393dbe00 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b9cde116dab34c..344f238b283f3b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "i915_drv.h" #include diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 646fe85261c17b..18526d44688de2 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -11,6 +11,7 @@ #include "bset.h" #include +#include #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index a43eedd5804dd8..384559af310e46 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61ddea7..f90f1361698080 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -13,6 +13,7 @@ #include #include +#include static const char * const cache_replacement_policies[] = { "lru", diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172f3f105d..8c3a938f4bf068 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cf2cbc211d8388..a126919ed10276 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 69e1ae59cab8b9..6ac2e48b923544 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -13,6 +13,7 @@ #include #include +#include #include /* Rate limiting */ diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 09505f432eda62..7ae710585267a5 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 6e8f616be48eff..1dba16bc7f8d77 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6d9335865880e1..9612b84bc3e008 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2fe35354f20e5e..5c98ad4d2f4c25 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9cadb..796016e63c1da7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 00000000000000..7cb7d79b2cf9af --- /dev/null +++ 
b/include/linux/sched/clock.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd263691120..c776abd86937f5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b8d637225a07dd..b96832df239ec7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,7 @@ #define _LINUX_NET_BUSY_POLL_H #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f01251..42fe16a84f979b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e98..cdafaff926cac1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,7 @@ #define DISABLE_BRANCH_PROFILING #include #include +#include #include #include #include diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e852be4851fc91..4a30ef63c60764 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include #include +#include #include static const char * const qstat_names[qstat_num + 1] = { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 34da86e73d00b9..47050eedc206ed 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722be..dd7817cdbf58ed 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bd317db98108a..1a7fd3d21e5a75 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,6 +6,7 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 683570739f468e..4d386461f13a0d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a3895..ea6b610c4c57c3 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab39765..4fee1c3abd0b4f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/torture.c b/kernel/torture.c index 01a99976f072e5..55de96529287a2 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a85739efcc304b..96fc3c043ad654 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 
0f06532a755b71..5fdc779f411d83 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index edfacd954e1bb5..21ea6ae77d93fd 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "trace.h" static struct trace_array *hwlat_trace; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 070866c32eb9d6..107afca9764950 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 63177be0159e94..144d7b1b0364e8 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/lib/plist.c b/lib/plist.c index 3a30c53db06158..199408f91057d5 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -175,6 +175,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) #ifdef CONFIG_DEBUG_PI_LIST #include +#include #include #include diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 35b280361cb20f..50a0f3e51d5ba3 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #define HYSTART_ACK_TRAIN 1 From ae7e81c077d60507dcec139e40a6d10cf932cf4b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:07:51 +0100 Subject: [PATCH 0506/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to move scheduler ABI details to , which will be used from a number of .c files. Create empty placeholder header that maps to . Include the new header in the files that are going to need it. 
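The scheduler ABI details in question are presumably struct sched_param and struct sched_attr. As a hedged sketch of why kthread-heavy code in the diffstat needs the uapi header, an invented kernel thread that promotes itself to an RT policy:

    #include <uapi/linux/sched/types.h>    /* struct sched_param after the move */
    #include <linux/sched.h>
    #include <linux/kthread.h>
    #include <linux/delay.h>

    static int demo_rt_thread(void *unused)
    {
        struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };

        /* Kernel-internal variant: no capability checks. */
        sched_setscheduler_nocheck(current, SCHED_FIFO, &param);

        while (!kthread_should_stop())
            msleep(100);

        return 0;
    }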
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 1 + crypto/crypto_engine.c | 1 + drivers/acpi/acpi_pad.c | 1 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/firmware/psci_checker.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + drivers/media/pci/ivtv/ivtv-driver.c | 1 + drivers/mmc/core/sdio_irq.c | 1 + drivers/spi/spi.c | 1 + drivers/staging/android/ion/ion_heap.c | 1 + drivers/thermal/intel_powerclamp.c | 1 + drivers/tty/serial/sc16is7xx.c | 1 + include/uapi/linux/sched/types.h | 6 ++++++ kernel/irq/manage.c | 1 + kernel/kthread.c | 1 + kernel/locking/locktorture.c | 1 + kernel/rcu/rcuperf.c | 1 + kernel/rcu/rcutorture.c | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/sched/core.c | 1 + kernel/sched/cpufreq_schedutil.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + kernel/trace/trace_selftest.c | 1 + kernel/watchdog.c | 1 + 26 files changed, 31 insertions(+) create mode 100644 include/uapi/linux/sched/types.h diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 46730017b3c54c..083c9e517d223e 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index f1bf3418d96830..727bd5c3569e3b 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" #define CRYPTO_ENGINE_MAX_QLEN 10 diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index eb76a4c10dbfb1..75443103128255 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7728dd77230a8..8b40a5b2f8e6b2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #define __KERNEL_SYSCALLS__ #include diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci_checker.c index 29d58feaf67535..6523ce96286597 100644 --- a/drivers/firmware/psci_checker.c +++ b/drivers/firmware/psci_checker.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 1bf83ed113b3cc..16f96563cd2b8b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "gpu_scheduler.h" diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index fcfa423d08bdfb..7044e9a6abf7a5 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -23,6 +23,7 @@ */ #include +#include #include "i915_drv.h" diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index ab2ae53618e829..e73c153285f0d5 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -59,6 +59,7 @@ #include #include #include "tuner-xc2028.h" +#include /* If you have already X v4l cards, then set this to X. 
This way the device numbers stay matched. Example: you have a WinTV card diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index d29faf2addfe51..6d4b72080d5124 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 44222ef9471e58..90b5b2efafbf45 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 4e5c0f17f579ae..c69d0bd536934a 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "ion.h" diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index a47103a659fa4d..d718cd179ddbb2 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 793395451982d8..ca54ce074a5f84 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -29,6 +29,7 @@ #include #include #include +#include #define SC16IS7XX_NAME "sc16is7xx" #define SC16IS7XX_MAX_DEVS 8 diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h new file mode 100644 index 00000000000000..d162d315f4b5ad --- /dev/null +++ b/include/uapi/linux/sched/types.h @@ -0,0 +1,6 @@ +#ifndef _UAPI_LINUX_SCHED_TYPES_H +#define _UAPI_LINUX_SCHED_TYPES_H + +#include + +#endif /* _UAPI_LINUX_SCHED_TYPES_H */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 944d068b6c4887..09740952e4de13 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index 8461a4372e8aab..ef9b9eb809c743 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -5,6 +5,7 @@ * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). 
*/ +#include #include #include #include diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 28350dc8ecbb17..5ea0a8969ee24f 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 123ccbd2244929..a4a86fb47e4a3c 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d81345be730ea5..6a28b79710f002 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb62ce23ffc7da..e456327a63d68e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a240f3308be61c..9dabb04003be25 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,6 +28,7 @@ #include #include #include +#include #include "../time/tick-internal.h" #ifdef CONFIG_RCU_BOOST diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a7fd3d21e5a75..ed39d1d0b64a43 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index fd465931364053..8f8de3d4d6b7a3 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 6df9a83e20d7eb..c190a4d5013c5e 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b0f86ea77881ec..cb917cebae291b 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,5 +1,6 @@ /* Include in trace.c */ +#include #include #include #include diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 144d7b1b0364e8..52718f4512e93b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From 4f17722c7256af8e17c2c4f29f170247264bdf48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 08:45:17 +0100 Subject: [PATCH 0507/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
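For reference, the fixed-point load-average helpers this header is meant to carry are used roughly as in fs/proc/loadavg.c; the printing function below is illustrative, while get_avenrun()/FIXED_1/LOAD_INT()/LOAD_FRAC() are the existing interfaces:

    #include <linux/sched/loadavg.h>    /* get_avenrun(), FIXED_1, LOAD_INT/FRAC */
    #include <linux/printk.h>

    static void demo_print_loadavg(void)
    {
        unsigned long avn[3];

        /* Same offset and rounding as /proc/loadavg. */
        get_avenrun(avn, FIXED_1 / 200, 0);

        pr_info("load average: %lu.%02lu %lu.%02lu %lu.%02lu\n",
                LOAD_INT(avn[0]), LOAD_FRAC(avn[0]),
                LOAD_INT(avn[1]), LOAD_FRAC(avn[1]),
                LOAD_INT(avn[2]), LOAD_FRAC(avn[2]));
    }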
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m68k/kernel/time.c | 1 + arch/microblaze/kernel/heartbeat.c | 1 + arch/powerpc/platforms/cell/cpufreq_spudemand.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 1 + arch/s390/appldata/appldata_os.c | 1 + arch/sh/drivers/heartbeat.c | 1 + arch/sparc/kernel/led.c | 1 + drivers/cpuidle/governors/menu.c | 1 + drivers/leds/trigger/ledtrig-heartbeat.c | 1 + drivers/platform/x86/intel_ips.c | 1 + fs/proc/loadavg.c | 1 + include/linux/sched/loadavg.h | 6 ++++++ kernel/debug/kdb/kdb_main.c | 1 + kernel/sched/core.c | 1 + kernel/sched/loadavg.c | 1 + kernel/sys.c | 1 + kernel/time/timekeeping.c | 1 + net/sched/em_meta.c | 1 + 18 files changed, 23 insertions(+) create mode 100644 include/linux/sched/loadavg.h diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 4e5aa2f4f52254..87160b4415fbb0 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/heartbeat.c b/arch/microblaze/kernel/heartbeat.c index 4643e3ab941495..2022130139d2de 100644 --- a/arch/microblaze/kernel/heartbeat.c +++ b/arch/microblaze/kernel/heartbeat.c @@ -9,6 +9,7 @@ */ #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 88301e53f0856b..882944c36ef571 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9b543df210fb37..d317c84dc794df 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 08b9e942a262ed..079446619f8903 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index 49bace446a1ab8..c6d96049a0bb07 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index 3ae36f36e7581f..44a3ed93c214c5 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 8d6d25c38c020e..fe86060e48f75e 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index e6f2f8b9f09ad4..afa3b40992140b 100644 --- a/drivers/leds/trigger/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 55663b3d72823b..58dcee562d6417 100644 --- a/drivers/platform/x86/intel_ips.c 
+++ b/drivers/platform/x86/intel_ips.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index aec66e6c2060b8..add5255ce7e027 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h new file mode 100644 index 00000000000000..3ae74be327e8cf --- /dev/null +++ b/include/linux/sched/loadavg.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_LOADAVG_H +#define _LINUX_SCHED_LOADAVG_H + +#include + +#endif /* _LINUX_SCHED_LOADAVG_H */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ca183919d3027a..308937c7a68772 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ed39d1d0b64a43..11a0684a29a70a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index a2d6eb71f06b80..7296b7308ecaeb 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -7,6 +7,7 @@ */ #include +#include #include "sched.h" diff --git a/kernel/sys.c b/kernel/sys.c index b07adca97ea3d3..28b8a4c1bc7ea4 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,7 @@ #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 95b258dd75dbb1..fb564acee0f3db 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 41c80b6c39063a..ae7e4f5b348b86 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include From 4eb5aaa3af8a54e5e9bac90e2b42bbab1f1ee5a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: [PATCH 0508/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
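The contents that eventually move into the new header are not part of this patch, so purely as an assumption about its final shape: the usual CONFIG-gated declarations with empty inline stubs, which is what lets callers such as kernel/fork.c and kernel/exit.c stay free of #ifdefs:

    /* Sketch only -- not taken from this patch. */
    struct signal_struct;
    struct task_struct;

    #ifdef CONFIG_SCHED_AUTOGROUP
    extern void sched_autogroup_fork(struct signal_struct *sig);
    extern void sched_autogroup_exit(struct signal_struct *sig);
    extern void sched_autogroup_exit_task(struct task_struct *p);
    #else
    static inline void sched_autogroup_fork(struct signal_struct *sig) { }
    static inline void sched_autogroup_exit(struct signal_struct *sig) { }
    static inline void sched_autogroup_exit_task(struct task_struct *p) { }
    #endif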
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/proc/base.c | 1 + include/linux/sched/autogroup.h | 6 ++++++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/autogroup.h | 1 + kernel/sys.c | 1 + 6 files changed, 11 insertions(+) create mode 100644 include/linux/sched/autogroup.h diff --git a/fs/proc/base.c b/fs/proc/base.c index 1e1e182d571b4a..27af4ea315a300 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h new file mode 100644 index 00000000000000..d745f22e57dccf --- /dev/null +++ b/include/linux/sched/autogroup.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_AUTOGROUP_H +#define _LINUX_SCHED_AUTOGROUP_H + +#include + +#endif /* _LINUX_SCHED_AUTOGROUP_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 8a768a3672a555..9c0b92833fbbac 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index d043fedc03c819..234f4bfb8efd03 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 890c95f2587a4d..ce40c810cd5c34 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h @@ -2,6 +2,7 @@ #include #include +#include struct autogroup { /* diff --git a/kernel/sys.c b/kernel/sys.c index 28b8a4c1bc7ea4..cfe82ae62423ed 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,7 @@ #include #include +#include #include #include #include From 6e84f31522f931027bf695752087ece278c10d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: [PATCH 0509/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. The APIs that are going to be moved first are: mm_alloc() __mmdrop() mmdrop() mmdrop_async_fn() mmdrop_async() mmget_not_zero() mmput() mmput_async() get_task_mm() mm_access() mm_release() Include the new header in the files that are going to need it. 
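The APIs listed above implement the two-level mm reference counting: get_task_mm()/mmput() pin mm_users and keep the whole address space alive, while mmdrop() releases the lower-level mm_count reference that only pins struct mm_struct itself. A short sketch of the common pattern; the helper is invented for illustration:

    #include <linux/sched/mm.h>    /* get_task_mm(), mmput(), mmdrop(), ... */
    #include <linux/mm_types.h>

    /* Read a field of another task's mm while holding an mm_users reference. */
    static unsigned long demo_task_total_vm(struct task_struct *task)
    {
        struct mm_struct *mm = get_task_mm(task);    /* NULL for kernel threads */
        unsigned long total = 0;

        if (mm) {
            total = mm->total_vm;    /* address space pinned by mm_users */
            mmput(mm);
        }
        return total;
    }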
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/mmu_context.h | 1 + arch/arc/kernel/troubleshoot.c | 2 ++ arch/arm/mach-rpc/ecard.c | 1 + arch/frv/mm/mmu-context.c | 1 + arch/m68k/sun3/mmu_emu.c | 1 + arch/powerpc/platforms/cell/spufs/context.c | 2 ++ drivers/android/binder.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_userptr.c | 1 + drivers/infiniband/core/umem.c | 1 + drivers/infiniband/core/umem_odp.c | 1 + drivers/infiniband/hw/hfi1/file_ops.c | 1 + drivers/infiniband/hw/mlx4/main.c | 2 ++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 1 + drivers/iommu/amd_iommu_v2.c | 1 + drivers/iommu/intel-svm.c | 1 + drivers/lguest/lguest_user.c | 1 + drivers/misc/cxl/fault.c | 1 + drivers/misc/mic/scif/scif_rma.c | 2 ++ drivers/oprofile/buffer_sync.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 2 ++ drivers/vfio/vfio_iommu_type1.c | 1 + drivers/vhost/vhost.c | 1 + drivers/xen/gntdev.c | 1 + fs/exec.c | 1 + fs/proc/array.c | 1 + fs/proc/base.c | 1 + fs/proc/task_mmu.c | 1 + fs/proc/task_nommu.c | 2 ++ fs/userfaultfd.c | 1 + include/linux/sched/mm.h | 6 ++++++ kernel/cgroup/cpuset.c | 1 + kernel/events/core.c | 1 + kernel/events/uprobes.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/futex.c | 1 + kernel/ptrace.c | 1 + kernel/sched/sched.h | 1 + kernel/sys.c | 1 + kernel/trace/trace_output.c | 1 + kernel/tsacct.c | 1 + mm/khugepaged.c | 1 + mm/ksm.c | 1 + mm/memcontrol.c | 1 + mm/memory.c | 1 + mm/mempolicy.c | 1 + mm/migrate.c | 1 + mm/mmu_context.c | 1 + mm/mmu_notifier.c | 1 + mm/nommu.c | 1 + mm/oom_kill.c | 1 + mm/process_vm_access.c | 1 + mm/rmap.c | 1 + mm/swapfile.c | 1 + mm/util.c | 1 + virt/kvm/async_pf.c | 1 + virt/kvm/kvm_main.c | 1 + 60 files changed, 71 insertions(+) create mode 100644 include/linux/sched/mm.h diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index b0b87f2447f524..64b5ebae1ae8bc 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -20,6 +20,7 @@ #include #include +#include #include diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 82f9bc819f4a2d..0c48907f56a9a9 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index dc67a7fb383199..6b279d0377742c 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 3473bde77f566e..4031289bf2ecc5 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index e9d7fbe4d5ae4a..7fdc61525e0b73 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 3b4152faeb1fc4..b500b17254a00d 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -25,6 +25,8 @@ #include #include #include 
+#include + #include #include #include "spufs.h" diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2bbcdc6fdfeec9..e33e7fbe870b36 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ca5f2aa7232da7..84d1ffd1eef950 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index e78f1406885d10..ae2882b64b8253 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -16,6 +16,7 @@ #include #include +#include #include "etnaviv_drv.h" #include "etnaviv_gem.h" diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 0115989e324a20..22b46398831e09 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -31,6 +31,7 @@ #include #include #include +#include struct i915_mm_struct { struct mm_struct *mm; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 446b56a5260b73..d525b1a2986a1c 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f2fc0431512def..7279f259494ffc 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3b19c16a9e4578..f78c739b330a45 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -48,6 +48,7 @@ #include #include #include +#include #include diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 88608906ce2503..56da8f0d71bbc7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -39,6 +39,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5b3355268725b8..4b1ec3ff152ad9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,6 +41,7 @@ #include #endif #include +#include #include #include #include diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 1ccee6ea5bc309..ba868be5af2e91 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index f8ed8c95b68537..063343909b0d12 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 51f2b228723f2c..23c427602c55ba 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git 
a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 30c60687d277c4..1a6787bc9386a6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 377e650a2a1dc3..e33011e3e7e26a 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index f806a4471eb913..d0e9c60f944e16 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -17,6 +17,8 @@ */ #include #include +#include + #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 642478d35e99a5..eb0c35994b5401 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "oprofile_stats.h" diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 59b3f62a2d64eb..185d50ee1b1242 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index bd6f293c4ebd59..7e94c7fc90ae2d 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4269e621e254ab..4a00140a762415 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "vhost.h" diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 2ef2b61b69dfe0..c77a0751a31173 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index e595e152958134..8929da96cca170 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index fe12b519d09b53..61bea3a4ecc5e6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 27af4ea315a300..9efe12376af1ba 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee3efb229ef6a6..f08bd31c1081cc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1ef97cfcf42287..23266694db1171 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -7,6 +7,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3c421d06a18e6e..18d12bfff7709c 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h new file mode 100644 
index 00000000000000..a7adba1cd0a9b2 --- /dev/null +++ b/include/linux/sched/mm.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_MM_H +#define _LINUX_SCHED_MM_H + +#include + +#endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b3088886cd375b..a5c46db2855c4d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/core.c b/kernel/events/core.c index 42fe16a84f979b..6f41548f2e320a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d630f8ac4d2f21..62f2dbd13efc1e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -27,6 +27,7 @@ #include /* read_mapping_page */ #include #include +#include #include #include /* anon_vma_prepare */ #include /* set_pte_at_notify */ diff --git a/kernel/exit.c b/kernel/exit.c index 9c0b92833fbbac..48649ccbb649d3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 234f4bfb8efd03..1875468e2a811a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/kernel/futex.c b/kernel/futex.c index 8ddcf9ea953c98..229a744b1781be 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49ba7c1ade9d07..6fe5a289791242 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4d386461f13a0d..e2307a6c29f13d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index cfe82ae62423ed..dc71ec1e196724 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 107afca9764950..02a4aeb22c4785 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 5c21f053505655..571a2d3821d857 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 34bce5c308e3b1..212a2afe88307b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/mm/ksm.c b/mm/ksm.c index 520e4c37fec738..5632c9d0245786 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 45867e439d31d7..c52ec893e241cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index 14fc0b40f0bb6c..b0495ec74d29a7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff 
--git a/mm/mempolicy.c b/mm/mempolicy.c index 1e7873e40c9a16..90892416ed75f9 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/migrate.c b/mm/migrate.c index 2c63ac06791bbd..9a0897a14d37be 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -40,6 +40,7 @@ #include #include #include +#include #include diff --git a/mm/mmu_context.c b/mm/mmu_context.c index daf67bb02b4af8..8888b124a386c5 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 32bc9f2ff7eb93..a7652acd2ab93c 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* global SRCU for all MMs */ diff --git a/mm/nommu.c b/mm/nommu.c index aae06e854552dd..79abed514a4be9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 51c091849dcb65..d2893661869893 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 84d0c7eada2b50..8973cd231ecee9 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 8774791e28099b..0367a0506667ce 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -46,6 +46,7 @@ */ #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index fadc6a1c0da0b2..ff2bf3f61b1419 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/mm/util.c b/mm/util.c index b8f538863b5a19..14f4e13323e236 100644 --- a/mm/util.c +++ b/mm/util.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 2366177172f67c..bb298a200cd3f2 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "async_pf.h" #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 35f71409d9ee4d..dd5de09bf36251 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include

From f7ccbae45c5e2c1077654b0e857e7efb1aa31c92 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 8 Feb 2017 18:51:30 +0100
Subject: [PATCH 0510/1911] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/coredump.h>

We are going to split <linux/sched/coredump.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/coredump.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable.

Include the new header in the files that are going to need it.
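
A minimal illustration (not part of the patch; the helper below is made up) of the kind of definition this split is about: the per-mm MMF_* bits in mm->flags, such as MMF_VM_HUGEPAGE, end up in <linux/sched/coredump.h>, and this is essentially the check khugepaged performs before scanning an mm:

#include <linux/sched/coredump.h>       /* for now this just maps to <linux/sched.h> */

/* Made-up example, not kernel code; MMF_VM_HUGEPAGE and mm->flags are real. */
static bool example_mm_marked_for_thp(struct mm_struct *mm)
{
        return test_bit(MMF_VM_HUGEPAGE, &mm->flags);
}
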
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/coredump.c | 1 + fs/exec.c | 1 + fs/proc/base.c | 1 + fs/proc/internal.h | 1 + include/linux/khugepaged.h | 3 ++- include/linux/ksm.h | 1 + include/linux/sched/coredump.h | 6 ++++++ kernel/cred.c | 1 + kernel/events/uprobes.c | 1 + kernel/fork.c | 1 + kernel/ptrace.c | 1 + kernel/sys.c | 1 + kernel/sysctl.c | 1 + mm/huge_memory.c | 1 + mm/khugepaged.c | 1 + mm/ksm.c | 1 + mm/memory.c | 1 + mm/oom_kill.c | 1 + 20 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/coredump.h diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 443a6f537d569f..fbbe52e1250e36 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index ffca4bbc3d63a1..222873bb689cc7 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index ae6b05629ca174..23a539b292255b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 8929da96cca170..9c80d011594eea 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 9efe12376af1ba..4c5fa704e38c51 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5d6960f5f1c03c..550e8b183abece 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -14,6 +14,7 @@ #include #include #include +#include struct ctl_table_header; struct mempolicy; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1e032a1ddb3eaa..5d9a400af5091f 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -1,7 +1,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include /* MMF_VM_HUGEPAGE */ +#include /* MMF_VM_HUGEPAGE */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 481c8c4627ca22..e1cfda4bee588d 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -12,6 +12,7 @@ #include #include #include +#include struct stable_node; struct mem_cgroup; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h new file mode 100644 index 00000000000000..ab81e564e07697 --- /dev/null +++ b/include/linux/sched/coredump.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_COREDUMP_H +#define _LINUX_SCHED_COREDUMP_H + +#include + +#endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/kernel/cred.c b/kernel/cred.c index 5f264fb5737dcd..2bc66075740fdd 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 62f2dbd13efc1e..0e137f98a50c30 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include /* anon_vma_prepare */ #include /* set_pte_at_notify */ diff --git a/kernel/fork.c b/kernel/fork.c 
index 1875468e2a811a..7332448b668a28 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6fe5a289791242..bcdbdc19eb353e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index dc71ec1e196724..e2d743fb7f55f0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bb260ceb371847..acf0a5a06da7c0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 71e3dede95b424..dee92fa26bbb8e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 212a2afe88307b..ba40b7f673f4dd 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/ksm.c b/mm/ksm.c index 5632c9d0245786..19b4f2dea7a591 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index b0495ec74d29a7..6fc918b2c459eb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d2893661869893..69f75b0146076e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include

From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 8 Feb 2017 18:51:30 +0100
Subject: [PATCH 0511/1911] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/signal.h>

We are going to split <linux/sched/signal.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/signal.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable.

Include the new header in the files that are going to need it.
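
A minimal illustration (not part of the patch; the function below is made up) of what many of the files in the diffstat below actually use from the scheduler headers: signal_pending() and related helpers, which is what <linux/sched/signal.h> will eventually provide on its own:

#include <linux/sched/signal.h>         /* for now this just maps to <linux/sched.h> */

/*
 * Made-up example, not kernel code: a polling step that backs out early when
 * the current task has a signal pending.
 */
static int example_poll_step(void)
{
        if (signal_pending(current))
                return -EINTR;
        schedule_timeout_interruptible(HZ / 10);        /* sleep ~100 ms */
        return 0;
}
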
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 2 +- arch/arc/kernel/traps.c | 2 +- arch/arc/mm/fault.c | 2 +- arch/arm/kernel/ptrace.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm/mm/init.c | 1 + arch/arm/mm/mmap.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mmap.c | 2 +- arch/avr32/kernel/traps.c | 2 +- arch/blackfin/kernel/trace.c | 2 +- arch/blackfin/kernel/traps.c | 1 + arch/cris/mm/fault.c | 1 + arch/frv/kernel/traps.c | 2 +- arch/h8300/kernel/ptrace_s.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/hexagon/mm/vm_fault.c | 1 + arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/ia64/mm/init.c | 1 + arch/mips/kernel/branch.c | 2 +- arch/mips/kernel/signal_o32.c | 1 + arch/mips/mm/mmap.c | 2 +- arch/mips/sgi-ip22/ip22-berr.c | 2 +- arch/mips/sgi-ip22/ip22-reset.c | 2 +- arch/mn10300/kernel/fpu.c | 2 ++ arch/openrisc/mm/fault.c | 2 +- arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/math-emu/driver.c | 3 ++- arch/powerpc/kvm/book3s_64_vio.c | 1 + arch/powerpc/mm/mmap.c | 2 +- arch/powerpc/mm/mmu_context_iommu.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/s390/kernel/nmi.c | 3 +++ arch/s390/mm/mmap.c | 1 + arch/score/kernel/traps.c | 2 +- arch/sh/kernel/cpu/sh2a/fpu.c | 2 +- arch/sh/kernel/hw_breakpoint.c | 1 + arch/sh/kernel/traps.c | 2 ++ arch/sh/math-emu/math.c | 2 +- arch/sh/mm/asids-debugfs.c | 2 ++ arch/sh/mm/fault.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/unaligned_32.c | 2 +- arch/tile/mm/mmap.c | 2 +- arch/um/drivers/line.c | 3 ++- arch/um/kernel/reboot.c | 2 +- arch/um/kernel/skas/mmu.c | 3 ++- arch/um/kernel/tlb.c | 3 ++- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/fpu-ucf64.c | 2 +- arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/fault.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- arch/x86/kvm/mmu.c | 1 + arch/x86/mm/mmap.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- drivers/android/binder.c | 2 +- drivers/block/drbd/drbd_int.h | 2 +- drivers/char/snsc_event.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- drivers/gpu/drm/drm_lock.c | 2 ++ drivers/gpu/drm/ttm/ttm_lock.c | 2 +- drivers/infiniband/core/umem.c | 2 +- drivers/infiniband/hw/hfi1/user_pages.c | 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/l1oip_core.c | 2 ++ drivers/md/md.c | 1 + drivers/md/raid1.c | 3 +++ drivers/md/raid5.c | 2 ++ drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/mic/cosm/cosm_scif_server.c | 2 ++ drivers/misc/mic/cosm_client/cosm_scif_client.c | 2 ++ drivers/misc/mic/scif/scif_rma.c | 1 + drivers/net/slip/slip.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/parisc/power.c | 2 +- drivers/ps3/ps3-sys-manager.c | 1 + drivers/s390/char/fs3270.c | 1 + 
drivers/s390/char/keyboard.c | 2 +- drivers/scsi/bnx2fc/bnx2fc.h | 2 +- drivers/scsi/bnx2i/bnx2i.h | 2 +- drivers/scsi/libiscsi.c | 1 + drivers/staging/android/lowmemorykiller.c | 2 +- drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c | 2 +- drivers/staging/rtl8188eu/include/osdep_service.h | 2 +- drivers/staging/rtl8712/osdep_service.h | 2 +- drivers/staging/rtl8712/rtl8712_cmd.c | 1 + .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 1 + .../staging/vc04_services/interface/vchiq_arm/vchiq_util.h | 2 +- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_erl0.c | 2 ++ drivers/target/iscsi/iscsi_target_login.c | 1 + drivers/target/iscsi/iscsi_target_nego.c | 1 + drivers/tty/pty.c | 2 +- drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/tty/vt/keyboard.c | 2 +- drivers/tty/vt/vt.c | 2 +- drivers/tty/vt/vt_ioctl.c | 2 +- drivers/usb/atm/usbatm.c | 2 +- drivers/usb/core/devio.c | 1 + drivers/usb/gadget/function/f_mass_storage.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 1 + drivers/vfio/vfio_iommu_type1.c | 2 +- drivers/w1/w1_family.c | 2 +- drivers/w1/w1_int.c | 1 + fs/attr.c | 1 + fs/autofs4/waitq.c | 1 + fs/cifs/connect.c | 1 + fs/coda/upcall.c | 2 +- fs/coredump.c | 2 +- fs/exec.c | 1 + fs/file.c | 2 +- fs/fs_struct.c | 2 +- fs/jffs2/background.c | 2 +- fs/lockd/svc.c | 2 +- fs/ncpfs/inode.c | 1 + fs/ncpfs/sock.c | 1 + fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 2 +- fs/proc/fd.c | 2 +- fs/select.c | 4 ++-- include/linux/oom.h | 2 +- include/linux/ptrace.h | 1 + include/linux/sched/signal.h | 6 ++++++ include/linux/signalfd.h | 2 +- include/linux/taskstats_kern.h | 2 +- ipc/mqueue.c | 1 + kernel/bpf/syscall.c | 1 + kernel/cpu.c | 2 +- kernel/debug/gdbstub.c | 1 + kernel/debug/kdb/kdb_bt.c | 2 +- kernel/hung_task.c | 2 ++ kernel/rcu/update.c | 2 +- kernel/sched/sched.h | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/tracepoint.c | 2 +- kernel/tsacct.c | 2 +- kernel/user_namespace.c | 1 + lib/is_single_threaded.c | 3 +-- mm/filemap.c | 1 + mm/kmemleak.c | 2 +- mm/memory-failure.c | 2 +- mm/vmacache.c | 2 +- net/9p/client.c | 2 +- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/stream.c | 1 + net/decnet/af_decnet.c | 2 +- net/irda/af_irda.c | 1 + net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sctp/socket.c | 1 + net/sunrpc/svc.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- security/selinux/hooks.c | 2 +- 180 files changed, 204 insertions(+), 121 deletions(-) create mode 100644 include/linux/sched/signal.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9d27a7d333dca2..568ca29f2ad90f 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 17308f9253066a..b8221f112eeecd 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -6,7 +6,7 @@ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ -#include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index af2994206b4b8b..6448ab00043d23 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 47948b4dd1574b..c25e8827e7cd03 
100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index c927aa84e652e0..ff83e78d0cfb55 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -13,7 +13,7 @@ * Rahul Trivedi: Codito Technologies 2004 */ -#include +#include #include #include #include diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index e94e5aa33985c5..162c9752887251 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ae738a6319f6a3..46f7bab81c40a4 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -10,7 +10,7 @@ * published by the Free Software Foundation. */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 9688ec0c6ef43f..dc5f1a22b3c9ad 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 7d5f4c736a16b4..d7fe2de9cc9e6a 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c2b5b9892fd17d..520c7778d330c4 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index bf4d3bc41a7a85..2a9040dcf47e28 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 66353caa35b9f7..d448f9cd771540 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 569d5a650a4a2c..a71a48e71fffa8 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b883f1f75216ae..06da8ea16bbe5e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a22161ccf4470a..64fc32ea342271 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7d47c2cdfd9315..5b8779a849a291 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 81283851c9af9a..30dd60ab8a6511 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 
01c171723bb33b..1e0a2650c88b59 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index eb4a3fcfbaff17..50b54132502504 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -14,7 +14,7 @@ #include #include /* print_modules */ #include -#include +#include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 719dd796c12cb1..01e546ad2f7acd 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 1ed85ddadc0d25..32676d7721b171 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 94183d3639ef5c..1fca464f1b9e75 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 31221fb4348e22..43c134d6081ce5 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index ef5a9c13e76d3b..c0af930052c019 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 110dab152f82c2..4496bcf605efbd 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 489875fd2be459..3eec33c5cfd716 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 60ef83e6db71eb..8786c8b4f187ca 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -6,7 +6,7 @@ #define ASM_OFFSETS_C 1 -#include +#include #include #include #include diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 8682df6263d6d3..987b11be0021db 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9509cc73b9c641..5ac51069e45342 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 8981ce98afb365..48ba46b025e17e 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 99348d7f2255ce..a13680ca1e6118 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ 
-15,7 +15,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 7f2feb21753c8b..15f09cfff335b9 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1998-2002 Hewlett-Packard Co * David Mosberger-Tang */ -#include +#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 06cdaef54b2eae..8f3efa682ee848 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index ae037a304ee459..b11facd11c9d05 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 MIPS Technologies, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index 5e169fc5ca5c08..2b3572fb5f1b9d 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d6d92c02308dd8..374d71e61ef65b 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 3f6ccd53c15d5b..ff8e1935c873a7 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index a36f6b87548a5a..03a39ac5ead92e 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c index 2578b7ae7dd55e..50ce7b447fed4b 100644 --- a/arch/mn10300/kernel/fpu.c +++ b/arch/mn10300/kernel/fpu.c @@ -9,6 +9,8 @@ * 2 of the Licence, or (at your option) any later version. 
*/ #include +#include + #include #include #include diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 53592a639744f4..e310ab499385c5 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index bf3294171230ad..ce07cd3f250729 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 0a21067ac0a349..a08ab481e5567d 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 09ef4136c6935e..2fb59d2e2b294b 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -27,7 +27,8 @@ * Copyright (C) 2001 Hewlett-Packard */ -#include +#include + #include "float.h" #include "math-emu.h" diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index ab9d14c0e4609a..3e26cd4979f936 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 2f1e44362198d3..8013861aeaa742 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 7de7124ac91bf9..497130c5c74203 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -10,7 +10,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e29e4d5afa2ddd..870c0a82d560de 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include +#include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 26fa03fc9f3c8c..16321ad9e70c04 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 80c093e0c6f1a3..9bf8327154eeee 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include + #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 7ae1282d5be98d..5ea09403bb878b 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 569ac02f68dfe5..fa624f30f783e7 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 98bbaa447c9340..352f894bece10a 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -9,7 +9,7 @@ * * FIXME! These routines can be optimized in big endian case. */ -#include +#include #include #include #include diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 2197fc58418658..afe965712a6940 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 9513fa7840aa99..3036dee854d1e9 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include /* print_modules */ #include diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index 5078cb809750f5..c86f4360c6cee5 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index bf95fdaedd0cf1..110bd35165bfd4 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 9bf876780cef4b..6fd1bf7481c7d8 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index fb7b185ee94171..ae49639a484e48 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 884c70331345d8..54d3999d811937 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index d20d4e3fd129d6..8367dce5f41b5f 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -8,7 +8,7 @@ #include -#include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index ef61c597898bc0..377e312dc27eda 100644 
--- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 62087028a9ce1e..366e57f5e8d635 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,8 +5,9 @@ #include #include -#include +#include #include + #include "chan.h" #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index b60a9f8cda7550..79218106a03365 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include +#include #include #include #include diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 3943e9d7d13d2d..7a1f2a936fd10b 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -5,8 +5,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 3777b82759bda1..37508b190106db 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -5,7 +5,8 @@ #include #include -#include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad8f206ab5e851..9711ae4aaa6ade 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c index a53343a90ca294..12c8c9527b8e83 100644 --- a/arch/unicore32/kernel/fpu-ucf64.c +++ b/arch/unicore32/kernel/fpu-ucf64.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index c54e32410eade6..7f5e06f9a2026d 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index b656d216a8a85d..bbefcc46a45e41 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36a9..df91fb393a01e1 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb964..d48af18e7baf94 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cda35277278ab..ac7810513d0e95 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index d2dc0438d654a8..5eabf34008f1cf 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 282bf721a4d685..84abd66e680dc4 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include 
#include #include diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e33e7fbe870b36..aae4d8d4be361b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4cb8f21ff4eff2..724d1c50fc5283 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index 59bcefd6ec7c8b..e452673dff6612 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6a3470f849989a..d1ce83d73a877b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 32d43f86a8f20f..96bb6badb818d1 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -34,6 +34,8 @@ */ #include +#include + #include #include "drm_legacy.h" #include "drm_internal.h" diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index f154fb1929bd18..913f4318cdc03a 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #define TTM_WRITE_LOCK_PENDING (1 << 0) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d525b1a2986a1c..27f155d2df8da6 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 20f4ddcac3b0f1..68295a12b77188 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -46,7 +46,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 75f08624ac052a..ce83ba9a12eff6 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "qib.h" diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ba868be5af2e91..c49db7c33979c9 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 63eaa0a9f8a18e..1b169559a240b0 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 67c21876c35f1a..6ceca7db62ad42 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -234,6 +234,8 @@ #include #include #include +#include + #include #include "core.h" #include "l1oip.h" diff 
--git a/drivers/md/md.c b/drivers/md/md.c index 985374f20e2e3f..548d1b8014f89e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -44,6 +44,7 @@ */ +#include #include #include #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7453d94eeed700..fbc2d7851b497f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -37,7 +37,10 @@ #include #include #include +#include + #include + #include "md.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2ce23b01dbb21d..4fb09b3fcb4104 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -55,6 +55,8 @@ #include #include #include +#include + #include #include "md.h" diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index cb290b8ca0c812..dd4617764f147d 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c index 5696df4326b5c5..85f7d09cc65fd1 100644 --- a/drivers/misc/mic/cosm/cosm_scif_server.c +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -19,6 +19,8 @@ * */ #include +#include + #include "cosm_main.h" /* diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c index 03e98bf1ac1553..aa530fcceaa995 100644 --- a/drivers/misc/mic/cosm_client/cosm_scif_client.c +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include "../cosm/cosm_main.h" #define COSM_SCIF_MAX_RETRIES 10 diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index d0e9c60f944e16..329727e00e9703 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 08db4d687533c7..1da31dc47f8638 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -66,7 +66,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index c5744b45ec8fbc..65689469c5a12e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index ef31b77404ef8e..e2a3112f1c98ef 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index f2ab435954f6b9..73e496a7211344 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 85eca1cef06305..c4518168fd02c9 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 82c913318b73be..ba0e4f93503db5 100644 --- a/drivers/s390/char/keyboard.c +++ 
b/drivers/s390/char/keyboard.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index fdd4eb4e41b21c..4fc8ed5fe067e1 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h index ed7f3228e2349c..89ef1a1678d192 100644 --- a/drivers/scsi/bnx2i/bnx2i.h +++ b/drivers/scsi/bnx2i/bnx2i.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d506..07c08ce68d70af 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ec3b6656141242..05466004939597 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index cf902154f0aae0..bcf9f3dd0310c7 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "../../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h index ee3f5ee0652976..9e390648d93e1f 100644 --- a/drivers/staging/rtl8188eu/include/osdep_service.h +++ b/drivers/staging/rtl8188eu/include/osdep_service.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index b8a17097843487..5d33020554cd5d 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index f19b6b27aa7142..5346c657485df8 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index cb0b7ca36b1ec6..8a0d214f6e9b82 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h index 4055d4bf9f740a..e63964f5a18a87 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* for time_t */ diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255dec1..fa1d578d56bdab 
100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0fa5..a4d5e6749932b2 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -17,6 +17,8 @@ * GNU General Public License for more details. ******************************************************************************/ +#include + #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index eab274d17b5cbd..b03cc03423c16b 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* TCP_NODELAY */ #include /* ipv6_addr_v4mapped() */ diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08dad3..29eb09e0cd1537 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index a23fa5ed1d67f0..66b59a15780db0 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 71136742e606b9..65db0aeb3d805c 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -14,7 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1fd3f7d487a68..985d33f0f31576 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 397e1509fe51cc..6b3a2c00974dd0 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9d3ce505e7aba3..5c4933bb4b5336 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -72,7 +72,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a56edf2d58eb26..0cbfe1ff6f6c75 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 5a59da0dc98a17..3e80aa3b917aa9 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ca425e8099ea81..cfc3cff6e8d590 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 8f3659b65f5313..4c8aacc232c07b 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -207,6 +207,7 @@ #include 
#include #include +#include #include #include #include diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 185d50ee1b1242..cf3de91fbfe7a5 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7e94c7fc90ae2d..c26fa1f3ed8606 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c index df1c9bb90eb50b..2096f460498f9d 100644 --- a/drivers/w1/w1_family.c +++ b/drivers/w1/w1_family.c @@ -14,7 +14,7 @@ #include #include -#include /* schedule_timeout() */ +#include #include #include diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 4ce1b66d5092fb..2cae7b29bb5fb5 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/fs/attr.c b/fs/attr.c index c902b3d5350800..135304146120bc 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 79fbd85db4baa3..24a58bf9ca72ce 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "autofs_i.h" diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c84..9bf25be05636f1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f6c6c8adbc01ef..e82357c8997934 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index 23a539b292255b..c74ab43b83838c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 9c80d011594eea..aa228b98ad5b19 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 69d6990e30210b..ad6f094f2eff2f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1c8..543ed50f03872c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index e5c1783ab64a05..453a6a1fff34ec 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include "nodelist.h" diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7e4ea3b9f4724f..e7c8b9c76e4857 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7eb89c23c8470b..d5606099712a4c 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -30,6 +30,7 @@ #include #include #include +#include 
#include #include diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97b111d79489c5..bdea177aa405df 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca6a..bb79972dc638ba 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index efd66da992010f..786a4a2cb2d7a9 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -6,7 +6,7 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ -#include +#include #include #include #include diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 00ce1531b2f5fd..c330495c3115ed 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 305c0daf5d678b..e2112270d75a5f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -15,7 +15,8 @@ */ #include -#include +#include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc878f..8a266e2be5a63a 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include +#include #include #include #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab95c..422bc2e4cb6a6f 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* For send_sig(), same_thread_group(), etc. */ #include /* for IS_ERR_VALUE */ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. 
*/ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 00000000000000..da69cd05cd68b5 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c25648..4985048640a731 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include - +#include #ifdef CONFIG_SIGNALFD diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f5e..e2a5daf8d14f33 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include -#include +#include #include #ifdef CONFIG_TASKSTATS diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 40e448de8a7eb0..4f7241fbeff34c 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 461eb1e66a0fdf..7af0dcc5d75556 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 0a5f630f5c5430..0aae3183b02932 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 19d9a578c75316..7510dc687c0dc1 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -29,6 +29,7 @@ */ #include +#include #include #include #include diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index fe15fff5df5340..9b976a42376d19 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "kdb_private.h" diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 40c07e4fa116e0..129247e569023c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0e90e0afc755e..da128deb10ec50 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e2307a6c29f13d..64124947195291 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index a95f13c314645f..f6b961c5e58cf1 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b4377a5e426948..a2475a9f57d8fa 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,7 @@ * Implement CPU time clocks for the POSIX clock interface. 
*/ -#include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4fee1c3abd0b4f..0f411a4690a0ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1f9a31f934a417..9f6984d52feca4 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 571a2d3821d857..d9a03b80b75d1d 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86b7854fec8ee0..2f735cbe05e8ac 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 391fd23976a2c0..9745cfffcb6948 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -9,8 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ - -#include +#include /* * Returns true if the task does not share ->mm with another thread/process. diff --git a/mm/filemap.c b/mm/filemap.c index 1944c631e3e660..1694623a628902 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da343695302277..2df6d3687b2a8f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3d0f2fd4bf73fe..b74984db386e64 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/vmacache.c b/mm/vmacache.c index 7c233f8e20eef9..4355d34c68a681 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2014 Davidlohr Bueso. 
*/ -#include +#include #include #include diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd53c..25cfd8a4bc3668 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922d307b0a..9613381f5db04e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -13,7 +13,7 @@ #include /* error codes */ #include #include -#include +#include #include /* struct timeval */ #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 90fcf5fc2e0ac8..a8e42cedf1dbc7 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 92cbbd2afddbf1..adcad344c84398 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/stream.c b/net/core/stream.c index f575bcf64af2c3..20231dbb1da0c6 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a90ed67027b0cf..e6e79eda97636d 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ab254041dab7f6..81adc29a448dc5 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d31..4bbf4526b88566 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ad301c46b888f..b8a1df2c978532 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 465a9c8464f947..6f0a9be50f5055 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b94efd93d3e498..a08aeb56b8e457 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910fd1..ee37b390260a62 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 079c883aa96e5a..fd28a49dbe8f0c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7ff..b12f873f92baf0 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include From 8703e8a465b1e9cadc3680b4b1248f5987e54518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 
2017 18:51:30 +0100 Subject: [PATCH 0512/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- block/ioprio.c | 1 + include/linux/cred.h | 2 +- include/linux/sched/user.h | 6 ++++++ ipc/mqueue.c | 1 + kernel/fork.c | 1 + kernel/signal.c | 1 + kernel/user.c | 1 + mm/mlock.c | 1 + net/core/scm.c | 1 + security/keys/process_keys.c | 1 + 10 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/user.h diff --git a/block/ioprio.c b/block/ioprio.c index 3790669232ff50..89c43e07787dfb 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3acfe..045d33e480694c 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,8 +18,8 @@ #include #include #include +#include -struct user_struct; struct cred; struct inode; diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h new file mode 100644 index 00000000000000..83238e5ddadda0 --- /dev/null +++ b/include/linux/sched/user.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_USER_H +#define _LINUX_SCHED_USER_H + +#include + +#endif /* _LINUX_SCHED_USER_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 4f7241fbeff34c..e8d41ff57241d8 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/fork.c b/kernel/fork.c index 7332448b668a28..a5ad1e4ab604df 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index bae358532d0aee..762fcf64f0c349 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/user.c b/kernel/user.c index b069ccbfb0b037..00281add65b251 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mlock.c b/mm/mlock.c index cdbed8aaa4268c..1050511f8b2bdb 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/scm.c b/net/core/scm.c index b6d83686e1496d..b1ff8a4417489f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 918cddcd4516ae..b6fdd22205b169 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include From 38b8d208a4544c9a26b10baec89b8a21042e5305 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0513/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to move softlockup APIs out of , which will have to be picked up from other headers and a couple of .c files. already includes . 
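A minimal sketch of the transitional stub pattern these preparatory patches lean on, assuming (as the commit messages describe; the bracketed include targets are missing from this dump) that each new include/linux/sched/*.h file initially just maps back to <linux/sched.h>:

/*
 * Transitional stub header, e.g. include/linux/sched/user.h as created
 * above. It forwards to the old monolithic header so every existing
 * user keeps compiling; the real definitions (struct user_struct in
 * this case) are only moved here by a later patch.
 */
#ifndef _LINUX_SCHED_USER_H
#define _LINUX_SCHED_USER_H

#include <linux/sched.h>	/* still provides everything for now */

#endif /* _LINUX_SCHED_USER_H */

Call sites are switched to the new header ahead of time, so the patch that eventually moves the definitions does not have to touch them again, and every step in between stays independently buildable and bisectable.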
Include the header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/time.c | 1 + arch/ia64/kernel/uncached.c | 1 + arch/powerpc/kernel/swsusp_64.c | 1 + arch/x86/kernel/pvclock.c | 2 ++ arch/x86/xen/smp.c | 1 + drivers/ide/ide-taskfile.c | 1 + drivers/mtd/nand/nand_base.c | 1 + drivers/video/fbdev/nvidia/nv_accel.c | 2 ++ init/main.c | 1 + kernel/debug/debug_core.c | 1 + kernel/power/hibernate.c | 1 + kernel/power/snapshot.c | 1 + kernel/sched/clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timekeeping.c | 1 + 15 files changed, 17 insertions(+) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index faa116822c4c3d..144f9db7a87665 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index f3976da36721a9..583f7ff6b589e6 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 0e899e47c325b5..51db012808f5c5 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -10,6 +10,7 @@ #include #include #include +#include void do_after_copyback(void) { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 9e93fe5803b470..5c3f6d6a507833 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 0dee6f59ea8268..7ff2f1bfb7ec05 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 247b9faccce171..4c0007cb74e378 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 1492c12906f6bd..b0524f8accb620 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/nvidia/nv_accel.c b/drivers/video/fbdev/nvidia/nv_accel.c index ad6472a894ea60..7341fed63e35aa 100644 --- a/drivers/video/fbdev/nvidia/nv_accel.c +++ b/drivers/video/fbdev/nvidia/nv_accel.c @@ -48,6 +48,8 @@ */ #include +#include + #include "nv_type.h" #include "nv_proto.h" #include "nv_dma.h" diff --git a/init/main.c b/init/main.c index ae9f2008fb8683..c891cfb8928f92 100644 --- a/init/main.c +++ b/init/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index a603ef28f70c98..65c0f13637882d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 86385af1080f09..b8be5c803cdda5 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git 
a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 905d5bbd595fa3..d79a38de425a0d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index dd7817cdbf58ed..a08795e216283f 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0f411a4690a0ca..5e9e123b4321e9 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb564acee0f3db..5b63a2102c2907 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include From 55687da166bf51129ed6b110d7711f4c7560abe2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0514/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/cpufreq/cpufreq_governor.c | 1 - drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 1 + drivers/cpufreq/intel_pstate.c | 2 +- include/linux/sched/cpufreq.h | 6 ++++++ kernel/sched/sched.h | 1 + 6 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 include/linux/sched/cpufreq.h diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 631bd2c86c5e6e..47e24b5384b379 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,7 +18,6 @@ #include #include -#include #include #include "cpufreq_governor.h" diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f5717ca070cc39..0236ec2cd654b3 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4a017e8952962c..3937acf7e026cd 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpufreq_ondemand.h" diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb0f7fb7168589..8f6d29302aac50 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h new file mode 100644 index 00000000000000..07ed9664fa201e --- /dev/null +++ b/include/linux/sched/cpufreq.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CPUFREQ_H +#define _LINUX_SCHED_CPUFREQ_H + +#include + +#endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 64124947195291..7dfeb14fa43c0d 100644 --- 
a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include From 010426079ec1228a7f980d2eef766a84c0f9241a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0515/1911] sched/headers: Prepare for new header dependencies before moving more code to We are going to split more MM APIs out of , which will have to be picked up from a couple of .c files. The APIs that we are going to move are: arch_pick_mmap_layout() arch_get_unmapped_area() arch_get_unmapped_area_topdown() mm_update_next_owner() Include the header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/arc/mm/mmap.c | 3 ++- arch/arm/mm/mmap.c | 1 + arch/arm64/mm/mmap.c | 1 + arch/frv/mm/elf-fdpic.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/mips/mm/mmap.c | 1 + arch/parisc/kernel/cache.c | 1 + arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/mm/hugetlbpage.c | 1 + arch/powerpc/mm/mmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/sh/mm/mmap.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/mm/hugetlbpage.c | 1 + arch/tile/mm/hugetlbpage.c | 1 + arch/tile/mm/mmap.c | 1 + arch/x86/kernel/sys_x86_64.c | 1 + arch/x86/mm/hugetlbpage.c | 1 + arch/x86/mm/mmap.c | 1 + arch/xtensa/kernel/syscall.c | 1 + 22 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 568ca29f2ad90f..3b9b2a382ba268 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 2e06d56e987bf8..3e25e8d6486ba2 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include + #include #define COLOUR_ALIGN(addr, pgoff) \ diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index d448f9cd771540..2239fde10b808d 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 1e0a2650c88b59..7b0d55756eb1c2 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 836f14707a627f..da82c25301e777 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a09c12230bc507..ce4cc60d519bb0 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... 
*/ #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 374d71e61ef65b..64dd8bdd92c339 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -14,6 +14,7 @@ #include #include #include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf2c..7820b864de1a60 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ce07cd3f250729..e5288638a1d9ad 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 5d6eea925cf4ec..aa50ac090e9b9d 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 8013861aeaa742..a5d9ef59debe25 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ea09403bb878b..50618614881f33 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index 6777177807c26f..08e7af0be4a77b 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ae49639a484e48..ff3573059936a9 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 54d3999d811937..c521ee2770836a 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index e98a3f2e8f0f48..323bc6b6e3ad0e 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 77ceaa343fcef1..cb10153b5c9fb4 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index 377e312dc27eda..8ab28167c44b12 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a55ed63b9f91b0..50215a4b934744 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 2ae8584b44c73d..c5066a260803d4 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -7,6 
+7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 5eabf34008f1cf..7940166c799b78 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -29,6 +29,7 @@ #include #include #include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index d3fd100dffc9a0..06937928cb72dc 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -25,6 +25,7 @@ #include #include #include +#include #include typedef void (*syscall_t)(void); From 6a3827d7509cbf96b7e961f8957c1f01d1bcf894 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0516/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 1 + fs/proc/array.c | 1 + include/linux/sched/numa_balancing.h | 6 ++++++ include/trace/events/sched.h | 2 +- kernel/fork.c | 1 + kernel/sched/sched.h | 1 + mm/huge_memory.c | 1 + mm/memory.c | 1 + mm/mempolicy.c | 1 + 9 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/numa_balancing.h diff --git a/fs/exec.c b/fs/exec.c index aa228b98ad5b19..e857c7260b1f88 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index 61bea3a4ecc5e6..cfe7f934a300aa 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h new file mode 100644 index 00000000000000..999182279f785e --- /dev/null +++ b/include/linux/sched/numa_balancing.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NUMA_BALANCING_H +#define _LINUX_SCHED_NUMA_BALANCING_H + +#include + +#endif /* _LINUX_SCHED_NUMA_BALANCING_H */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9b90c57517a918..9e3ef6c99e4b0d 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -4,7 +4,7 @@ #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index a5ad1e4ab604df..08d6c091ac7cf1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7dfeb14fa43c0d..7e8ce0347fbf9b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/huge_memory.c b/mm/huge_memory.c index dee92fa26bbb8e..d36b2af4d1bf4b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index 6fc918b2c459eb..d4994a28dc8559 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include 
#include diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 90892416ed75f9..18e0105810dfd8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include From 1e4bae6468375f81a3d4bdf436deaa9ea294e12b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:32 +0100 Subject: [PATCH 0517/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/jobctl.h | 4 ++++ include/linux/sched/signal.h | 1 + 2 files changed, 5 insertions(+) create mode 100644 include/linux/sched/jobctl.h diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h new file mode 100644 index 00000000000000..228e4644937bf7 --- /dev/null +++ b/include/linux/sched/jobctl.h @@ -0,0 +1,4 @@ +#ifndef _LINUX_SCHED_JOBCTL_H +#define _LINUX_SCHED_JOBCTL_H + +#endif /* _LINUX_SCHED_JOBCTL_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index da69cd05cd68b5..ce93c4a02b7910 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_SIGNAL_H #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ From 7fce777cd4eacc0bdcb33017e5a4c495d28afed1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 14:47:27 +0100 Subject: [PATCH 0518/1911] sched/headers: Prepare header dependency changes, move the include to kernel/sched/sched.h Recent header reorganizations unearthed this hidden dependency: kernel/sched/core.c:199:25: error: 'paravirt_steal_rq_enabled' undeclared (first use in this function) kernel/sched/core.c:200:11: error: implicit declaration of function 'paravirt_steal_clock' [-Werror=implicit-function-declaration] So move the asm/paravirt.h include from kernel/sched/cpuclock.c to kernel/sched/sched.h. ( NOTE: We do this change before doing the changes that introduce the build failure, so the series remains fully bisectable. ) Reported-by: kbuild test robot Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/cputime.c | 4 ---- kernel/sched/sched.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ecec3a4f1eecc..54031fa681e260 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -6,10 +6,6 @@ #include #include #include "sched.h" -#ifdef CONFIG_PARAVIRT -#include -#endif - #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7e8ce0347fbf9b..04f376cf7ba993 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -20,6 +20,10 @@ #include #include +#ifdef CONFIG_PARAVIRT +#include +#endif + #include "cpupri.h" #include "cpudeadline.h" #include "cpuacct.h" From 5b825c3af1d8a0af4deb4a5eb349d0d0050c62e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: [PATCH 0519/1911] sched/headers: Prepare to remove inclusion from Add #include dependencies to all .c files rely on sched.h doing that for them. 
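The per-file fix-ups in this patch all follow one pattern, sketched below. The header being made explicit appears to be <linux/cred.h>, judging from the files touched; that name, and the example file and helper, are assumptions for illustration rather than anything taken from the patch itself:

/*
 * Hypothetical example of the fix-up pattern: a file that uses
 * credential helpers but so far only received them transitively through
 * <linux/sched.h> now spells out the dependency, so the transitive
 * include can later be dropped from sched.h without breaking the build.
 */
#include <linux/cred.h>		/* previously arrived via <linux/sched.h> */
#include <linux/sched.h>

static bool example_caller_is_root(void)
{
	/* current_uid() and GLOBAL_ROOT_UID are declared via <linux/cred.h> */
	return uid_eq(current_uid(), GLOBAL_ROOT_UID);
}

Each touched file gets exactly this kind of one-line include addition with no code changes, which is what keeps a 65-file patch like this reviewable.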
Note that even if the count where we need to add extra headers seems high, it's still a net win, because is included in over 2,200 files ... Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/sys_oabi-compat.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/x86/include/asm/intel_rdt.h | 1 + arch/x86/kernel/cpu/intel_cacheinfo.c | 1 + block/ioprio.c | 1 + drivers/misc/eeprom/eeprom.c | 1 + drivers/misc/lkdtm_heap.c | 1 + drivers/misc/vmw_vmci/vmci_context.c | 1 + drivers/misc/vmw_vmci/vmci_host.c | 1 + drivers/staging/lustre/lustre/include/lustre_compat.h | 1 + drivers/staging/lustre/lustre/ptlrpc/sec.c | 1 + drivers/xen/balloon.c | 1 + fs/9p/v9fs.c | 1 + fs/affs/inode.c | 1 + fs/affs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 + fs/befs/linuxvfs.c | 1 + fs/binfmt_elf.c | 1 + fs/cachefiles/internal.h | 1 + fs/compat.c | 1 + fs/exportfs/expfs.c | 1 + fs/ext2/balloc.c | 1 + fs/ext4/ialloc.c | 2 ++ fs/file_table.c | 1 + fs/gfs2/inode.c | 1 + fs/gfs2/sys.c | 1 + fs/hfs/inode.c | 1 + fs/hfsplus/inode.c | 1 + fs/isofs/inode.c | 1 + fs/jffs2/fs.c | 1 + fs/libfs.c | 1 + fs/namespace.c | 1 + fs/ncpfs/ioctl.c | 1 + fs/notify/fanotify/fanotify.c | 1 + fs/notify/inotify/inotify_fsnotify.c | 1 + fs/omfs/inode.c | 1 + fs/overlayfs/copy_up.c | 1 + fs/overlayfs/inode.c | 1 + fs/overlayfs/namei.c | 1 + fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 1 + fs/posix_acl.c | 1 + fs/proc/proc_sysctl.c | 1 + fs/proc/root.c | 1 + fs/quota/dquot.c | 1 + fs/stat.c | 1 + fs/xfs/xfs_ioctl.c | 1 + include/linux/cred.h | 1 + include/linux/sched/signal.h | 1 + include/linux/wait.h | 1 + include/net/scm.h | 1 + include/rdma/ib.h | 1 + ipc/namespace.c | 1 + kernel/pid_namespace.c | 1 + kernel/ucount.c | 1 + kernel/uid16.c | 1 + kernel/utsname.c | 1 + mm/usercopy.c | 1 + net/dns_resolver/dns_query.c | 2 ++ net/netfilter/xt_owner.c | 2 ++ net/sunrpc/auth.c | 1 + security/apparmor/policy.c | 1 + security/keys/internal.h | 1 + security/keys/keyctl.c | 1 + security/keys/persistent.c | 2 ++ 65 files changed, 69 insertions(+) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 5f221acd21aebb..b9786f491873fa 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 1a0a3b4ecc3efb..ffc6bd3464d970 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h index 95ce5c85b0096b..0d64397cee58e0 100644 --- a/arch/x86/include/asm/intel_rdt.h +++ b/arch/x86/include/asm/intel_rdt.h @@ -3,6 +3,7 @@ #ifdef CONFIG_INTEL_RDT_A +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0282b0df004a86..c55fb2cb2acca8 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/block/ioprio.c b/block/ioprio.c index 89c43e07787dfb..4b9ab8367ddaa5 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/eeprom/eeprom.c 
b/drivers/misc/eeprom/eeprom.c index 3d1d55157e5f3b..2fad790db3bf05 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/lkdtm_heap.c b/drivers/misc/lkdtm_heap.c index 0f1581664c1c78..ffb6aeac07b3ec 100644 --- a/drivers/misc/lkdtm_heap.c +++ b/drivers/misc/lkdtm_heap.c @@ -4,6 +4,7 @@ */ #include "lkdtm.h" #include +#include /* * This tries to stay within the next largest power-of-2 kmalloc cache diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index f35f0c8606b9ad..21d0fa592145c0 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "vmci_queue_pair.h" diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index ec090105eb4be7..8a16a26e9658f8 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h index 300e96fb032ae9..da9ce195c52ecf 100644 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ b/drivers/staging/lustre/lustre/include/lustre_compat.h @@ -35,6 +35,7 @@ #include #include +#include #include "lustre_patchless_compat.h" diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index e860df7c45a2bd..49f34fd655c3cb 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -38,6 +38,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include +#include #include #include "../include/obd.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index db107fa50ca14c..a6d4378eb8d9fc 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 072e7599583a81..a89f3cfe3c7d7f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/affs/inode.c b/fs/affs/inode.c index a5e6097eb5a9dd..abcc59899229c6 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -10,6 +10,7 @@ * (C) 1991 Linus Torvalds - minix filesystem */ #include +#include #include #include "affs.h" diff --git a/fs/affs/super.c b/fs/affs/super.c index 37532538e8ab12..c2c27a8f128ef7 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 806df746f1a93b..734cbf8d9676bd 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 19407165f4aad9..c500e954debba1 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "befs.h" diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fbbe52e1250e36..2c95257fa4da89 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 
cd1effee8a4912..9bf90bcc56acd6 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/fs/compat.c b/fs/compat.c index e50a2114f47408..c61b506f5bc94b 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168d5..9ec1038f937e86 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #define dprintk(fmt, args...) do{}while(0) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 4c40c0786e168b..d0bdb74f0e151b 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b14bae2598bc53..17bc043308f33b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include "ext4.h" diff --git a/fs/file_table.c b/fs/file_table.c index 6d982b57de9241..954d510b765af8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a04..9d28f55fbd1dbf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index f8d30e41d1d33b..7a515345610c28 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f776acf2378a1c..bfbba799430f15 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2e796f8302ffac..e8638d52819519 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 871c8b39209913..020ba093614641 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 567653f7c0ce2e..76fa814df3d1bf 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed628..217896ca4fae67 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 8bfad42c1ccf21..131cd7b94f4711 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include /* init_rootfs */ #include /* get_fs_root et.al. 
*/ diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 4434e4977cf36c..12550c2320ccbb 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a4c46221755ea6..e5f7e47de68e4b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -6,6 +6,7 @@ #include /* UINT_MAX */ #include #include +#include #include #include diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f36c29398de371..1aeb837ae41405 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -30,6 +30,7 @@ #include /* kmem_* */ #include #include +#include #include "inotify.h" diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index df7ea8543a2ef0..8c9034ee7383a9 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace6287..53d3f830358fc6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a0278..6639f487f83513 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include "overlayfs.h" diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 023bb0b03352f4..b8b077821fb03b 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82fd3..9aa37c2f7f7d0a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -7,6 +7,7 @@ * the Free Software Foundation. 
*/ +#include #include #include #include diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826cc5..9dc1c0af586bad 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "overlayfs.h" #include "ovl_entry.h" diff --git a/fs/posix_acl.c b/fs/posix_acl.c index c9d48dc784953f..eebf5f6cf6d564 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc854..3d203b1f5a0234 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a3a..5d5fed20bfffe1 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 406fed92362a3d..74b489e3714d51 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef086805..95bd41762770c4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index cf1363dbf32b91..2fd7fdf5438f0b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -43,6 +43,7 @@ #include "xfs_acl.h" #include +#include #include #include #include diff --git a/include/linux/cred.h b/include/linux/cred.h index 045d33e480694c..b03e7d049a64f4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct cred; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ce93c4a02b7910..0f4e9f4a43fd78 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e90861b..aacb1282d19a38 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include #include #include + #include #include diff --git a/include/net/scm.h b/include/net/scm.h index 59fa93c01d2a16..142ea9e7a6d0d8 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/rdma/ib.h b/include/rdma/ib.h index a6b93706b0fc96..9b4c22a3693188 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -35,6 +35,7 @@ #include #include +#include struct ib_addr { union { diff --git a/ipc/namespace.c b/ipc/namespace.c index 0abdea496493da..1f1d713ac19c4e 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eef2ce9686366a..25314c96fbe64a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ucount.c b/kernel/ucount.c index 8a11fc0cb45912..62630a40ab3a42 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/kernel/uid16.c b/kernel/uid16.c index 71645ae9303a52..5c2dc5b2bf4fe8 100644 --- a/kernel/uid16.c +++ 
b/kernel/uid16.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 6976cd47dcf602..06585ad296ffc2 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 8345299e3e3b08..7ccad05c0d5c42 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -16,6 +16,7 @@ #include #include +#include #include enum { diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab81..ab9fec00a788e4 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -37,8 +37,10 @@ #include #include +#include #include #include + #include #include diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 16477df45b3bf1..3d705c688a27b5 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a1ee933e3029b3..d2623b9f23d66c 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index f44312a19522b6..462c5d36b87157 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/keys/internal.h b/security/keys/internal.h index a705a7d92ad7a9..a2f4c0abb8d847 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -13,6 +13,7 @@ #define _INTERNAL_H #include +#include #include #include #include diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 04a764f71ec88e..bcb0b597c39187 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/persistent.c b/security/keys/persistent.c index 1edc1f0a0ce2c4..d0cb5b32eff7ba 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -10,6 +10,8 @@ */ #include +#include + #include "internal.h" unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ From 4cf421e55d69016989548e0fb8585e69f54bd283 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:03:42 +0100 Subject: [PATCH 0520/1911] sched/headers: Prepare for the removal of from Fix up missing #includes in other places that rely on sched.h doing that for them. 
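The shape of every fixup in this series is the same; a minimal sketch, with a
purely hypothetical file and header name (not the ones this particular patch
touches), looks like this:

    --- a/drivers/example/example.c
    +++ b/drivers/example/example.c
    @@
     #include <linux/kernel.h>
     #include <linux/sched.h>
    +#include <linux/example.h>	/* hypothetical header previously pulled in via sched.h */

Once that header stops being included transitively by <linux/sched.h>, the
file above still builds because it now names the dependency directly.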
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/math-emu/math.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/process.c | 1 + drivers/oprofile/cpu_buffer.c | 2 ++ 4 files changed, 5 insertions(+) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index fa5ae0ad8983b6..d17d705f65453c 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -2,6 +2,7 @@ #include #include #include +#include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 4e332244ea75c3..4d4737d099c654 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -3,6 +3,7 @@ #include #include #include +#include int init_fpu(struct task_struct *tsk) { diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 53bc6c4c84ecdf..30bff33e810ea7 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -5,6 +5,7 @@ #include #include #include +#include struct kmem_cache *task_xstate_cachep = NULL; unsigned int xstate_size; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 0581461c3a67be..eda2633a393d56 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -23,6 +23,8 @@ #include #include +#include + #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" From 037741a6d4ab2e4b0580dae97aca963d8a8dc68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: [PATCH 0521/1911] sched/headers: Prepare for the removal of from Fix up missing #includes in other places that rely on sched.h doing that for them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/i2c.h | 1 + kernel/fork.c | 1 + kernel/locking/locktorture.c | 1 + kernel/rcu/tree.h | 1 + 4 files changed, 4 insertions(+) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bed8fbb45f31fb..6b183521c61697 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,6 +30,7 @@ #include /* for struct device */ #include /* for completion */ #include +#include #include /* for Host Notify IRQ */ #include /* for struct device_node */ #include /* for swab16 */ diff --git a/kernel/fork.c b/kernel/fork.c index 08d6c091ac7cf1..8fbe8bcd1e200a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 5ea0a8969ee24f..f24582d4dad37b 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index b60f2b6caa1443..ec62a05bfdb3c8 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include From cc5efc2323a89dcf1a02c17b9b9f255c5a6e0492 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: [PATCH 0522/1911] sched/headers: Prepare for the removal of various unrelated headers from We are going to remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include Fix up a 
single .h file that got hold of via one of these headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 363e0e8082a9d7..08264641b502bd 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #define UID_GID_MAP_MAX_EXTENTS 5 From b12fb7f46af7d548503d75be59f493f958c6d1b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:33 +0100 Subject: [PATCH 0523/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the .c file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/read_write.c | 3 ++- include/linux/sched/xacct.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/xacct.h diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab09c..dc60075653a84b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -4,8 +4,9 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include +#include #include +#include #include #include #include diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h new file mode 100644 index 00000000000000..890f7ce5cd2768 --- /dev/null +++ b/include/linux/sched/xacct.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_XACCT_H +#define _LINUX_SCHED_XACCT_H + +#include + +#endif /* _LINUX_SCHED_XACCT_H */ From 174cd4b1e5fbd0d74c68cf3a74f5bd4923485512 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: [PATCH 0524/1911] sched/headers: Prepare to move signal wakeup & sigpending methods from into Fix up affected files that include this signal functionality via sched.h. 
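As an illustration of the two fixup shapes used throughout this patch (the
helper function and file below are hypothetical; only signal_pending() and the
new <linux/sched/signal.h> header come from the patch itself):

    #include <linux/sched.h>
    #include <linux/sched/signal.h>	/* signal wakeup & sigpending helpers after the move */

    /* hypothetical interruptible wait step of the kind the touched drivers implement */
    static int example_wait_step(void)
    {
    	if (signal_pending(current))
    		return -EINTR;
    	schedule();
    	return 0;
    }

Files that only needed the signal helpers switch their #include over entirely;
files that still use the rest of the scheduler API keep <linux/sched.h> and add
the new header next to it, as in the hunks below.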
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 2 +- arch/cris/arch-v10/drivers/sync_serial.c | 2 +- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- arch/mips/kernel/rtlx.c | 2 ++ arch/mips/kvm/mips.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/powerpc.c | 1 + arch/powerpc/platforms/83xx/suspend.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/powerpc/platforms/cell/spufs/spufs.h | 1 + arch/s390/crypto/prng.c | 2 ++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 2 ++ arch/sh/kernel/cpu/fpu.c | 2 +- arch/sh/kernel/disassemble.c | 2 ++ arch/sh/kernel/process.c | 2 +- arch/um/drivers/random.c | 2 +- arch/x86/kernel/apm_32.c | 2 +- block/blk-cgroup.c | 1 + block/blk-mq.c | 1 + crypto/algboss.c | 2 +- crypto/algif_aead.c | 1 + crypto/algif_skcipher.c | 1 + crypto/api.c | 2 +- drivers/atm/horizon.c | 1 + drivers/base/core.c | 1 + drivers/base/power/wakeup.c | 2 +- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_worker.c | 2 +- drivers/block/swim3.c | 2 +- drivers/char/applicom.c | 2 +- drivers/char/hpet.c | 1 + drivers/char/hw_random/core.c | 1 + drivers/char/ipmi/ipmi_watchdog.c | 1 + drivers/char/lp.c | 2 +- drivers/char/ppdev.c | 2 +- drivers/char/rtc.c | 2 +- drivers/char/snsc.c | 2 +- drivers/dma-buf/dma-fence.c | 1 + .../gpu/drm/omapdrm/displays/panel-dsi-cm.c | 2 +- drivers/gpu/drm/vc4/vc4_gem.c | 1 + drivers/gpu/vga/vgaarb.c | 2 +- drivers/hid/hid-debug.c | 2 +- drivers/hid/hid-roccat.c | 2 +- drivers/hid/hidraw.c | 2 +- drivers/hid/usbhid/hiddev.c | 1 + drivers/hsi/clients/cmt_speech.c | 2 +- drivers/i2c/busses/i2c-ibm_iic.c | 2 ++ drivers/i2c/busses/i2c-mpc.c | 2 +- drivers/iio/industrialio-buffer.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 1 + drivers/isdn/capi/kcapi.c | 2 +- drivers/isdn/mISDN/timerdev.c | 2 ++ drivers/lguest/core.c | 1 + drivers/macintosh/adb.c | 2 +- drivers/macintosh/smu.c | 1 + drivers/macintosh/via-pmu.c | 2 +- drivers/mailbox/mailbox-test.c | 1 + drivers/md/dm.c | 1 + drivers/media/dvb-core/dvb_ca_en50221.c | 2 +- drivers/media/dvb-core/dvb_demux.c | 2 +- drivers/media/dvb-core/dvb_frontend.c | 2 +- drivers/media/pci/cx18/cx18-driver.h | 2 +- drivers/media/pci/ivtv/ivtv-driver.h | 35 ++++++++++--------- drivers/media/pci/pt1/pt1.c | 1 + drivers/media/pci/pt3/pt3.c | 1 + drivers/media/pci/solo6x10/solo6x10-i2c.c | 1 + drivers/media/pci/zoran/zoran_device.c | 1 + drivers/media/platform/vivid/vivid-radio-rx.c | 2 ++ drivers/media/platform/vivid/vivid-radio-tx.c | 1 + drivers/media/rc/lirc_dev.c | 2 +- drivers/media/usb/cpia2/cpia2_core.c | 1 + drivers/media/usb/gspca/cpia1.c | 2 ++ drivers/misc/cxl/fault.c | 2 +- drivers/misc/cxl/file.c | 2 +- drivers/misc/ibmasm/r_heartbeat.c | 2 +- drivers/misc/lis3lv02d/lis3lv02d.c | 1 + drivers/misc/mei/bus.c | 2 +- drivers/misc/mei/client.c | 2 +- drivers/misc/mei/main.c | 2 +- drivers/misc/mic/scif/scif_main.h | 2 +- drivers/misc/vexpress-syscfg.c | 2 +- drivers/mtd/tests/mtd_test.h | 2 +- drivers/net/bonding/bond_options.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 2 +- drivers/net/can/softing/softing_fw.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 1 + .../ethernet/cavium/liquidio/octeon_main.h | 2 ++ drivers/net/ethernet/sfc/falcon/falcon.c | 2 ++ drivers/net/irda/stir4200.c | 1 + drivers/net/macvtap.c | 2 +- drivers/net/ppp/ppp_generic.c | 1 + 
drivers/net/tun.c | 1 + drivers/net/usb/hso.c | 2 +- drivers/net/usb/qmi_wwan.c | 1 + drivers/net/wan/cosa.c | 2 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 1 + .../net/wireless/broadcom/b43legacy/main.c | 2 +- .../net/wireless/intersil/hostap/hostap_hw.c | 2 +- .../wireless/intersil/hostap/hostap_ioctl.c | 2 +- drivers/oprofile/event_buffer.c | 2 +- drivers/parport/daisy.c | 2 +- drivers/parport/ieee1284.c | 2 +- drivers/parport/ieee1284_ops.c | 2 +- drivers/parport/parport_ip32.c | 2 +- drivers/parport/parport_pc.c | 2 +- drivers/parport/share.c | 2 +- drivers/pci/access.c | 2 +- drivers/pci/hotplug/cpci_hotplug_core.c | 1 + drivers/pci/hotplug/cpqphp.h | 2 +- drivers/pci/hotplug/pciehp.h | 2 +- drivers/pci/hotplug/shpchp.h | 2 +- drivers/rtc/rtc-dev.c | 2 +- drivers/s390/cio/device.c | 1 + drivers/scsi/lpfc/lpfc_vport.c | 1 + drivers/scsi/osst.c | 2 +- drivers/scsi/st.c | 2 +- drivers/soc/fsl/qbman/dpaa_sys.h | 1 + drivers/staging/comedi/comedi_fops.c | 2 +- drivers/staging/dgnc/dgnc_tty.c | 2 +- drivers/staging/dgnc/dgnc_utils.c | 2 +- drivers/staging/greybus/uart.c | 2 +- .../lustre/include/lustre/lustre_user.h | 1 + .../lustre/lustre/include/lustre_lib.h | 2 +- .../lustre/lustre/include/obd_support.h | 2 ++ drivers/staging/media/lirc/lirc_sir.c | 2 +- drivers/staging/media/lirc/lirc_zilog.c | 2 +- drivers/staging/speakup/speakup_soft.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 ++ drivers/tty/n_gsm.c | 2 +- drivers/tty/serial/crisv10.c | 2 +- drivers/tty/serial/serial_core.c | 1 + drivers/tty/tty_ioctl.c | 2 +- drivers/tty/tty_port.c | 2 +- drivers/uio/uio.c | 2 +- drivers/usb/class/cdc-acm.c | 1 + drivers/usb/class/usblp.c | 2 +- drivers/usb/gadget/function/f_fs.c | 1 + drivers/usb/image/mdc800.c | 2 +- drivers/usb/misc/adutux.c | 1 + drivers/usb/misc/idmouse.c | 1 + drivers/usb/misc/rio500.c | 2 +- drivers/usb/misc/uss720.c | 1 + drivers/usb/mon/mon_bin.c | 1 + drivers/usb/mon/mon_text.c | 1 + drivers/usb/serial/digi_acceleport.c | 1 + drivers/usb/serial/generic.c | 1 + drivers/vhost/net.c | 1 + drivers/vhost/vhost.c | 1 + drivers/video/fbdev/cobalt_lcdfb.c | 1 + .../omap2/omapfb/displays/panel-dsi-cm.c | 2 +- fs/afs/rxrpc.c | 2 ++ fs/aio.c | 2 +- fs/btrfs/ctree.h | 1 + fs/ceph/caps.c | 2 +- fs/cifs/inode.c | 2 ++ fs/coda/psdev.c | 2 +- fs/dlm/user.c | 1 + fs/ecryptfs/read_write.c | 2 ++ fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- fs/ext4/ext4.h | 1 + fs/f2fs/data.c | 1 + fs/fuse/dev.c | 1 + fs/gfs2/lock_dlm.c | 1 + fs/gfs2/super.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/jffs2/nodemgmt.c | 2 +- fs/nfs/inode.c | 2 +- fs/nilfs2/segment.c | 2 ++ fs/notify/fanotify/fanotify_user.c | 1 + fs/notify/inotify/inotify_user.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/alloc.c | 1 + fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlmfs/userdlm.c | 1 + fs/ocfs2/dlmglue.c | 1 + fs/orangefs/orangefs-kernel.h | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/splice.c | 2 ++ fs/userfaultfd.c | 2 +- fs/xfs/xfs_linux.h | 2 +- include/drm/drm_os_linux.h | 1 + include/linux/sunrpc/types.h | 1 + include/media/v4l2-ioctl.h | 1 + include/net/busy_poll.h | 1 + kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- kernel/locking/rwsem-spinlock.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- kernel/rcu/rcutorture.c | 2 +- kernel/sched/completion.c | 2 +- kernel/sched/swait.c | 2 +- kernel/sched/wait.c | 2 +- kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 2 +- kernel/time/timer.c | 2 +- lib/percpu_ida.c | 2 +- mm/compaction.c | 1 + mm/gup.c | 2 +- mm/hugetlb.c | 1 + 
mm/memory_hotplug.c | 1 + mm/shmem.c | 1 + mm/userfaultfd.c | 1 + net/atm/svc.c | 2 +- net/bluetooth/af_bluetooth.c | 2 ++ net/bluetooth/cmtp/capi.c | 2 +- net/bluetooth/hci_request.c | 2 ++ net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + net/bluetooth/sco.c | 1 + net/bridge/br_sysfs_br.c | 1 + net/bridge/br_sysfs_if.c | 1 + net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 1 + net/dccp/output.c | 1 + net/ipv4/devinet.c | 1 + net/ipv6/addrconf.c | 1 + net/irda/ircomm/ircomm_tty.c | 2 +- net/irda/irnet/irnet_ppp.c | 3 +- net/iucv/af_iucv.c | 2 +- net/kcm/kcmsock.c | 2 ++ net/llc/af_llc.c | 2 ++ net/nfc/llcp_sock.c | 1 + net/phonet/pep.c | 1 + net/phonet/socket.c | 2 ++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/recvmsg.c | 2 ++ net/rxrpc/sendmsg.c | 2 ++ net/tipc/socket.c | 2 ++ net/vmw_vsock/af_vsock.c | 1 + net/vmw_vsock/virtio_transport_common.c | 1 + sound/core/control.c | 1 + sound/core/hwdep.c | 1 + sound/core/oss/pcm_oss.c | 1 + sound/core/pcm_lib.c | 1 + sound/core/pcm_native.c | 1 + sound/core/rawmidi.c | 2 +- sound/core/seq/oss/seq_oss_device.h | 2 +- sound/core/seq/oss/seq_oss_writeq.c | 1 + sound/core/seq/seq_fifo.c | 2 ++ sound/core/seq/seq_memory.c | 1 + sound/core/timer.c | 1 + sound/firewire/bebob/bebob.h | 1 + sound/firewire/dice/dice.h | 1 + sound/firewire/digi00x/digi00x.h | 1 + sound/firewire/fireworks/fireworks.h | 1 + sound/firewire/oxfw/oxfw.h | 1 + sound/firewire/tascam/tascam.h | 1 + sound/isa/gus/gus_pcm.c | 2 ++ sound/isa/msnd/msnd.c | 1 + sound/isa/sb/emu8000.c | 2 +- sound/isa/sb/emu8000_patch.c | 2 ++ sound/isa/sb/emu8000_pcm.c | 2 ++ sound/isa/wavefront/wavefront_synth.c | 1 + sound/oss/dmabuf.c | 2 ++ sound/oss/dmasound/dmasound_core.c | 1 + sound/oss/midibuf.c | 2 ++ sound/oss/msnd_pinnacle.c | 2 ++ sound/oss/sound_config.h | 1 + sound/oss/swarm_cs4297a.c | 2 +- virt/kvm/kvm_main.c | 2 +- 265 files changed, 319 insertions(+), 139 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 083c9e517d223e..57f3b751263616 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 9ac75d68f1847f..cc62572c1b9460 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index ef515af1a37792..8efcc1a899a890 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index c5c4fd54d79722..b80dd8b17a764f 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ed81e5ac14267f..15a1b1716c2eee 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -16,8 +16,10 @@ #include #include #include +#include #include #include + #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e107ece4e3701..fc56a9ce785d6e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ 
b/arch/powerpc/kvm/book3s_hv.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2b38d824e9e5fa..95c91a9de351c4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 08f92f6ed228f3..978b85bb3233e5 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d317c84dc794df..1fbb5da17dd27f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -23,7 +23,7 @@ #undef DEBUG #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index aac7339660923d..5e59f80e95dbee 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 85b7f5efe06a94..5a3ec04a7082c1 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f5694838234d5a..fd6cd05bb6a7c7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -29,6 +29,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 38556e3959156d..5491be39776b66 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 4d4737d099c654..37e35330aae661 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 64d5d8dded7c15..015fee58014b14 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -12,6 +12,8 @@ #include #include +#include + /* * Format of an instruction in memory. */ diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 30bff33e810ea7..80a61c5e301589 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 57f03050c8505b..37c51a6be690c7 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -6,7 +6,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. 
*/ -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 4a7080c84a5a54..dc04b30cbd6052 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -218,7 +218,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 295e98c2c8ccdf..bbe7ee00bd3d70 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 746c14e0d157c6..6f35b6fd47990b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algboss.c b/crypto/algboss.c index ccb85e1798f230..960d8548171be5 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 533265f110e029..5a805375865731 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a9e79d8eff8774..43839b00fe6c42 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/api.c b/crypto/api.c index b16ce165328457..941cd4c6c7ecbb 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 2bf1ef1c3c786e..0f18480b33b55f 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/core.c b/drivers/base/core.c index 3050e6f994031f..684bda4d14a187 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "base.h" diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f546f8f107b06a..1368549704893c 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 116509852a34da..c7d530a95e5306 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -52,6 +52,7 @@ #define __KERNEL_SYSCALLS__ #include #include +#include #include #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8b40a5b2f8e6b2..aa6bf9692effec 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #define __KERNEL_SYSCALLS__ #include diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c6755c9a0aeab4..3bff33f21435ce 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index aabd8e9d3035c8..61b3ffa4f45897 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include 
#include diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index e5c62dcf2c11cb..e770ad97747235 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 20b32bb8c2aff5..8bdc38d81adf91 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5c654b5d4adf0c..503a41dfa1936b 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 30b9e83bf1bfc6..5ca24d9b101b92 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef CONFIG_X86 /* diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 5b674277065674..565e4cf04a0215 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -117,7 +117,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 87885d146dbb02..2a558c706581b2 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -58,7 +58,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 35259961cc38f7..974d48927b0776 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index ec07f0e99732ec..6aa32679fd58ea 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d1f1f456f5c48f..d195d617076d6c 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -22,6 +22,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c index a2bb855a2851f7..ac5800c72cb48e 100644 --- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c +++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index ab3016982466c3..1eef98c3331dfc 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 0f5b2dd2450758..92f1452dad57f6 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index acfb522a432ae5..c6c9c51c806f0d 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index 76d06cf87b2ac3..fb77dec720a465 100644 --- 
a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index f0e2757cb9094d..ec530454e6f687 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 700145b1508894..774bd701dae0b8 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c index 7175e6bedf2185..727f968ac1cbb9 100644 --- a/drivers/hsi/clients/cmt_speech.c +++ b/drivers/hsi/clients/cmt_speech.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 412b91d255ad1d..961c5f42d956f1 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -37,6 +37,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 565a49a0c56410..96caf378b1dc00 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 4972986f645583..d2b465140a6bdc 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "iio_core.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index a6d6c617b59736..0cdf2b7f272f3a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index deedb6fc1b05c5..3e10e3dac2e7f5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -31,6 +31,7 @@ */ #include +#include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 49d0f70c2baee3..1dfd1085a04f87 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 9438d7ec33080a..b1e135fc1fb504 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "core.h" static DEFINE_MUTEX(mISDN_mutex); diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index ac219045daf7c0..395ed1961dbfb6 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 152414e6378a75..fee939efc4fc6b 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 227869159ac081..1ac66421877a7a 100644 
--- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 43b8db2b54451f..cce99f72e4ae6d 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 9c79f8019d2a5f..97fb956bb6e04a 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MBOX_MAX_SIG_LEN 8 #define MBOX_MAX_MSG_LEN 128 diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9f37d7fc2786ca..f4ffd1eb8f44c3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index 000d737ad8271f..8d65028c7a74ec 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include "dvb_ca_en50221.h" diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 4eac71e50c5f95..6628f80d184fd4 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -19,7 +19,7 @@ #define pr_fmt(fmt) "dvb_demux: " fmt -#include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 85ae3669aa668b..e3fff8f64d37d8 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/cx18/cx18-driver.h b/drivers/media/pci/cx18/cx18-driver.h index fef3c736fcba68..7be2088c45fe6b 100644 --- a/drivers/media/pci/cx18/cx18-driver.h +++ b/drivers/media/pci/cx18/cx18-driver.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index cde452e3074679..d27c5c2c07ea1e 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -38,37 +38,38 @@ * using information provided by Jiun-Kuei Jung @ AVerMedia. 
*/ -#include +#include +#include #include -#include +#include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include +#include #include -#include #include -#include +#include +#include #include -#include #include -#include +#include -#include -#include -#include +#include +#include #include +#include #include #include #include -#include +#include +#include +#include + +#include /* Memory layout */ #define IVTV_ENCODER_OFFSET 0x00000000 diff --git a/drivers/media/pci/pt1/pt1.c b/drivers/media/pci/pt1/pt1.c index da1eebd2016f5c..3219d2f3271ed9 100644 --- a/drivers/media/pci/pt1/pt1.c +++ b/drivers/media/pci/pt1/pt1.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c index 77f4d15f322b2d..e8b5d099215774 100644 --- a/drivers/media/pci/pt3/pt3.c +++ b/drivers/media/pci/pt3/pt3.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "dmxdev.h" #include "dvbdev.h" diff --git a/drivers/media/pci/solo6x10/solo6x10-i2c.c b/drivers/media/pci/solo6x10/solo6x10-i2c.c index c908672b2c4015..e83bb79f93497e 100644 --- a/drivers/media/pci/solo6x10/solo6x10-i2c.c +++ b/drivers/media/pci/solo6x10/solo6x10-i2c.c @@ -27,6 +27,7 @@ * thread context, ACK the interrupt, and move on. -- BenC */ #include +#include #include "solo6x10.h" diff --git a/drivers/media/pci/zoran/zoran_device.c b/drivers/media/pci/zoran/zoran_device.c index 671907a6e6b631..40adceebca7e4e 100644 --- a/drivers/media/pci/zoran/zoran_device.c +++ b/drivers/media/pci/zoran/zoran_device.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-rx.c b/drivers/media/platform/vivid/vivid-radio-rx.c index f99092ca8f5c40..47c36c26096b20 100644 --- a/drivers/media/platform/vivid/vivid-radio-rx.c +++ b/drivers/media/platform/vivid/vivid-radio-rx.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-tx.c b/drivers/media/platform/vivid/vivid-radio-tx.c index 8c59d4f53200a1..0e8025b7b4dde4 100644 --- a/drivers/media/platform/vivid/vivid-radio-tx.c +++ b/drivers/media/platform/vivid/vivid-radio-tx.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef85..393dccaabdd02a 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c index 431dd0b4b33236..b1d13444ff301e 100644 --- a/drivers/media/usb/cpia2/cpia2_core.c +++ b/drivers/media/usb/cpia2/cpia2_core.c @@ -32,6 +32,7 @@ #include #include #include +#include #define FIRMWARE "cpia2/stv0672_vp4.bin" MODULE_FIRMWARE(FIRMWARE); diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c index 23d3285f182a59..e91d00762e94bf 100644 --- a/drivers/media/usb/gspca/cpia1.c +++ b/drivers/media/usb/gspca/cpia1.c @@ -27,6 +27,8 @@ #define MODULE_NAME "cpia1" #include +#include + #include "gspca.h" MODULE_AUTHOR("Hans de Goede "); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index e33011e3e7e26a..2fa015c0556186 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff 
--git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 859959f19f1072..e7139c76f96122 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c index 232034f5da486f..5c7dd26db716f4 100644 --- a/drivers/misc/ibmasm/r_heartbeat.c +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -20,7 +20,7 @@ * */ -#include +#include #include "ibmasm.h" #include "dot_command.h" diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index fb8705fc3aca7c..e389b0b5278d56 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index cb3e9e0ca0497d..df5f78ae3d2534 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 68fe37b5bc52fb..d3e3372424d616 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -14,7 +14,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 9d0b7050c79a36..bf816449cd405e 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index a08f0b600a9e22..0e5eff9ad08063 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -18,7 +18,7 @@ #ifndef SCIF_MAIN_H #define SCIF_MAIN_H -#include +#include #include #include #include diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index c344483fa7d65a..2cde80c7bb934e 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/tests/mtd_test.h b/drivers/mtd/tests/mtd_test.h index 4b7bee17c924bb..04afd0e7074f12 100644 --- a/drivers/mtd/tests/mtd_test.h +++ b/drivers/mtd/tests/mtd_test.h @@ -1,5 +1,5 @@ #include -#include +#include static inline int mtdtest_relax(void) { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 577e57cad1dc44..1bcbb8913e1715 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include static int bond_option_active_slave_set(struct bonding *bond, diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e23c3ed737deef..770623a0cc01c3 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index 4063215c9b54b0..aac58ce6e371a8 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a448177990fe42..30d1eb9ebec9af 100644 --- 
a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 8cd38914816665..aa36e9ae767655 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -23,6 +23,8 @@ #ifndef _OCTEON_MAIN_H_ #define _OCTEON_MAIN_H_ +#include + #if BITS_PER_LONG == 32 #define CVM_CAST64(v) ((long long)(v)) #elif BITS_PER_LONG == 64 diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index c6ff0cc5ef1839..93c713c1f627a7 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include "net_driver.h" #include "bitfield.h" #include "efx.h" diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 42da094b68ddfa..7ee51487953155 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a4bfc10b61ddd7..da85057680d657 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a411b43a69eb43..f9c0e62716eaa7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 30863e378925b3..dc1b1dd9157c16 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index e7b516342678d5..4f2e8141dbe2e5 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -52,7 +52,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 24d5272cdce510..80567455068328 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 087eb266601fc8..4ca71bca39acfc 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -78,7 +78,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index b7fe0af4cb2400..363b30a549c2b6 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "core.h" #include "cfg80211.h" diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index e97ab2b916630e..cdafebb9c936b4 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 544ef7adde7d2a..04dfd040a6502a 
100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index a5656bc0e6aaa7..b2c6b065b54293 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 67935fbbbcabf7..32888f2bd1a977 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index d998d0ed2bec55..46eb15fb57fff4 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index f9fd4b33a5463d..74cc6dd982d2aa 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index 75071605d22fc6..a959224d011bce 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index 30e981be14c237..dcbeeb220dda7d 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -102,7 +102,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 3e56e7deab8e8d..9d42dfe65d448a 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 3308427ed9f7a0..bc090daa850a4b 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/pci/access.c b/drivers/pci/access.c index b9dd37c8c9ce1e..8b7382705bf27e 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 7ec8a8f72c698a..95f689f5392014 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h index 9103a7b9f3b996..48c8a066a6b78f 100644 --- a/drivers/pci/hotplug/cpqphp.h +++ b/drivers/pci/hotplug/cpqphp.h @@ -32,7 +32,7 @@ #include /* for read? and write? 
functions */ #include /* for delays */ #include -#include /* for signal_pending() */ +#include /* for signal_pending() */ #define MY_NAME "cpqphp" diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 37d70b5ad22f99..06109d40c4ac92 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending() */ +#include /* signal_pending() */ #include #include #include diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 4da8fc601467be..70c7ea6af0344d 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending(), struct timer_list */ +#include /* signal_pending(), struct timer_list */ #include #include diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index a6d9434addf6f7..6dc8f29697abfe 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "rtc-core.h" static dev_t rtc_devt; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 79823ee9c1007d..b8006ea9099cd9 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index e18bbc66e83b1f..4e36998a266c36 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 75ac662793a3cd..c47f4b349bac44 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -35,7 +35,7 @@ static const char * osst_version = "0.99.4"; #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 81212d4bd9bf2d..e5ef78a6848ef1 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -23,7 +23,7 @@ static const char *verstr = "20160209"; #include #include -#include +#include #include #include #include diff --git a/drivers/soc/fsl/qbman/dpaa_sys.h b/drivers/soc/fsl/qbman/dpaa_sys.h index 2eaf3184f61dae..2ce394aa4c9502 100644 --- a/drivers/soc/fsl/qbman/dpaa_sys.h +++ b/drivers/soc/fsl/qbman/dpaa_sys.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 57e8599b54e645..8deac8d9225da2 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c index c63e591631f637..c3b8fc54883db8 100644 --- a/drivers/staging/dgnc/dgnc_tty.c +++ b/drivers/staging/dgnc/dgnc_tty.c @@ -19,7 +19,7 @@ */ #include -#include /* For jiffies, task states */ +#include /* For jiffies, task states, etc. 
*/ #include /* For tasklet and interrupt structs/defines */ #include #include diff --git a/drivers/staging/dgnc/dgnc_utils.c b/drivers/staging/dgnc/dgnc_utils.c index 95272f4765fcfa..6f59240024d131 100644 --- a/drivers/staging/dgnc/dgnc_utils.c +++ b/drivers/staging/dgnc/dgnc_utils.c @@ -1,5 +1,5 @@ #include -#include +#include #include "dgnc_utils.h" /* diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index ab0dbf5cab5aa7..43255e2e927673 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 21aec0ca9ad36b..7d8628ce0d3b3a 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -44,6 +44,7 @@ #ifdef __KERNEL__ # include +# include # include /* snprintf() */ # include #else /* !__KERNEL__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index 27f3148c43442e..b04d613846ee6f 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -42,7 +42,7 @@ * @{ */ -#include +#include #include #include #include "../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index aaedec7d793c85..dace6591a0a449 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -34,6 +34,8 @@ #define _OBD_SUPPORT #include +#include + #include "../../include/linux/libcfs/libcfs.h" #include "lustre_compat.h" #include "lprocfs_status.h" diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c index c75ae43095ba55..c6c3de94adaa25 100644 --- a/drivers/staging/media/lirc/lirc_sir.c +++ b/drivers/staging/media/lirc/lirc_sir.c @@ -36,7 +36,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c index 34aac3e2eb871f..e4a533b6beb376 100644 --- a/drivers/staging/media/lirc/lirc_zilog.c +++ b/drivers/staging/media/lirc/lirc_zilog.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index ff68a384f9c218..d2ff0afd685aea 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -22,7 +22,7 @@ #include #include /* for misc_register, and SYNTH_MINOR */ #include /* for poll_wait() */ -#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ +#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ #include "spk_priv.h" #include "speakup.h" diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8bcb9b71f76432..0c3e8fce3695e7 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -8,6 +8,8 @@ #include #include +#include + #include #include #include diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f3932baed07dc3..55577cf9b6a4e0 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -39,7 +39,7 
@@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index e92c23470e519f..59a2a7e18b5a25 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -12,7 +12,7 @@ static char *serial_version = "$Revision: 1.25 $"; #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9939c3d9912b35..3fe56894974a7c 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index f27fc0f14c11f2..a9a978731c5b0d 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 5cd3cd93229374..1d21a9c1d33e6e 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 31d95dc9c202da..60ce7fd54e890e 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 235e305f8473a2..d5388938bc7aaf 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -32,6 +32,7 @@ #undef VERBOSE_DEBUG #include +#include #include #include #include diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 071964c7847f1f..cc61055fb9befc 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 87fccf611b698c..a5b7cd6156987a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 5cf2633cdb0471..e92540a21b6b5a 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c @@ -85,7 +85,7 @@ * (20/10/1999) */ -#include +#include #include #include #include diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index c5fa584d8f0a17..db9a9e6ff6bee9 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index debc1fd74b0df2..8b9fd7534f698b 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index fc329c98a6e8ea..b106ce76997bff 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 0a643fa74cab74..e45a3a680db8f6 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -50,6 +50,7 @@ #include #include #include +#include /* * Version 
Information diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 9fb8b1e6ecc26d..b6d8bf475c9270 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index db1a4abf280613..19c416d69eb933 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index eb433922598cff..ab78111e09680f 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* Defines */ diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 944de657a07a8d..49ce2be90fa00e 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5c98ad4d2f4c25..9b519897cc17b8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4a00140a762415..dcbe2e29bf1704 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "vhost.h" diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 038ac6934fe9d7..9da90bd242f4e4 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Cursor position address diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 8b810696a42b6c..fd2b372d0264de 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 95f42872b787ad..4f6b00efc27c2d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include #include diff --git a/fs/aio.c b/fs/aio.c index 7e2ab9c8e39c27..0bb108476de282 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 105d4d43993e9f..a8812d95359dc8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,6 +20,7 @@ #define __BTRFS_CTREE__ #include +#include #include #include #include diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cd966f276a8d70..68c78be19d5b78 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa84..1858fc20eb7d7c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include "cifsfs.h" #include "cifspdu.h" diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 822629126e89fd..f40e3953e7fe3c 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/dlm/user.c b/fs/dlm/user.c 
index 1ce908c2232c38..23488f559cf969 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "dlm_internal.h" #include "lockspace.h" diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 158a3a39f82de7..039e627194a93b 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -22,6 +22,8 @@ #include #include +#include + #include "ecryptfs_kernel.h" /** diff --git a/fs/eventfd.c b/fs/eventfd.c index 1231cd1999d8fe..68b9fffcb2c8e7 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5ec16313da1a1c..341251421ced00 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e498416..77798a46b0c682 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1375fef11146d5..1602b4bccae61e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f11792672977d2..b681b43c766e11 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8b907c5cc9135c..0515f0a686375f 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "incore.h" #include "glock.h" diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e3ee387a6dfebf..361796a84fce4f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -10,7 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index aebb78f9e47f2a..d352f3a6af7f09 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 54de77e78775ed..8f96461236f655 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -11,7 +11,7 @@ #include #include -#include /* remove ASAP */ +#include /* remove ASAP */ #include #include #include diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index cda0774c2c9c53..a7bbe879cfc3dd 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -14,7 +14,7 @@ #include #include #include -#include /* For cond_resched() */ +#include #include "nodelist.h" #include "debug.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b194218..685565b229c311 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 7d18d62e8e079c..febed1217b3fd7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -30,6 +30,8 @@ #include #include #include +#include + #include "nilfs.h" #include "btnode.h" #include "page.h" diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 7ebfca6a14272e..2b37f27858345c 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git 
a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1cf41c623be1d1..498d609b26c7db 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -30,7 +30,7 @@ #include #include /* roundup() */ #include /* LOOKUP_FOLLOW */ -#include /* struct user */ +#include #include /* struct kmem_cache */ #include #include diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 358ed7e1195a5c..c4f68c338735b9 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d4ec0d8961a6e9..fb15a96df0b606 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 32fd261ae13d02..a2b19fbdcf4695 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index f70cda2f090d54..9cecf4857195ce 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -28,6 +28,7 @@ */ #include +#include #include #include diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8dce4099a6cae2..3b7c937a36b528 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DLM_GLUE #include diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a25969b..8948683b367f59 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 53d3f830358fc6..a34aa7aa256370 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/splice.c b/fs/splice.c index 4ef78aa8ef61e8..e49336555739ef 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 18d12bfff7709c..973607df579db3 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 7a989de224f4b7..592fdf7111cbfb 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -55,7 +55,7 @@ typedef __u32 xfs_nlink_t; #include #include #include -#include +#include #include #include #include diff --git a/include/drm/drm_os_linux.h b/include/drm/drm_os_linux.h index 86ab99bc0ac50e..35e1482ba8a155 100644 --- a/include/drm/drm_os_linux.h +++ b/include/drm/drm_os_linux.h @@ -4,6 +4,7 @@ */ #include /* For task queue support */ +#include #include #ifndef readq diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550afa2..11a7536c0fd26d 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include +#include #include #include #include diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 574ff2ae94beeb..6cd94e5ee113f0 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -12,6 +12,7 @@ #include #include 
#include +#include #include /* need __user */ #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b96832df239ec7..c0452de83086e5 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -26,6 +26,7 @@ #include #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 57f6311e24052f..82ff5726bc1b43 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -19,7 +19,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d4f79849136192..1c8c8707853a96 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -12,7 +12,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 5eacab880f672c..91e7bc8e9d5ac6 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -6,7 +6,7 @@ * - Derived also from comments by Linus */ #include -#include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 4fe8d8ad439642..9bd6e768164ebb 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 6a28b79710f002..cccc417a813502 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index f063a25d44493f..b294a8ee284212 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -11,7 +11,7 @@ * Waiting for completion is a typically sync point, but not an exclusion point. 
*/ -#include +#include #include /** diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 82f0dff90030fc..3d5610dcce114d 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,4 @@ -#include +#include #include void __init_swait_queue_head(struct swait_queue_head *q, const char *name, diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9453efe9b25a64..1fedfcf6fc9b5c 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e6dc9a538efa21..04939053c823aa 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e11d8d9f419e2..df512e8c04d48e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 82a6bfa0c30789..f4465d2a25d132 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 6d40944960de77..010410990bc691 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/compaction.c b/mm/compaction.c index 0fdfde016ee283..81e1eaa2a2cf1b 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/gup.c b/mm/gup.c index 94fab8fa432b2f..9c047e951aa3d0 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2e0e8159ce8e06..a7aa811b7d14c5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1d3ed58f92abe1..295479b792ec48 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633fbf..de8cdef4ef9bb4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 9f0ad2a4f10244..479e631d43c2f6 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/net/atm/svc.c b/net/atm/svc.c index 878563a8354d10..db9794ec61d88e 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -10,7 +10,7 @@ #include /* printk */ #include #include -#include /* jiffies and HZ */ +#include #include /* O_NONBLOCK */ #include #include /* ATM stuff */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cfb2faba46ded9..69e1f7d362a8b7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 46ac686c8911e7..bb308224099c47 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include 
#include #include diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1015d9c8d97ddb..b5faff458d8bea 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include #include diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a8ba752732c985..f307b145ea5405 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -29,6 +29,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7511df72347f30..aa1a814ceddca7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3125ce670c2f24..e4e9a2da1e7e7a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0f4034934d56f7..0b5dd607444c71 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 05e8946ccc0355..79aee759aba590 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/core/ethtool.c b/net/core/ethtool.c index be7bab1adcde3d..aecb2c7241b697 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b0c04cf4851d67..3945821e9c1f8f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index b66c84db0766f5..91a15b3c4915a3 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5d367b7ff542c0..cebedd545e5e28 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c33..77362b88a661b5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 817b1b186aff78..f6061c4bb0a805 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 35dbf3dc3d2831..7025dcb853d06b 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,8 +13,9 @@ * 2) as a control channel (write commands, read events) */ -#include +#include #include + #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 13190b38f22ee5..89bbde1081ce5e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a646f348124095..309062f3debe29 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5e929638242028..06186d608a274e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b9edf5fae6ae97..879885b31cce5f 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "nfc.h" #include "llcp.h" diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 8bad5624a27a9f..222bedcd95754c 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ffd5f229758487..a6c8da3ee89349 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 40a1ef2adeb45c..c3be03e8d09821 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -76,6 +76,8 @@ #include #include #include +#include + #include "ar-internal.h" __read_mostly unsigned int rxrpc_max_client_connections = 1000; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f3a688e108430a..28274a3c9831a1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8ada..19b36c60fb4cb5 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71fa..43e4045e72bc00 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,6 +35,8 @@ */ #include +#include + #include "core.h" #include "name_table.h" #include "node.h" diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 8a398b3fb532aa..9192ead6675114 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 849c4ad0411ee2..8d592a45b59786 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/sound/core/control.c b/sound/core/control.c index fb096cb20a80d1..c109b82eef4bd4 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 36d2416f90d994..9602a7e38d8a81 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 698a014195152a..36baf962f9b081 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index bb1261591a1f30..5088d4b8db2222 100644 --- 
a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index aec9c92250fd72..13dec5ec93f20d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8da9cb245d0150..ab890336175fbc 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index d7b4d016b54758..afa007c0cc2d45 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_writeq.c b/sound/core/seq/oss/seq_oss_writeq.c index 1f6788a1844411..5e04f4df10e416 100644 --- a/sound/core/seq/oss/seq_oss_writeq.c +++ b/sound/core/seq/oss/seq_oss_writeq.c @@ -28,6 +28,7 @@ #include "../seq_clientmgr.h" #include #include +#include /* diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 86240d02b53077..448efd4e980edf 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -21,6 +21,8 @@ #include #include +#include + #include "seq_fifo.h" #include "seq_lock.h" diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index dfa5156f358563..1a1acf3ddda4c9 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/core/timer.c b/sound/core/timer.c index ad153149b23167..6d4fbc43924692 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h index 175da875162d83..17678d6ab5a2d9 100644 --- a/sound/firewire/bebob/bebob.h +++ b/sound/firewire/bebob/bebob.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h index e6c07857f4755f..da00e75e09d4ae 100644 --- a/sound/firewire/dice/dice.h +++ b/sound/firewire/dice/dice.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/digi00x/digi00x.h b/sound/firewire/digi00x/digi00x.h index 2cd465c0caae84..9dc761bdacca71 100644 --- a/sound/firewire/digi00x/digi00x.h +++ b/sound/firewire/digi00x/digi00x.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index d73c12b8753da2..9b19c7f05d5791 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h index 2047dcb2762516..d54d4a9ac4a159 100644 --- a/sound/firewire/oxfw/oxfw.h +++ b/sound/firewire/oxfw/oxfw.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h index 1f61011579a7fc..d3cd4065722b33 100644 --- a/sound/firewire/tascam/tascam.h +++ b/sound/firewire/tascam/tascam.h @@ -17,6 +17,7 @@ #include #include #include +#include 
#include #include diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c index 25f6788ccef36b..06505999155faf 100644 --- a/sound/isa/gus/gus_pcm.c +++ b/sound/isa/gus/gus_pcm.c @@ -27,6 +27,8 @@ #include #include +#include + #include #include #include diff --git a/sound/isa/msnd/msnd.c b/sound/isa/msnd/msnd.c index 835d4aa26761e5..8109ab3d29d1be 100644 --- a/sound/isa/msnd/msnd.c +++ b/sound/isa/msnd/msnd.c @@ -36,6 +36,7 @@ ********************************************************************/ #include +#include #include #include #include diff --git a/sound/isa/sb/emu8000.c b/sound/isa/sb/emu8000.c index 94c411299e5a0d..ec180708f160ca 100644 --- a/sound/isa/sb/emu8000.c +++ b/sound/isa/sb/emu8000.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include #include diff --git a/sound/isa/sb/emu8000_patch.c b/sound/isa/sb/emu8000_patch.c index 71d13c0bb7463c..c2e41d2762f70a 100644 --- a/sound/isa/sb/emu8000_patch.c +++ b/sound/isa/sb/emu8000_patch.c @@ -20,6 +20,8 @@ */ #include "emu8000_local.h" + +#include #include #include diff --git a/sound/isa/sb/emu8000_pcm.c b/sound/isa/sb/emu8000_pcm.c index 250fd0006b5360..32f234f494e573 100644 --- a/sound/isa/sb/emu8000_pcm.c +++ b/sound/isa/sb/emu8000_pcm.c @@ -19,6 +19,8 @@ */ #include "emu8000_local.h" + +#include #include #include #include diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 718d5e3b7806f0..4dae9ff9ef5afd 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/oss/dmabuf.c b/sound/oss/dmabuf.c index e3f29132d3acef..c5dd396c66a226 100644 --- a/sound/oss/dmabuf.c +++ b/sound/oss/dmabuf.c @@ -27,6 +27,8 @@ #include #include +#include + #include "sound_config.h" #include "sleep.h" diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 5f248fb41beac9..fb3bbceb1fefdc 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -182,6 +182,7 @@ #include #include #include +#include #include diff --git a/sound/oss/midibuf.c b/sound/oss/midibuf.c index 8f45cd999965cd..701c7625c9713a 100644 --- a/sound/oss/midibuf.c +++ b/sound/oss/midibuf.c @@ -16,6 +16,8 @@ #include #include #include +#include + #define MIDIBUF_C #include "sound_config.h" diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index a8bb4a06ba6f83..f34ec01d22394d 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -41,6 +41,8 @@ #include #include #include +#include + #include #include #include "sound_config.h" diff --git a/sound/oss/sound_config.h b/sound/oss/sound_config.h index f2554ab78f5e5d..5253b0a704379e 100644 --- a/sound/oss/sound_config.h +++ b/sound/oss/sound_config.h @@ -16,6 +16,7 @@ #include #include +#include #include "os.h" #include "soundvers.h" diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c index f3af63e58b363f..97899352b15fb0 100644 --- a/sound/oss/swarm_cs4297a.c +++ b/sound/oss/swarm_cs4297a.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd5de09bf36251..ae79f7679cc238 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include From 5b3cc15aff243cb518cbeed8b1a220cbfd023d9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 
20:43:54 +0100 Subject: [PATCH 0525/1911] sched/headers: Prepare to move the memalloc_noio_*() APIs to Update the .c files that depend on these APIs. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/base/power/runtime.c | 2 +- drivers/md/dm-bufio.c | 1 + drivers/md/dm-ioctl.c | 1 + drivers/usb/core/hub.c | 2 +- fs/ocfs2/cluster/tcp.c | 1 + fs/xfs/kmem.c | 1 + fs/xfs/xfs_buf.c | 1 + mm/page_alloc.c | 1 + mm/vmscan.c | 1 + net/ceph/crypto.c | 1 + 10 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a14fac6a01d316..7bcf80fa9adad4 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -7,7 +7,7 @@ * This file is released under the GPLv2. */ -#include +#include #include #include #include diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d36d427a9efbf3..df4859f6ac6ad6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a5a9b17f0f7fcc..4da6fc6b1ffd33 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a56c75e09786d5..f0dd08198d7426 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ec000575e86343..4348027384f5ed 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -54,6 +54,7 @@ */ #include +#include #include #include #include diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 339c696bbc0186..2dfdc62f795e63 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include +#include #include #include #include diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8c7d01b759221b..b6208728ba3976 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a7a6aac95a6d15..eaa64d2ffdc553 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmscan.c b/mm/vmscan.c index 70aa739c6b68be..bc8031ef994d57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 85747b7f91a918..46008d5ac504cd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include From fd7712337ff09a248df424c5843c149586a3f017 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: [PATCH 0526/1911] sched/headers: Prepare to remove the include from is still needed - also update other headers and .c files that depend on sched.h including gfp.h (and its sub-headers) for them. 
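(Illustrative aside, not part of the series: the kind of consumer this preparation protects is any unit that today sees the allocator definitions only by accident, through sched.h's own includes. The file and function below are hypothetical; they only sketch what "update other headers and .c files that depend on sched.h including gfp.h" means in practice, namely spelling the dependency out.)

/* hypothetical-consumer.c: previously compiled only because <linux/sched.h>
 * happened to drag in <linux/gfp.h>; naming the allocator header explicitly
 * keeps it building once that transitive include is removed. */
#include <linux/gfp.h>		/* alloc_page(), GFP_KERNEL */
#include <linux/sched.h>

static struct page *grab_scratch_page(void)
{
	return alloc_page(GFP_KERNEL);
}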
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/sched/mm.h | 1 + lib/percpu_ida.c | 1 + 3 files changed, 3 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index e87c97e1a94778..8ae7b3d85658c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -56,6 +56,7 @@ struct sched_param { #include #include #include +#include #include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index a7adba1cd0a9b2..1cf7941bb9464c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_MM_H #include +#include #endif /* _LINUX_SCHED_MM_H */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 010410990bc691..6016f1deb1f5f7 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -14,6 +14,7 @@ * General Public License for more details. */ +#include #include #include #include From 03441a3482a31462c93509939a388877e3cd9261 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0527/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kvm/book3s_hv.c | 1 + arch/s390/appldata/appldata_os.c | 1 + crypto/mcryptd.c | 1 + drivers/cpuidle/governors/menu.c | 1 + fs/proc/loadavg.c | 1 + fs/proc/root.c | 1 + fs/proc/stat.c | 1 + include/linux/sched/stat.h | 6 ++++++ kernel/debug/kdb/kdb_main.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/sched.h | 1 + kernel/sys.c | 1 + kernel/time/tick-sched.c | 1 + virt/kvm/kvm_main.c | 1 + 15 files changed, 20 insertions(+) create mode 100644 include/linux/sched/stat.h diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fc56a9ce785d6e..1ec86d9e2a82a3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 079446619f8903..45b3178200abc1 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c207458d629933..4e64726588524f 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe86060e48f75e..6d8a4026a9036a 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index add5255ce7e027..983fce5c24183d 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index 5d5fed20bfffe1..a50ba388255f9c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ 
-14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index e47c3e8c4dfed5..b95556e036bb54 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h new file mode 100644 index 00000000000000..30fe012aecb018 --- /dev/null +++ b/include/linux/sched/stat.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_STAT_H +#define _LINUX_SCHED_STAT_H + +#include + +#endif /* _LINUX_SCHED_STAT_H */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 308937c7a68772..bf48a492b4beb1 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 48649ccbb649d3..5bad7dce1b7b3f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 8fbe8bcd1e200a..8632905f64c5fc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 04f376cf7ba993..6c8a1db44dded4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index e2d743fb7f55f0..8c0392c8be5161 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5e9e123b4321e9..29d79b175141a2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ae79f7679cc238..799499417f5b98 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include From 370c91355c76cbcaad8ff79b4bb15a7f2ea59433 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0528/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
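(A short sketch of the placeholder just described: the angle brackets in the new-file hunk below no longer show its include target in this copy, but per the description it can only redirect back to the main scheduler header, so the new <linux/sched/nohz.h> amounts to the following.)

#ifndef _LINUX_SCHED_NOHZ_H
#define _LINUX_SCHED_NOHZ_H

#include <linux/sched.h>	/* pure redirect for now; the nohz declarations move here later */

#endif /* _LINUX_SCHED_NOHZ_H */

Because the redirect changes nothing semantically, every intermediate commit keeps building, which is what makes the step obviously correct and bisectable.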
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 6 ++++++ kernel/sched/sched.h | 1 + kernel/time/hrtimer.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timer.c | 1 + 5 files changed, 10 insertions(+) create mode 100644 include/linux/sched/nohz.h diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h new file mode 100644 index 00000000000000..fe6caf0dab1054 --- /dev/null +++ b/include/linux/sched/nohz.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NOHZ_H +#define _LINUX_SCHED_NOHZ_H + +#include + +#endif /* _LINUX_SCHED_NOHZ_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6c8a1db44dded4..5979f47c422c13 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index df512e8c04d48e..aed2207f5b2d0b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 29d79b175141a2..7fe53be860778b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f4465d2a25d132..d7999cb062290c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include From b17b01533b719e9949e437abf66436a875739b40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0529/1911] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
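(For the consumer side, a hypothetical stand-in rather than one of the hunks that follow: each file in the list below keeps its existing scheduler include and simply gains the new header next to it, so it is unaffected when the declarations it relies on later move out of sched.h.)

#include <linux/sched.h>
#include <linux/sched/debug.h>	/* added now, while it is still a no-op redirect */

static void report_stuck_task(struct task_struct *p)
{
	sched_show_task(p);	/* a debug helper of the kind this header will eventually own */
}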
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/traps.c | 1 + arch/arc/kernel/ctx_sw.c | 1 + arch/arc/kernel/stacktrace.c | 2 ++ arch/arc/kernel/troubleshoot.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/stacktrace.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm/mm/alignment.c | 1 + arch/arm/mm/fault.c | 1 + arch/arm/probes/kprobes/core.c | 1 + arch/arm64/kernel/probes/kprobes.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/arm64/mm/fault.c | 1 + arch/avr32/kernel/nmi_debug.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/dumpstack.c | 2 ++ arch/blackfin/kernel/early_printk.c | 1 + arch/blackfin/kernel/nmi.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/blackfin/kernel/traps.c | 1 + arch/blackfin/mach-common/ints-priority.c | 1 + arch/blackfin/mm/isram-driver.c | 1 + arch/c6x/kernel/traps.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/traps.c | 2 ++ arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/traps.c | 2 ++ arch/cris/kernel/irq.c | 1 + arch/cris/kernel/stacktrace.c | 2 +- arch/cris/kernel/traps.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/kernel/traps.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/traps.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/hexagon/kernel/vm_events.c | 1 + arch/ia64/hp/sim/simserial.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/traps.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/traps.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/traps.c | 1 + arch/m68k/mac/macints.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/stacktrace.c | 1 + arch/metag/kernel/traps.c | 1 + arch/metag/mm/fault.c | 1 + arch/microblaze/kernel/exceptions.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/traps.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/traps.c | 1 + arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 1 + arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-irq.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/traps.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/traps.c | 1 + arch/nios2/mm/fault.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/traps.c | 1 + arch/parisc/kernel/pa7300lc.c | 1 + arch/parisc/kernel/process.c | 1 + arch/parisc/kernel/signal.c | 1 + arch/parisc/kernel/traps.c | 1 + arch/parisc/kernel/unaligned.c | 1 + arch/parisc/mm/fault.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/stacktrace.c | 1 + arch/powerpc/kernel/traps.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/traps.c | 1 + arch/s390/mm/fault.c | 1 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/nmi_debug.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/stacktrace.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_64.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/stacktrace.c | 1 + arch/sparc/kernel/sun4m_irq.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + 
arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/kernel/traps_64.c | 1 + arch/sparc/mm/fault_64.c | 1 + arch/tile/include/asm/stack.h | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/tile/kernel/traps.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 1 + arch/um/drivers/mconsole_kern.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/sysrq.c | 2 ++ arch/um/kernel/trap.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/stacktrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/alignment.c | 1 + arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/doublefault.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/dumpstack_32.c | 1 + arch/x86/kernel/dumpstack_64.c | 1 + arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/mm/extable.c | 2 ++ arch/x86/platform/uv/uv_nmi.c | 1 + arch/x86/um/sysrq_32.c | 1 + arch/x86/um/sysrq_64.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/traps.c | 1 + drivers/base/power/main.c | 1 + drivers/tty/sysrq.c | 2 ++ drivers/tty/tty_ldsem.c | 1 + drivers/tty/vt/keyboard.c | 2 ++ fs/proc/base.c | 1 + include/linux/sched/debug.h | 6 ++++++ kernel/debug/kdb/kdb_bt.c | 1 + kernel/debug/kdb/kdb_main.c | 1 + kernel/hung_task.c | 1 + kernel/latencytop.c | 1 + kernel/locking/mutex.c | 1 + kernel/locking/rtmutex-debug.c | 1 + kernel/locking/rtmutex.c | 1 + kernel/locking/rwsem-spinlock.c | 1 + kernel/locking/rwsem-xadd.c | 1 + kernel/locking/rwsem.c | 1 + kernel/locking/semaphore.c | 1 + kernel/panic.c | 1 + kernel/power/process.c | 1 + kernel/printk/printk.c | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/rcu/update.c | 1 + kernel/sched/completion.c | 1 + kernel/sched/sched.h | 2 +- kernel/sched/wait.c | 1 + kernel/signal.c | 1 + kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 1 + kernel/time/timer.c | 1 + kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 2 ++ lib/dump_stack.c | 1 + lib/nmi_backtrace.c | 1 + 166 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 include/linux/sched/debug.h diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index bca963a4aa4880..88f7a974d311ed 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 6448ab00043d23..b137390e87e792 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 6f4cb0dab1b945..9e1ae9d41925ff 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -16,6 +16,7 @@ #include #include +#include #ifdef CONFIG_ARC_PLAT_EZNPS #include #endif diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index b9192a653b7e3a..74315f302971b1 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 0c48907f56a9a9..f9caf79186d42d 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 91d2d5b014145d..03d46395d1ff95 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 92b72375c4c72a..3a2fa203637a99 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index dc5f1a22b3c9ad..a9dad001b97daa 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index d7fe2de9cc9e6a..2c96190e018bd6 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 520c7778d330c4..ff8b0aa2dfde88 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a4ec240ee7ba38..b6dc9d838a9a39 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f0593c92279bf6..2a07aae5b8a264 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1ad48f93abdd59..40a16cdd98736d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 8a552a33c6efa2..7597e42feeea26 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5b8779a849a291..5a5d8379183756 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 30dd60ab8a6511..4bf899fb451baf 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/nmi_debug.c b/arch/avr32/kernel/nmi_debug.c index 3414b8566c291f..25823049bb99ea 100644 --- a/arch/avr32/kernel/nmi_debug.c +++ b/arch/avr32/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 68e5b9dac0596b..dac022d88d9d86 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 95ba6d9e9a3d80..3c992c1f8ef282 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -10,6 +10,8 @@ #include #include #include +#include + #include /* diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c index 61fbd2de993dd9..4b89af9243d392 100644 --- a/arch/blackfin/kernel/early_printk.c +++ b/arch/blackfin/kernel/early_printk.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 9919d29287dce9..633c37083e877f 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 4aa5545c4fde14..e95d094c20a648 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 01e546ad2f7acd..23c05b8effb333 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 32676d7721b171..a323a40a46e917 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 4986b4fbcee982..13e94bf9d8ba5f 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/isram-driver.c b/arch/blackfin/mm/isram-driver.c index 7e2e674ed4440a..aaa1e64b753b9b 100644 --- a/arch/blackfin/mm/isram-driver.c +++ b/arch/blackfin/mm/isram-driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index dcc2c2f6d67c8d..09b8a40d56807b 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 96e5afef6b47b8..068a403742004b 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/traps.c b/arch/cris/arch-v10/kernel/traps.c index 96d004fe974053..c0a501f29bd89d 100644 --- a/arch/cris/arch-v10/kernel/traps.c +++ b/arch/cris/arch-v10/kernel/traps.c @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 4d1afa9f9fd367..613c2d71bf10f9 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c index ad6174e217c932..a34256515036c4 100644 --- a/arch/cris/arch-v32/kernel/traps.c +++ b/arch/cris/arch-v32/kernel/traps.c @@ -5,6 
+5,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 694850e8f077af..09b864f46f8a76 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/cris/kernel/stacktrace.c b/arch/cris/kernel/stacktrace.c index 99838c74456dd5..f1cc3aaacd8d7b 100644 --- a/arch/cris/kernel/stacktrace.c +++ b/arch/cris/kernel/stacktrace.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index b2a312a7afc6b8..a01636a12a6e87 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_KALLSYMS #include #endif diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index b306241c4ef22f..cf4c34c09ab332 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 43c134d6081ce5..ce29991e4219dd 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 891974a1170440..d7cabf373fe3d6 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 044a3612584615..ee7e3ce40d7857 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index d9edfd3fc52af9..3bdd9592d025f8 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 4496bcf605efbd..b55f13c70b3419 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 741aaa917cda5c..04f57ef2200929 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 21fd50def2708b..de8cba12101315 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 5ac51069e45342..f9fe3ac64242a1 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 52deab683ba137..804b251ee5d146 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 48ba46b025e17e..7b1fe9462158e1 100644 --- a/arch/ia64/kernel/traps.c +++ 
b/arch/ia64/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e0568bee60c072..1a227c1fdb1a4f 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index c3c5fdfae920d5..7c05bb3935978c 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index f0a8e9b332cda7..b4a4675f41190b 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 558f3840273783..a926d2c88898c2 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index b5cd06df71fd74..9637dee90dac0e 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 35062796edf222..2e611bd3751585 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c index 5510361d5beac8..4f806d66a58c6b 100644 --- a/arch/metag/kernel/stacktrace.c +++ b/arch/metag/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 17b2e2e38d5a0c..5ce67f9124aa5d 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index c765b3621b9b9f..5055477486b6f1 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 42dd12a62ff567..e6f338d0496bbd 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index b2dd37196b3b16..8c5fdb2e285027 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index cb619533a19270..45bbba9d919f91 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 803e255b6fc376..97574cdb532d53 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c 
b/arch/mips/kernel/stacktrace.c index 506021f62549d9..986f910961d9dc 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006 Atsushi Nemoto */ #include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 49c6df20672a9d..1a9366a157cb22 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 9960a8302eac59..1f2a5bc4779e6a 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index f8919b6a24c884..d12879eb2b1fa9 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -11,6 +11,7 @@ #include #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ +#include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index ba8f46d80ab8a5..57d8c7486fe6b7 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index 838d8589a1c092..a6a0ff7f5aed00 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index e5def2217f72db..a1e2f8301d9475 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index a7a987c7954f0a..800fd08019698a 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. 
*/ #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 2f8c74f93e705a..3ad87a6b119b62 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 72ed30a93c8519..8184e7d6b3857d 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index e7a14e1e0d6b6d..b804dd06ea1cec 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 6e9d1cb519f245..899339dac938b1 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 7e81ad258bca39..2354ab88d9484e 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/pa7300lc.c b/arch/parisc/kernel/pa7300lc.c index 8a89780223aa35..9b245fc6756095 100644 --- a/arch/parisc/kernel/pa7300lc.c +++ b/arch/parisc/kernel/pa7300lc.c @@ -5,6 +5,7 @@ * Copyright (C) 2000 Philipp Rumpf */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea6603ee8d2498..8c283caf2b4d3e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index e58925ac64d105..9e03296641d7be 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 378df9207406f2..991654c88eec86 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index a08ab481e5567d..e36f7b75ab07b3 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 1a0b4f63f0e90f..c3cac4ddfe9cdc 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4379a079b3c25f..76f58b87dcb224 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 4f24606afc3f5e..66711958493cd3 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6cc56b61d0173..ff365f9de27a1c 100644 --- 
a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 55d4fe174fd972..72c584d62f593d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582cb1..ed99ff911b676a 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0085b2d8ed7d3d..e66687dc61446d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 283ad7840335c1..f787b9d8f54c35 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index bb5560eb2435ec..5845d3028ffca9 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fa624f30f783e7..8a54d320fb41db 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 8dfe645bcc4b85..d00cab2f50f9a9 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c index ff0abbd1e6526a..730d928f0d1242 100644 --- a/arch/sh/kernel/nmi_debug.c +++ b/arch/sh/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include enum nmi_action { diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 51741850a71540..5098274e0f231b 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e0b271bffd6a53..ced21a417d9d96 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index bf989e063a0cdb..7a73d2763e1ba3 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -10,6 +10,7 @@ * for more details. */ #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 3036dee854d1e9..bc5c9f347b9e9f 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 00835edb6e20f8..014fb08cf133a4 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -10,6 +10,7 @@ * for more details. 
*/ #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 48ffc3e7d1dd9a..237a471c8ed550 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d249ca10b20337..98f6ff6eaa5562 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/stacktrace.c b/arch/sparc/kernel/stacktrace.c index e78386a0029f69..be4c14cccc05d1 100644 --- a/arch/sparc/kernel/stacktrace.c +++ b/arch/sparc/kernel/stacktrace.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index da737c712fa89b..aa84da0b2d30d3 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ff3573059936a9..7aecb239626dde 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c521ee2770836a..ef4520efc8130c 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index ecddac5a4c9628..26740a2f285d76 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -10,6 +10,7 @@ */ #include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index e022d7b0039045..4ff4c35f76b2ac 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 643c149a315154..b84c4dd14954f1 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index c3cb42615a9fa4..3573325e340b64 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -17,6 +17,8 @@ #include #include +#include + #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index c84c54a1ac5501..557b4c8435d08d 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 87299a6cfec87d..8cc92ae6eb2750 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 22bbbd3ff4a3a4..a1c0787fe9d85c 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 39f427bb0de2db..54804866f238a5 
100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index f229e979584e03..142acae7831674 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 709f8e9ba3e967..005c91c2adcabb 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 8a4c72af3bc0ab..af326fb6510dbf 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 078630d6448c07..c6c45ea4021ff0 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index aa1b56f5ac6894..34ae555c3e700a 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 9711ae4aaa6ade..59158871b9fcc3 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d7c6b676b3a56a..c1a0eec88f1f78 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c index b34030bdabe3e0..9976e767d51c2e 100644 --- a/arch/unicore32/kernel/stacktrace.c +++ b/arch/unicore32/kernel/stacktrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 7f5e06f9a2026d..506e1a2127c6ce 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c index 24e836023e6cc4..3a7f6faa87940c 100644 --- a/arch/unicore32/mm/alignment.c +++ b/arch/unicore32/mm/alignment.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 82dfe32faaf41b..df083efe6ee007 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index b2f7207ba86c4b..f9c324e08d8554 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0cfd01d2754cc9..7b9e7e68f316c2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git 
a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bb3b5b9a689921..b0b3a3df7c2080 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index fac189efcc347e..a8b117e93b4620 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe164026..6384eb754a5830 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d17b1a940add08..f088ea4c66e72e 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6395e0cd7dd644..505be5589e52bd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 0653788026e288..330cae0025d0b7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include +#include #include #include #include diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61a7e9ea9aa16d..35ea061010a1a5 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,7 @@ #include #include +#include + #include #include diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 9743d0ccfec69a..c34bd8233f7c81 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index 16ee0e450e3e39..f2383484840d3b 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 38b4e4abd0f836..903ad91b624f29 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 826d25104846f5..afaa3588d869c2 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 84abd66e680dc4..d8bb2e62393e54 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 249e0304597f5b..9faee1c893e53c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/sysrq.c 
b/drivers/tty/sysrq.c index 65db0aeb3d805c..e5f0c7d0fe8de9 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 9229de43e19d18..4f6b1b10b53784 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 6b3a2c00974dd0..c5f0fc906136b5 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c5fa704e38c51..5914fed3712def 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 00000000000000..55bc0cd35fd5c2 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +#include + +#endif /* _LINUX_SCHED_DEBUG_H */ diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 9b976a42376d19..6ad4a9fcbd6f70 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "kdb_private.h" diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index bf48a492b4beb1..c8146d53ca677a 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 129247e569023c..f0f8e2a8496fea 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/kernel/latencytop.c b/kernel/latencytop.c index b5c30d9f46c508..545839b838d691 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 82ff5726bc1b43..198527a6214920 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 62b6cee8ea7f9e..97ee9df32e0f03 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1c8c8707853a96..6edc32ecd9c544 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 91e7bc8e9d5ac6..7bc24d477805d8 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -7,6 +7,7 @@ */ #include #include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 9bd6e768164ebb..34e727f18e4945 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "rwsem.h" diff 
--git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 45ba475d4be344..90a74ccd85a4b9 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 9512e37637dc70..561acdd399605b 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/panic.c b/kernel/panic.c index 3ec16e603e8828..a58932b41700a9 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -9,6 +9,7 @@ * to indicate a major problem. */ #include +#include #include #include #include diff --git a/kernel/power/process.c b/kernel/power/process.c index 2fba066e125fa9..85e2915d896166 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 47050eedc206ed..3caaaa04b92e78 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e456327a63d68e..50fee7689e7125 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9dabb04003be25..0a62a8f1caacfa 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "../time/tick-internal.h" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index da128deb10ec50..55c8530316c7ce 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index b294a8ee284212..53f9558fa925f3 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -12,6 +12,7 @@ */ #include +#include #include /** diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5979f47c422c13..e1e819f731b234 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1830,7 +1831,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); - #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 1fedfcf6fc9b5c..4d2ea6f2556838 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 762fcf64f0c349..15092fd8c7b1ee 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 04939053c823aa..ce3a31e8eb3687 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index aed2207f5b2d0b..ec08f527d7ee91 100644 --- 
a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d7999cb062290c..1dc0256bfb6e1f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 52718f4512e93b..03e0b69bb5bfd6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index b5de262a9eb98c..54a427d1f34454 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -13,6 +13,8 @@ #include #include +#include + #include #include diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c30d07e99dba4c..625375e7f11f98 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 5f7999eacad5da..4e8a30d1c22ff5 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef arch_trigger_cpumask_backtrace /* For reliability, we're prepared to waste bits here. */ From ef8bd77f332bb0a4e467d7171bbfc6c57aa08a88 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: [PATCH 0530/1911] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/hotplug.h> We are going to split <linux/sched/hotplug.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder <linux/sched/hotplug.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it.
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/smp.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/smp.c | 1 + arch/ia64/kernel/process.c | 1 + arch/metag/kernel/smp.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-cps.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/powerpc/platforms/85xx/smp.c | 1 + arch/powerpc/platforms/powermac/smp.c | 1 + arch/powerpc/platforms/powernv/smp.c | 1 + arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + arch/s390/kernel/smp.c | 1 + arch/sh/kernel/smp.c | 1 + arch/sparc/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/xtensa/kernel/smp.c | 1 + include/linux/sched/hotplug.h | 6 ++++++ kernel/cpu.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 4 +++- 22 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/hotplug.h diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5a07c5a4b8943c..2083a5370308dd 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 1ef40d82cfd3ca..3c9f7d18a7e693 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 827d52d78b67d1..f66e58523b9677 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 804b251ee5d146..2204ae450d6586 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c622293254e4e4..198eda75846b77 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4355a4cf4d74b2..4b94b7fbafa360 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 16e37a28f876cc..3daa2cae50b0b9 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index a2544c2394e423..1d3188c23bb884 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index cfcf240cedbe26..66ede5b11355d5 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index a83a6d26090d1d..078097a0b09d47 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ 
b/arch/powerpc/platforms/85xx/smp.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index c9eb7d6540eaeb..746ca7321b03c0 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index e39e6c428af1e4..8b67e1eefb5c0a 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a1b63e00b2f7d4..7bc0e91f871544 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -24,6 +24,7 @@ #include #include #include /* for idle_task_exit */ +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d0a74d7ce433c9..b5766e03cdcbc6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index edc4769b047eee..4abf119c129ccc 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8e3e13924594c2..15052d364e046a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe7a66e6b5a0d2..f3eb266d75ff75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fcea72019df798..f2b3e1725349c7 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 00000000000000..34670ed2489437 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +#include + +#endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 0aae3183b02932..78c206579d0182 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 11a0684a29a70a..956383844116ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e1e819f731b234..0974eb2ef50dc2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,8 +13,9 @@ #include #include #include +#include + #include -#include #include #include #include From 299300258d1bc4e997b7db340a2e06636757fe2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: [PATCH 
0531/1911] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/task.h> We are going to split <linux/sched/task.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder <linux/sched/task.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/arc/kernel/process.c | 2 ++ arch/arm/kernel/process.c | 1 + arch/arm/mm/init.c | 1 + arch/arm64/kernel/process.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/c6x/kernel/process.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/kernel/process.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/mm/mmu-context.c | 1 + arch/h8300/kernel/process.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/smpboot.c | 1 + arch/m68k/kernel/process.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/mips/kernel/process.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/smp.c | 1 + arch/nios2/kernel/process.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 1 + arch/score/kernel/process.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/mm/asids-debugfs.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/tile/kernel/process.c | 1 + arch/tile/kernel/smpboot.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 2 ++ arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/reboot.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 + arch/x86/kernel/fpu/init.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/xtensa/kernel/process.c | 1 + drivers/misc/kgdbts.c | 2 ++ drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 1 + fs/exec.c | 1 + fs/fcntl.c | 1 + fs/fs_struct.c | 1 + fs/proc/array.c | 1 + fs/proc/kcore.c | 1 + include/linux/sched/task.h | 6 ++++++ init/main.c | 1 + kernel/cgroup/cgroup.c | 1 + kernel/cpu.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/kmod.c | 1 + kernel/kthread.c | 1 + kernel/locking/lockdep.c | 1 + kernel/pid.c | 1 + kernel/pid_namespace.c | 1 + kernel/power/process.c | 1 + kernel/ptrace.c | 1 + kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/smpboot.c | 1 + kernel/sys.c | 1 + kernel/trace/ftrace.c | 1 + kernel/tracepoint.c | 1 + lib/dma-debug.c | 1 + mm/kmemleak.c | 1 + mm/memory-failure.c | 1 + mm/memory.c | 1 + mm/oom_kill.c | 1 + mm/rmap.c | 1 + mm/swapfile.c | 1 + mm/usercopy.c | 2 ++ security/keys/keyctl.c | 1 + security/selinux/hooks.c | 1 + 95 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task.h diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88f7a974d311ed..713b4fac998e70
100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a41a79a4f4feac..d618d26721ab9a 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 03d46395d1ff95..d4c7c9a1afa9aa 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2a9040dcf47e28..1d8558ff9827ff 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 40a16cdd98736d..f9b8e74ff8f212 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index dac022d88d9d86..75b944a5107c8e 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index e95d094c20a648..1a1f444004b1b0 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 23c05b8effb333..151f22196ab6f0 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 0ee7686a78f375..6b61779d426a06 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 068a403742004b..808c2186b70446 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 613c2d71bf10f9..c852df26294824 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 50a7dd451456cc..0bbd3a0c3d7087 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index cf4c34c09ab332..c96dbd4b86260b 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 4031289bf2ecc5..16946a58f64db9 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git 
a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index d7cabf373fe3d6..54e4d6f01865c6 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 3bdd9592d025f8..a2a822a875b6d8 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index f9fe3ac64242a1..79c7c46d7dc175 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 677a86826771a4..7e943d3c05ed79 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 2204ae450d6586..054facf22156fb 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 0b1153e610ea3c..04fe1436e1cccb 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 1a227c1fdb1a4f..2a450382934c90 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index f98d2f6519d633..a7d04684d2c770 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index b4a4675f41190b..5d335d178706ae 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 2e611bd3751585..801e6f927e62ab 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 5ce67f9124aa5d..23c5ac33a93fd0 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 8c5fdb2e285027..a642fe5ac5ffc5 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index ffc6bd3464d970..8cab633e0e5ade 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 97574cdb532d53..8bfb833b31588f 100644 --- a/arch/mips/kernel/process.c 
+++ b/arch/mips/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index a1e2f8301d9475..8ca7a9de09a55d 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index e65b5cc2fa67f1..d924f7a7970829 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 3ad87a6b119b62..5ee1ddf6a924c3 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 899339dac938b1..0c5ad3bde0f5c5 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 8c283caf2b4d3e..e5f97239cc5ed6 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 76f58b87dcb224..b99b12656f6fe7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ed99ff911b676a..250c41f0572100 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e4d811f179715a..3de07004c02e72 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index aae9480706c2c8..32924159d8c9ed 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 37e35330aae661..b76370a47bf95a 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 5098274e0f231b..493c3eb86aa502 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index ced21a417d9d96..056607185158b0 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index 110bd35165bfd4..e5539e0f8e3b11 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git 
a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 237a471c8ed550..ed87c45a785b26 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 98f6ff6eaa5562..4c82a73af893b3 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 557b4c8435d08d..dbb4fa76d1dda6 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 53ce940a50169a..f3fdd0c39b1278 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index 142acae7831674..8149c38f67b6cc 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 005c91c2adcabb..f58fa06a2214aa 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 770ec07b6a6af0..2df4e5d11939ae 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c6c45ea4021ff0..e76dc7c112519d 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 79218106a03365..0bc921cee0b18d 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index c1a0eec88f1f78..d58f1600b47799 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index d48af18e7baf94..0bbe0f3a039f64 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 19bdd1bf81607c..c2f8dde3255ca7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -7,6 +7,7 @@ #include #include +#include #include /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 505be5589e52bd..441f00e7be8f48 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a0ac3e81518ad8..d79ffa47aab84f 100644 --- a/arch/x86/kernel/process_32.c +++ 
b/arch/x86/kernel/process_32.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a61e141b6891ed..124f5652ae3cd5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 23d15565d02ad7..7c0abdc9a732a0 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index afaa3588d869c2..af33f9d4c624ac 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 99635dd9dbac7b..fc7efedbc4be25 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -103,6 +103,8 @@ #include #include #include +#include + #include #define v1printk(a...) do { \ diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index e5f0c7d0fe8de9..c6fc7141d7b281 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 985d33f0f31576..e6d1a6510886c5 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index e857c7260b1f88..65145a3df06519 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index e1c54f20325ca8..be8fbe289087e6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 543ed50f03872c..be0250788b737c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index cfe7f934a300aa..f3169b58af381c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index ea9f3d1ae83063..4ee55274f155fc 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 00000000000000..0023c91ff821ee --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +#include + +#endif /* _LINUX_SCHED_TASK_H */ diff --git a/init/main.c b/init/main.c index c891cfb8928f92..47956b22add169 100644 --- a/init/main.c +++ b/init/main.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e8f87bf9840c0e..0125589c742841 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 78c206579d0182..f7c063239fa5c7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -9,6 +9,7 @@ 
#include #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 5bad7dce1b7b3f..e53408d156df74 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 8632905f64c5fc..0af64d5d8b73d1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index 0c407f905ca4ef..ac5f5c2d098d92 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/kthread.c b/kernel/kthread.c index ef9b9eb809c743..2f26adea0f84d2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cdafaff926cac1..12e38c213b7011 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid.c b/kernel/pid.c index 0291804151b587..0143ac0ddceb9c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -38,6 +38,7 @@ #include #include #include +#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 25314c96fbe64a..7c8367854dc41b 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,6 +19,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/kernel/power/process.c b/kernel/power/process.c index 85e2915d896166..c7209f060eeb7c 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index bcdbdc19eb353e..0af9287121746d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0974eb2ef50dc2..6b7155ae5c335e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 15092fd8c7b1ee..25d099fd80f786 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 4a5c6e73ecd41e..1d71c051a9515c 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 8c0392c8be5161..7f1ecd2e41c11f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c060932639140..0d1597c9ee3057 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9f6984d52feca4..685c50ae63000a 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,6 +25,7 @@ #include #include #include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/lib/dma-debug.c 
b/lib/dma-debug.c index 60c57ec936dbdc..942adf13418d05 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 2df6d3687b2a8f..a3970573359e3a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b74984db386e64..27f7210e7fabd1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index d4994a28dc8559..a97a4cec2e1fcd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 69f75b0146076e..d083714a2bb924 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 0367a0506667ce..2da487d6cea83b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index ff2bf3f61b1419..521ef9b6064fea 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 7ccad05c0d5c42..d155e12563b139 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include enum { diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index bcb0b597c39187..52c34532c78562 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b12f873f92baf0..57ff536961446e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include

From 68db0cf10678630d286f4bbbbdfa102951a35faa Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 8 Feb 2017 18:51:37 +0100
Subject: [PATCH 0532/1911] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/task_stack.h>

We are going to split <linux/sched/task_stack.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files.

Create a trivial placeholder <linux/sched/task_stack.h> file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable.

Include the new header in the files that are going to need it.
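The technique, concretely: the patch first adds a do-nothing redirect header, then adds the new include to every file that will need it, and only a later patch moves the actual definitions. A minimal sketch of both halves follows; the placeholder contents mirror the new file added by this patch, while the consumer shown (kernel/fork.c, one of the files in the diffstat below) and the helpers named in the comment are illustrative of what eventually moves.

/* Stage 1: include/linux/sched/task_stack.h starts life as a pure redirect. */
#ifndef _LINUX_SCHED_TASK_STACK_H
#define _LINUX_SCHED_TASK_STACK_H

#include <linux/sched.h>	/* the stack helpers still live here for now */

#endif /* _LINUX_SCHED_TASK_STACK_H */

/*
 * Stage 2: consumers such as kernel/fork.c gain an explicit
 * #include <linux/sched/task_stack.h>, so the later patch that moves
 * task_stack_page()/end_of_stack() out of <linux/sched.h> cannot break
 * the build at this point in the series.
 */
#include <linux/sched/task_stack.h>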
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/ptrace.c | 1 + arch/alpha/kernel/signal.c | 1 + arch/arc/kernel/kgdb.c | 1 + arch/arc/kernel/process.c | 1 + arch/arc/kernel/ptrace.c | 1 + arch/arc/kernel/signal.c | 2 ++ arch/arm/kernel/perf_regs.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/ptrace.c | 1 + arch/arm/kernel/smp.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm64/include/asm/compat.h | 1 + arch/arm64/kernel/debug-monitors.c | 1 + arch/arm64/kernel/kgdb.c | 2 ++ arch/arm64/kernel/perf_regs.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/ptrace.c | 1 + arch/arm64/kernel/smp.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/kernel/process.c | 1 + arch/avr32/kernel/ptrace.c | 1 + arch/avr32/kernel/stacktrace.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/ptrace.c | 1 + arch/blackfin/kernel/signal.c | 1 + arch/blackfin/kernel/stacktrace.c | 1 + arch/blackfin/mach-common/smp.c | 1 + arch/c6x/kernel/process.c | 1 + arch/c6x/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/signal.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/ptrace.c | 1 + arch/cris/arch-v32/kernel/signal.c | 1 + arch/frv/kernel/process.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/signal.c | 1 + arch/hexagon/kernel/kgdb.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/ptrace.c | 1 + arch/hexagon/kernel/signal.c | 2 ++ arch/hexagon/kernel/stacktrace.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/ia64/kernel/setup.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/ptrace.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/ptrace.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/ptrace.c | 2 ++ arch/metag/kernel/signal.c | 1 + arch/metag/kernel/smp.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/ptrace.c | 1 + arch/microblaze/kernel/unwind.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/crash.c | 1 + arch/mips/kernel/perf_event.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/ptrace.c | 1 + arch/mips/kernel/ptrace32.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/syscall.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/mips/paravirt/paravirt-smp.c | 1 + arch/mips/sibyte/bcm1480/smp.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/ptrace.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/ptrace.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/ptrace.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/mm/fault.c | 1 + arch/powerpc/perf/perf_regs.c | 1 + arch/s390/include/asm/compat.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/kernel/compat_signal.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/ptrace.c | 1 + arch/s390/kernel/runtime_instr.c | 2 ++ arch/s390/kernel/signal.c | 1 + arch/s390/kernel/smp.c | 1 + arch/s390/kernel/uprobes.c | 2 ++ arch/score/kernel/process.c | 1 + arch/score/kernel/ptrace.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + 
arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/kgdb.c | 2 ++ arch/sh/kernel/process.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/ptrace_32.c | 1 + arch/sh/kernel/ptrace_64.c | 1 + arch/sh/kernel/signal_32.c | 1 + arch/sh/kernel/sys_sh32.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_32.c | 2 ++ arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/ptrace_64.c | 1 + arch/tile/kernel/compat_signal.c | 1 + arch/tile/kernel/kgdb.c | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/ptrace.c | 2 ++ arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/ptrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/x86/entry/common.c | 1 + arch/x86/entry/vdso/vma.c | 1 + arch/x86/ia32/ia32_aout.c | 1 + arch/x86/ia32/ia32_signal.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/fpu/regset.c | 1 + arch/x86/kernel/ioport.c | 1 + arch/x86/kernel/irq_64.c | 1 + arch/x86/kernel/perf_regs.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/ptrace.c | 1 + arch/x86/kernel/signal.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/kernel/step.c | 1 + arch/x86/kernel/traps.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/x86/kernel/vm86_32.c | 1 + arch/x86/mm/fault.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/ptrace.c | 1 + arch/xtensa/kernel/signal.c | 1 + arch/xtensa/kernel/smp.c | 1 + block/blk-map.c | 1 + drivers/hv/vmbus_drv.c | 2 ++ drivers/ide/ide-cd.c | 1 + drivers/md/bcache/closure.h | 1 + drivers/misc/lkdtm_usercopy.c | 1 + drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 1 + fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 3 ++- fs/binfmt_flat.c | 1 + fs/coredump.c | 1 + include/linux/elfcore.h | 2 ++ include/linux/perf_regs.h | 2 ++ include/linux/sched/task_stack.h | 6 ++++++ init/main.c | 1 + kernel/events/callchain.c | 2 ++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/printk/printk.c | 1 + kernel/sched/sched.h | 1 + kernel/seccomp.c | 1 + kernel/signal.c | 1 + kernel/trace/trace_stack.c | 1 + lib/debugobjects.c | 1 + lib/dma-debug.c | 1 + lib/syscall.c | 1 + mm/kasan/kasan.c | 1 + mm/kmemleak.c | 1 + mm/util.c | 1 + 178 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task_stack.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 3b9b2a382ba268..73446baa632e1e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 713b4fac998e70..0b963504072167 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index bc4d2cdcf21d76..285a82d491efb9 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index b8221f112eeecd..8129dd92cadc2e 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include 
#include #include #include diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecf6a78693758b..9a3c34af2ae810 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d618d26721ab9a..2a018de6d6cdbc 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 4442204fe238e6..31150060d38b41 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index d347bbc086fed1..48685445002e77 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -53,6 +53,8 @@ #include #include #include +#include + #include struct rt_sigframe { diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 592dda3f21fff0..c366b83bf9550d 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d4c7c9a1afa9aa..939e8b58c59d1e 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 46f7bab81c40a4..58e3771e4c5bb8 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2083a5370308dd..b724cff7ad60d0 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a9dad001b97daa..948c648fea009d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index eb8432bb82b8dd..e39d487bf72438 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -23,6 +23,7 @@ */ #include #include +#include #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2bd426448fc190..32913567da087d 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index d217c9e95b06cc..2122cd187f194e 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 3f62b35fb6f157..bd1b74c2436f5c 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f9b8e74ff8f212..043d373b836992 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -26,6 +26,7 @@ #include 
#include #include +#include #include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 64fc32ea342271..c142459a88f33e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f66e58523b9677..83c0a839a6adaa 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 7597e42feeea26..feac80c22f61f7 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5a5d8379183756..bdbd0c3febf4d8 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 75b944a5107c8e..ad0dfccedb7928 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index a89b893279bbbf..41a14e96a1dbaf 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -8,6 +8,7 @@ #undef DEBUG #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/stacktrace.c b/arch/avr32/kernel/stacktrace.c index c09f0d8dd67946..f8cc995cf0e0d0 100644 --- a/arch/avr32/kernel/stacktrace.c +++ b/arch/avr32/kernel/stacktrace.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 1a1f444004b1b0..b691ef875a40c7 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 360d996451633c..a6827095b99a88 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index ea570db598e503..5f51727792047b 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/blackfin/kernel/stacktrace.c b/arch/blackfin/kernel/stacktrace.c index 30301e1eace5df..17198f3650b6d2 100644 --- a/arch/blackfin/kernel/stacktrace.c +++ b/arch/blackfin/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index a2e6db2ce811c9..3ac98252d2996e 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 6b61779d426a06..c4ecb24c2d5c7b 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d1e..a27e1f02ce182d 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 808c2186b70446..e299d30105b53b 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index eca94c7d56e770..c2f2b9b83cc4da 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index db30c98e4926ca..bab4a8dd6bfda2 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c852df26294824..c530a8fa87ceb7 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index c366bc05466a66..0461e95bbb629a 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 816bf2ca93ef57..ea2e8e1398e875 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -3,6 +3,7 @@ */ #include 
+#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index c96dbd4b86260b..5a4c92abc99ec3 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 54e4d6f01865c6..0f5db5bb561b75 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index d784f7117f9abe..1e8070d08770a0 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c index 62dece3ad827b0..16c24b22d0b269 100644 --- a/arch/hexagon/kernel/kgdb.c +++ b/arch/hexagon/kernel/kgdb.c @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index a2a822a875b6d8..de715bab7956c7 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index 390a9ad14ca15f..ecd75e2e8eb391 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index c6b22b9945a72f..78aa7304a5c9f4 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index f94918b449a8aa..41866a06adf7f3 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index b55f13c70b3419..2942a9204a9aad 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7e943d3c05ed79..09f86ebfcc7b4f 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 054facf22156fb..d344d0d691aaca 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 04fe1436e1cccb..3f8293378a8304 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 32a6cc36296f5d..b2b4f521618002 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index ce4cc60d519bb0..5ce927c854a68c 100644 --- 
a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... */ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 2a450382934c90..d8ffcfec599cb6 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index a68acb9fa515c1..2d887400e30e3e 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 5d335d178706ae..e475c945c8b2bf 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 9cd86d7343a638..748c63bd008130 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 801e6f927e62ab..c4606ce743d240 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bdf6..5fd16ee5280c1a 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #define CREATE_TRACE_POINTS diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c index ce49d429c74aa6..338925d808e6da 100644 --- a/arch/metag/kernel/signal.c +++ b/arch/metag/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 198eda75846b77..cab2aa64ca826a 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 23c5ac33a93fd0..444851e510d5b4 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a642fe5ac5ffc5..e92a817e645fac 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 8cfa98cadf3d10..badd286882ae68 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 61c04eed14d5fa..34c270cb11fcbd 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index f06f97bd62df90..321752bcbab6ec 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -11,6 +11,7 @@ #define 
_ASM_FPU_H #include +#include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 5a71518be0f10b..ca25cd393b1ccb 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -8,6 +8,7 @@ #include #include #include +#include /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu = -1; diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index d64056e0bb567a..f298eb2ff6c296 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -15,6 +15,7 @@ */ #include +#include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8bfb833b31588f..fb6b6b650719ad 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index fdef26382c376e..33960126726539 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 4f099852562667..40e212d6b26b2d 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 986f910961d9dc..7c7c902249f2af 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index c86ddbaa4598cd..f1d17ece41819e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 66ede5b11355d5..64659fc7394053 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c index f8d3e081b2ebc7..72eb1a56c64508 100644 --- a/arch/mips/paravirt/paravirt-smp.c +++ b/arch/mips/paravirt/paravirt-smp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 4c71aea2566372..d0e94ffcc1b8b8 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 8ca7a9de09a55d..c9fa42619c6a9a 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index 976020f469c1d3..8009876a7ac4e7 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 5ee1ddf6a924c3..869a4d59de324d 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git 
a/arch/nios2/kernel/ptrace.c b/arch/nios2/kernel/ptrace.c index 681dda92eff161..de97bcb7dd4437 100644 --- a/arch/nios2/kernel/ptrace.c +++ b/arch/nios2/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0c5ad3bde0f5c5..828a29110459e8 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 228288887d74fa..eb97a8e7c8aa79 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index e5f97239cc5ed6..06f7ca7fe70b61 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b99b12656f6fe7..d645da302bf22f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8dc7586589727d..51def8a515be81 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index d24a8a3668fac5..cbd82fde57702e 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 352f7bdaf11fc1..0ddd37e6c29d90 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -5,6 +5,7 @@ */ #include #include +#include #include #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 84c0f908648366..1293c4066cfc80 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -35,6 +35,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 362350cc485c42..c620049c61f2df 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 72c584d62f593d..829e1c53005c57 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 250c41f0572100..20cd339e11aefc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 12020b55887bfd..c14df0a1ec3ca5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git 
a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index fffa0e5462afe0..429d3a782f1cb8 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 62a4c263e8878e..289dd50f974452 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b5766e03cdcbc6..47a973b5b4f184 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 66956c09d5bf92..314e0ee3016a34 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 32924159d8c9ed..eb64d7a677cb95 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 8b75e54816c19f..d8455e60bce06c 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -28,6 +28,7 @@ #include #include #include +#include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index b76370a47bf95a..547c7347845983 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index d00cab2f50f9a9..b564b1eae4aee6 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index adad46e41a1d93..4f04c6638a4d28 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 80a61c5e301589..f8a695a223dd36 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 493c3eb86aa502..2c7bdf8cb93426 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 056607185158b0..ee2abe96f9f3a5 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 1aabfd356b35ff..5fc3ff606210c8 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index c49d0d05a21517..1e0656d9e7afa6 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -18,6 +18,7 @@ #include #include 
#include +#include #include #include #include diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 5128d3001ee588..08bce11badc6a8 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -9,6 +9,7 @@ * */ #include +#include #include #include #include diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index d5287d76809c02..a2e1231a90a306 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index bc5c9f347b9e9f..b32d1c3a4655d3 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8bef9..57cff00cad1780 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index ed87c45a785b26..b6dac8e980f071 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4c82a73af893b3..1badc493e62ee7 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 901063c1cf7eed..df9e731a76f51b 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index c667e104a0c251..0e863f1ee08c04 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c index 9247d6b562f494..d4eb5fb2df9d79 100644 --- a/arch/tile/kernel/kgdb.c +++ b/arch/tile/kernel/kgdb.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index dbb4fa76d1dda6..f0a0e18e4dfbb8 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e279572824b15e..e1a078e6828e59 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 8cc92ae6eb2750..f2bf557bb005f9 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a1c0787fe9d85c..94ecbc6676e5d8 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 2df4e5d11939ae..31968677a0d079 100644 --- 
a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e76dc7c112519d..a9bd618200429c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 527fa5881915ce..ddecf326dab28d 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d58f1600b47799..d22c1dc7e39e9b 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/ptrace.c b/arch/unicore32/kernel/ptrace.c index 9f07c08da050d8..a102c2b4f35804 100644 --- a/arch/unicore32/kernel/ptrace.c +++ b/arch/unicore32/kernel/ptrace.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * this routine will get a word off of the processes privileged stack. diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 506e1a2127c6ce..5f25b39f04d430 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b83c61cfd1546d..370c42c7f04683 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 572cee3fccffc5..226ca70dc6bd43 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 7c0a711989d2c0..8d0879f1d42cad 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 95c0b4ae09b010..724153797209e9 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 7b9e7e68f316c2..09d4ac0d2661fd 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c114b132d12178..b188b16841e376 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -5,6 +5,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc851745048..ca49bab3e4677c 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6b0678a541e2c2..3be74fbdeff27f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -15,6 +15,7 @@ #include #include 
#include +#include #include #include diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index da8cb987b97312..587d887f7f17b2 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 441f00e7be8f48..56b059486c3b32 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d79ffa47aab84f..4c818f8bc1352b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 124f5652ae3cd5..d6b784a5520daf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9cc7d5a330ef9d..2364b23ea3e52c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64d8..396c042e9d0ee5 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f3eb266d75ff75..bd1f1ad3528420 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 330cae0025d0b7..8e2b79b88e512f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index a23ce84a3f6ccf..f07f83b3611b6d 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -2,6 +2,7 @@ * x86 single-step support code, common to 32-bit and 64-bit. */ #include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1dc86ee60a0319..948443e115c147 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 7c0abdc9a732a0..478d15dbaee41b 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 0442d98367aec5..23ee89ce59a940 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e3254ca0eec4ec..428e31763cb93e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -4,6 +4,7 @@ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ #include /* test_thread_flag(), ... */ +#include /* task_stack_*(), ... */ #include /* oops_begin/end, ... 
*/ #include /* search_exception_tables */ #include /* max_low_pfn */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index af33f9d4c624ac..58f96d1230d4d5 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 32519b71d914b4..e0f583fed06a40 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c41294745731ee..70a13194544384 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index f2b3e1725349c7..fd894eaa63f3ad 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-map.c b/block/blk-map.c index 2f18c2a0be1b22..3b5cb863318f31 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -2,6 +2,7 @@ * Functions related to mapping data to requests */ #include +#include #include #include #include diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index f7f6b9144b07c0..da6b59ba594039 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -34,6 +34,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index aef00511ca8646..74f1b7dc03f73c 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 9b2fe2d3e3a941..1ec84ca8114674 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -3,6 +3,7 @@ #include #include +#include #include /* diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 1dd611423d8be4..df6ac985fbb586 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -5,6 +5,7 @@ #include "lkdtm.h" #include #include +#include #include #include #include diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 6c062b8251d238..d52139635b67c6 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include #include diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 2a59139f520b96..9be82c4e14a409 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2c95257fa4da89..92c00a13b28bd6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 222873bb689cc7..6103a8149ccd33 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,7 +37,6 @@ #include #include #include -#include #include #include diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 9b2917a3029406..2edcefc0a2949a 100644 --- a/fs/binfmt_flat.c +++ 
b/fs/binfmt_flat.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index c74ab43b83838c..592683711c6432 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3f3..c8240a12c42d98 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include #include +#include + #include #include diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d7325b..9b7dd59fe28d5f 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 00000000000000..933cd49824c234 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +#include + +#endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/init/main.c b/init/main.c index 47956b22add169..c2d337a136d1a5 100644 --- a/init/main.c +++ b/init/main.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index e9fdb5203de5c0..c04917cad1bfdc 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -11,6 +11,8 @@ #include #include +#include + #include "internal.h" struct callchain_cpus_entries { diff --git a/kernel/exit.c b/kernel/exit.c index e53408d156df74..771c33fc995250 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 0af64d5d8b73d1..c87b6f03c710a7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 3caaaa04b92e78..2984fb0f025742 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6b7155ae5c335e..4e2fec3f12a008 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e15185c28de564..65f61077ad50d9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 25d099fd80f786..e6d470a5f75af8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2a1abbaca10ec9..1d68b5b7ad4133 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -2,6 +2,7 @@ * Copyright (C) 2008 Steven Rostedt * */ +#include #include #include #include diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 8c28cbd7e104b6..17afb043016133 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 
942adf13418d05..b157b46cc9a69c 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include diff --git a/lib/syscall.c b/lib/syscall.c index 63239e097b13a8..17d5ff5fa6a388 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d99312ed7c2292..98b27195e38b07 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a3970573359e3a..26c874e90b12ef 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/util.c b/mm/util.c index 14f4e13323e236..656dc5e37a8721 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include From dfc3401a33086a3fd465468e171ea0e82430569b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: [PATCH 0533/1911] sched/headers: Prepare to move the 'root_task_group' declaration to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + kernel/sched/sched.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a85d61f761422..452c9799318cf0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4e2fec3f12a008..b1f1c844383739 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,5 +1,6 @@ #include +#include #include #include #include From e6d930b4e0115eb0732eec0efb11c3b09a8000fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:43:50 +0100 Subject: [PATCH 0534/1911] signals: Prepare to split out from Introduce dummy header and add dependencies to places that will depend on it. 
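As an illustration of the "dummy header" pattern used here and throughout this series: the new types-only header initially just pulls in whatever currently provides the signal types, so files that gain an explicit include of it keep building unchanged until the type definitions are actually moved. The sketch below is illustrative only; the exact headers it pulls in are an assumption (the real ones appear, with their names stripped, in the diff that follows).

/* include/linux/signal_types.h -- initial placeholder (sketch) */
#ifndef _LINUX_SIGNAL_TYPES_H
#define _LINUX_SIGNAL_TYPES_H

/*
 * For now, only include the headers assumed to currently define the
 * signal types (list helpers plus the UAPI signal definitions); the
 * struct definitions themselves are moved here by a later patch.
 */
#include <linux/list.h>
#include <uapi/linux/signal.h>

#endif /* _LINUX_SIGNAL_TYPES_H */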
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/abi.h | 2 ++ include/linux/signal.h | 3 +-- include/linux/signal_types.h | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 include/linux/signal_types.h diff --git a/arch/mips/include/asm/abi.h b/arch/mips/include/asm/abi.h index 940760844e2fe9..dba7f4b6bebfae 100644 --- a/arch/mips/include/asm/abi.h +++ b/arch/mips/include/asm/abi.h @@ -9,6 +9,8 @@ #ifndef _ASM_ABI_H #define _ASM_ABI_H +#include + #include #include #include diff --git a/include/linux/signal.h b/include/linux/signal.h index 5308304993bea5..d1c2b05a7a556c 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H -#include #include -#include +#include struct task_struct; diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h new file mode 100644 index 00000000000000..28799c88f490fe --- /dev/null +++ b/include/linux/signal_types.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SIGNAL_TYPES_H +#define _LINUX_SIGNAL_TYPES_H + +#include +#include + +#endif /* _LINUX_SIGNAL_TYPES_H */ From f361bf4a66c9bfabace46f6ff5d97005c9b524fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: [PATCH 0535/1911] sched/headers: Prepare for the reduction of 's signal API dependency Instead of including the full , we are going to include the types-only header in , to further decouple the scheduler header from the signal headers. This means that various files which relied on the full need to be updated to gain an explicit dependency on it. Update the code that relies on sched.h's inclusion of the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/sys_compat.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 1 + drivers/isdn/mISDN/stack.c | 2 ++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/free-space-cache.c | 1 + fs/buffer.c | 1 + fs/ceph/addr.c | 1 + fs/dax.c | 1 + fs/ioctl.c | 2 ++ fs/iomap.c | 2 ++ fs/ocfs2/super.c | 1 + include/linux/sched.h | 1 + include/linux/sched/signal.h | 1 + kernel/pid_namespace.c | 1 + mm/page-writeback.c | 1 + 15 files changed, 18 insertions(+) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index abaf582fc7a8eb..8b8bbd3eaa52cc 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index df3fef393dbe00..e7c3c0318ff60f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "i915_drv.h" diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index b324474c0c12ee..696f22fd5ab478 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include "core.h" static u_int *debug; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b966335543c..dad395b0b9fc8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. 
*/ #include +#include #include #include #include diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1a131f7d6c1bed..493a654b60127b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/fs/buffer.c b/fs/buffer.c index 28484b3ebc98c7..9196f2a270daac 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f297a9e1864293..1a3e1b40799a08 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" diff --git a/fs/dax.c b/fs/dax.c index 7436c98b92c82d..de622d4282a650 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ioctl.c b/fs/ioctl.c index cb9b02940805cb..569db68d02b317 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include "internal.h" #include diff --git a/fs/iomap.c b/fs/iomap.c index 0f85f24106054a..3ca1a8e44135ed 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a24e42f953418b..ca1646fbcaefe7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ae7b3d85658c7..ea05116bc3c2b0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0f4e9f4a43fd78..7e10a78245234a 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7c8367854dc41b..de461aa0bf9acc 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -20,6 +20,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 26a60818a8fcf7..d8ac2a7fb9e7b6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include From 2e58f173ab89b29a1373088b8727133dbf7322b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: [PATCH 0536/1911] mm/headers, sched/headers: Prepare to split out of We are going to separate all the MM types that are embedded directly in 'struct task_struct' into the new header. Create a new that only contains some includes from mm_types.h itself. This should be trivially correct and easy to bisect to. (This patch does not materially move the types yet.) 
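The same split pattern applies to the MM types: the small types embedded in task_struct eventually move into the new header, while the main mm_types.h keeps the heavyweight mm_struct/vm_area_struct definitions and includes the new header for compatibility. A rough sketch of the intended end state follows; the member and struct names are hypothetical, since this patch does not move any types yet.

/* linux/mm_types_task.h: per-task MM types only (sketch) */
#ifndef _LINUX_MM_TYPES_TASK_H
#define _LINUX_MM_TYPES_TASK_H

#include <linux/types.h>

/* Example of a small, task-embedded type (name is illustrative): */
struct task_rss_cache {
	int count[4];		/* per-task cached RSS counters */
};

#endif /* _LINUX_MM_TYPES_TASK_H */

/* linux/mm_types.h then keeps the big MM types and includes the above: */
#include <linux/mm_types_task.h>
/* ... struct mm_struct, struct vm_area_struct, etc. remain here ... */

/*
 * With that in place, <linux/sched.h> can switch from the full
 * <linux/mm_types.h> to the much smaller <linux/mm_types_task.h>,
 * shrinking its include graph without changing any type layout.
 */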
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 6 +++--- include/linux/mm_types_task.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 include/linux/mm_types_task.h diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 137797cd7b5052..6daaf0a519683a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1,9 +1,9 @@ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H +#include + #include -#include -#include #include #include #include @@ -13,7 +13,7 @@ #include #include #include -#include + #include #ifndef AT_VECTOR_SIZE_ARCH diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h new file mode 100644 index 00000000000000..2419d302f03c0b --- /dev/null +++ b/include/linux/mm_types_task.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_MM_TYPES_TASK_H +#define _LINUX_MM_TYPES_TASK_H + +#include +#include +#include + +#include + +#endif /* _LINUX_MM_TYPES_TASK_H */ From 589ee62844e042b0b7d19ef57fb4cff77f3ca294 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: [PATCH 0537/1911] sched/headers: Prepare to remove the dependency from Update code that relied on sched.h including various MM types for them. This will allow us to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/a.out-core.h | 1 + arch/alpha/include/asm/mmu_context.h | 2 ++ arch/arc/mm/tlb.c | 2 ++ arch/arm/include/asm/mmu_context.h | 2 ++ arch/arm/include/asm/tlbflush.h | 7 ++++--- arch/arm/kernel/suspend.c | 1 + arch/arm/kernel/swp_emulate.c | 1 + arch/arm/mm/idmap.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/include/asm/mmu_context.h | 2 ++ arch/blackfin/include/asm/mmu_context.h | 2 ++ arch/blackfin/kernel/flat.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/mm/sram-alloc.c | 2 ++ arch/cris/arch-v10/mm/tlb.c | 2 ++ arch/cris/arch-v32/mm/tlb.c | 1 + arch/cris/include/asm/pgtable.h | 2 +- arch/cris/mm/tlb.c | 2 ++ arch/h8300/kernel/traps.c | 1 + arch/hexagon/include/asm/mmu_context.h | 2 ++ arch/hexagon/kernel/smp.c | 1 + arch/ia64/include/asm/mmu_context.h | 1 + arch/ia64/include/asm/pgtable.h | 2 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 1 + arch/m32r/include/asm/mmu_context.h | 2 ++ arch/m68k/include/asm/a.out-core.h | 1 + arch/m68k/include/asm/mmu_context.h | 1 + arch/metag/include/asm/mmu_context.h | 1 + arch/microblaze/include/asm/mmu_context_mm.h | 2 ++ arch/microblaze/mm/pgtable.c | 1 + arch/mips/include/asm/elf.h | 2 ++ arch/mips/include/asm/mmu_context.h | 2 ++ arch/mips/kernel/smp.c | 2 +- arch/mips/math-emu/dsemul.c | 1 + arch/mips/mm/ioremap.c | 1 + arch/mn10300/include/asm/mmu_context.h | 2 ++ arch/mn10300/kernel/smp.c | 2 +- arch/mn10300/mm/tlb-smp.c | 2 +- arch/nios2/include/asm/mmu_context.h | 2 ++ arch/nios2/kernel/process.c | 1 + arch/powerpc/kernel/io-workarounds.c | 2 +- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/powerpc/lib/feature-fixups.c | 1 + arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/pgtable-book3s64.c | 2 ++ arch/powerpc/mm/pgtable-hash64.c | 2 ++ arch/powerpc/mm/pgtable-radix.c | 2 +- arch/powerpc/mm/slb.c | 2 ++ arch/s390/include/asm/elf.h | 2 +- arch/s390/include/asm/mmu_context.h | 1 + arch/s390/kernel/processor.c | 1 + arch/s390/kvm/gaccess.c | 2 ++ 
arch/s390/kvm/priv.c | 2 ++ arch/score/include/asm/mmu_context.h | 2 ++ arch/score/kernel/traps.c | 1 + arch/sh/include/asm/mmu_context.h | 2 ++ arch/sparc/include/asm/mmu_context_64.h | 2 ++ arch/sparc/include/asm/pgtable_64.h | 3 +++ arch/sparc/kernel/asm-offsets.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/mm/tsb.c | 2 ++ arch/tile/include/asm/mmu_context.h | 2 ++ arch/um/include/asm/mmu_context.h | 2 ++ arch/um/kernel/exec.c | 2 +- arch/um/kernel/reboot.c | 1 + arch/um/kernel/skas/process.c | 3 ++- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/events/core.c | 2 +- arch/x86/include/asm/a.out-core.h | 2 ++ arch/x86/include/asm/mpx.h | 2 ++ arch/x86/mm/mpx.c | 1 + arch/x86/um/syscalls_64.c | 1 + arch/x86/xen/mmu.c | 2 +- arch/xtensa/include/asm/mmu_context.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 ++ drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- fs/binfmt_misc.c | 2 +- fs/kernfs/file.c | 2 +- include/drm/drm_mm.h | 1 + include/linux/init_task.h | 1 + include/linux/sched/mm.h | 1 + kernel/sched/debug.c | 2 +- kernel/sched/fair.c | 2 +- kernel/signal.c | 2 +- lib/is_single_threaded.c | 1 + 89 files changed, 125 insertions(+), 25 deletions(-) diff --git a/arch/alpha/include/asm/a.out-core.h b/arch/alpha/include/asm/a.out-core.h index 9e33e92e524c61..1610d078b06478 100644 --- a/arch/alpha/include/asm/a.out-core.h +++ b/arch/alpha/include/asm/a.out-core.h @@ -15,6 +15,7 @@ #ifdef __KERNEL__ #include +#include /* * Fill in the user structure for an ECOFF core dump. diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h index 4c51c05333c604..384bd47b518717 100644 --- a/arch/alpha/include/asm/mmu_context.h +++ b/arch/alpha/include/asm/mmu_context.h @@ -7,6 +7,8 @@ * Copyright (C) 1996, Linus Torvalds */ +#include + #include #include #include diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index bdb295e09160b2..d0126fdfe2d854 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -53,6 +53,8 @@ #include #include +#include + #include #include #include diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 3cc14dd8587c09..7f303295ef1903 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -15,7 +15,9 @@ #include #include +#include #include + #include #include #include diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index def9e570199f90..1897b5196fb57f 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -10,6 +10,10 @@ #ifndef _ASMARM_TLBFLUSH_H #define _ASMARM_TLBFLUSH_H +#ifndef __ASSEMBLY__ +# include +#endif + #ifdef CONFIG_MMU #include @@ -644,9 +648,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #elif defined(CONFIG_SMP) /* !CONFIG_MMU */ #ifndef __ASSEMBLY__ - -#include - static inline void local_flush_tlb_all(void) { } static inline void local_flush_tlb_mm(struct mm_struct *mm) { } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 9a2f882a0a2d1c..ef794c799cb660 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 
853221f81104c2..3bda08bee6747c 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index c1a48f88764ea8..3e511bec69b836 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3c9f7d18a7e693..3257895a9b5e41 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bdbd0c3febf4d8..e52be6aa44ee7f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/include/asm/mmu_context.h b/arch/avr32/include/asm/mmu_context.h index 27ff234071009b..cd87abba8db78d 100644 --- a/arch/avr32/include/asm/mmu_context.h +++ b/arch/avr32/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASM_AVR32_MMU_CONTEXT_H #define __ASM_AVR32_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/blackfin/include/asm/mmu_context.h b/arch/blackfin/include/asm/mmu_context.h index 15b16d3e8de8ae..0ce6de873b27e2 100644 --- a/arch/blackfin/include/asm/mmu_context.h +++ b/arch/blackfin/include/asm/mmu_context.h @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c index a88daddbf074b2..b5b6584496164d 100644 --- a/arch/blackfin/kernel/flat.c +++ b/arch/blackfin/kernel/flat.c @@ -6,6 +6,7 @@ #include #include +#include #include #define FLAT_BFIN_RELOC_TYPE_16_BIT 0 diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index b691ef875a40c7..89d5162d4ca675 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c index 1f3b3ef3e103fb..d2a96c2c02a3e6 100644 --- a/arch/blackfin/mm/sram-alloc.c +++ b/arch/blackfin/mm/sram-alloc.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include #include #include "blackfin_sram.h" diff --git a/arch/cris/arch-v10/mm/tlb.c b/arch/cris/arch-v10/mm/tlb.c index 21d78c599babc5..3225d38bdaea29 100644 --- a/arch/cris/arch-v10/mm/tlb.c +++ b/arch/cris/arch-v10/mm/tlb.c @@ -10,6 +10,8 @@ * */ +#include + #include #include #include diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c index c030d020660aaa..bc3de5b5e27c01 100644 --- a/arch/cris/arch-v32/mm/tlb.c +++ b/arch/cris/arch-v32/mm/tlb.c @@ -6,6 +6,7 @@ * Authors: Bjorn Wesen * Tobias Anderberg , CRISv32 port. 
*/ +#include #include #include diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index ceefc314d64d08..2a3210ba4c7204 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -9,7 +9,7 @@ #include #ifndef __ASSEMBLY__ -#include +#include #include #endif #include diff --git a/arch/cris/mm/tlb.c b/arch/cris/mm/tlb.c index b7f8de576777f8..8413741cfa0fbd 100644 --- a/arch/cris/mm/tlb.c +++ b/arch/cris/mm/tlb.c @@ -9,6 +9,8 @@ #include #include +#include + #include #define D(x) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index ee7e3ce40d7857..e47a9e0dc278fa 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/include/asm/mmu_context.h b/arch/hexagon/include/asm/mmu_context.h index d423d2e73c3088..d8a071afdd1d5c 100644 --- a/arch/hexagon/include/asm/mmu_context.h +++ b/arch/hexagon/include/asm/mmu_context.h @@ -21,6 +21,8 @@ #ifndef _ASM_MMU_CONTEXT_H #define _ASM_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index c02a6455839e01..1f63e91a353bba 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* timer_interrupt */ #include diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h index 7f2a456603cbf5..9b99368633b5ad 100644 --- a/arch/ia64/include/asm/mmu_context.h +++ b/arch/ia64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 9f3ed9ee8f13e0..384794e665fc4a 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -147,7 +147,7 @@ # ifndef __ASSEMBLY__ -#include /* for mm_struct */ +#include /* for mm_struct */ #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index c98dc965fe82fb..b73b0ebf82148e 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index 9fc78fc4444524..1230b7050d8e30 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include #include diff --git a/arch/m68k/include/asm/a.out-core.h b/arch/m68k/include/asm/a.out-core.h index f6bfc1d63ff6db..ae91ea6bb30379 100644 --- a/arch/m68k/include/asm/a.out-core.h +++ b/arch/m68k/include/asm/a.out-core.h @@ -16,6 +16,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index dc3be991d63431..4a6ae6dffa345f 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -2,6 +2,7 @@ #define __M68K_MMU_CONTEXT_H #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h index ae2a71b5e0bedf..2e0312748197db 100644 --- a/arch/metag/include/asm/mmu_context.h +++ b/arch/metag/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include static 
inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index d6864774644874..99472d2ca3404d 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -12,6 +12,8 @@ #define _ASM_MICROBLAZE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index cc732fe357ad1c..4c059923991530 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 7a6c466e5f2a01..0eb1a75be10584 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index 2abf94f72c0a81..da2004cef2d5c8 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -13,8 +13,10 @@ #include #include +#include #include #include + #include #include #include diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8c60a296294c59..6e71130549eae5 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index c4469ff4a996bc..6664908514b334 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1f189627440f23..1986e09fb457c5 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index 75dbe696f830cc..d2034f5e6eda79 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -23,6 +23,8 @@ #define _ASM_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index d924f7a7970829..35d2c3fe6f7696 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c index 9a39ea9031d4f7..085f2bb691aca8 100644 --- a/arch/mn10300/mm/tlb-smp.c +++ b/arch/mn10300/mm/tlb-smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/nios2/include/asm/mmu_context.h b/arch/nios2/include/asm/mmu_context.h index 294b4b1f81d4e1..78ab3dacf57932 100644 --- a/arch/nios2/include/asm/mmu_context.h +++ b/arch/nios2/include/asm/mmu_context.h @@ -13,6 +13,8 @@ #ifndef _ASM_NIOS2_MMU_CONTEXT_H #define _ASM_NIOS2_MMU_CONTEXT_H +#include + #include extern void mmu_context_init(void); diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 869a4d59de324d..509e7855e8dc58 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c 
index 5f8613ceb97f15..a582e0d4252552 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -12,7 +12,7 @@ #undef DEBUG #include -#include /* for init_mm */ +#include /* for init_mm */ #include #include diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index b0333cc737dd67..0fda4230f6c0f8 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb164..f3917705c686cb 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 12d679df50bd17..c554768b1fa2d4 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index b798ff674fabd5..5fcb3dd74c139b 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -8,6 +8,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index c23e286a6b8ff8..8b85a14b08eaa5 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d5f..2a590a98e65215 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#include +#include #include #include diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 48fc28bab54477..5e01b2ece1d016 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 83aaefed2a7b0a..1d48880b3cc142 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -132,7 +132,7 @@ typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; #include -#include /* for task_struct */ +#include /* for task_struct */ #include #include diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 9b828c073176db..6e31d87fb669bd 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bc2b60dcb17828..bf78545238315a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4492c93631781b..d55c829a5944c2 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -6,7 +6,9 @@ */ #include +#include #include + #include #include #include "kvm-s390.h" diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index fb4b494cde9bff..64b6a309f2c47c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/include/asm/mmu_context.h b/arch/score/include/asm/mmu_context.h index 2644577c96e844..073f95d350ded0 100644 --- a/arch/score/include/asm/mmu_context.h +++ b/arch/score/include/asm/mmu_context.h @@ -3,7 +3,9 @@ #include #include +#include #include + #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 8a54d320fb41db..fe68de6c746c44 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 35ffdd081d2655..eb6ac3c10c4487 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index d0317993e9476f..22fede6eba1160 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7932a4a378176c..56e49c8f770d6b 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -878,6 +878,9 @@ static inline unsigned long pud_pfn(pud_t pud) #define pte_offset_map pte_index #define pte_unmap(pte) do { } while (0) +/* We cannot include at this point yet: */ +extern struct mm_struct init_mm; + /* Actual page table PTE updates. 
*/ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm, diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index f76389a3234229..3f09e1c83f5849 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -11,6 +11,7 @@ */ #include +#include // #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 26740a2f285d76..2de72a49308ad2 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -11,6 +11,7 @@ #include /* for jiffies */ #include +#include #include #include #include diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 23479c3d39f022..0a04811f06b78c 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -6,6 +6,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index f67753db1f782c..45a4b4c424cfd5 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -16,6 +16,8 @@ #define _ASM_TILE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 1a60e1328e2fa1..94ac2739918c62 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -7,6 +7,8 @@ #define __UM_MMU_CONTEXT_H #include +#include + #include extern void uml_setup_stubs(struct mm_struct *mm); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 31968677a0d079..a43d42bf0a8640 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 0bc921cee0b18d..71f3e9217cf2a7 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ddecf326dab28d..3a952a4f796582 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -4,8 +4,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index df91fb393a01e1..ce1d7534fa530a 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2ecc0e97772b13..349d4d17aa7fbd 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 7a15588e45d472..7d3ece8bfb616b 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,8 @@ #include #include +#include + #include /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 0b416d4cf73b69..a0d662be4c5b85 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -2,6 +2,8 @@ #define _ASM_X86_MPX_H #include +#include + #include #include diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c98079684bdb29..5126dfd52b182d 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -7,6 +7,7 @@ */ #include #include +#include #include 
#include diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index e6552275320bcc..10d907098c2614 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -6,6 +6,7 @@ */ #include +#include #include #include /* XXX This should get the constants from libc */ #include diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f6740b5b173808..37cb5aad71de36 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -38,7 +38,7 @@ * * Jeremy Fitzhardinge , XenSource Inc, 2007 */ -#include +#include #include #include #include diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h index 04c8ebdc45178c..f7e186dfc4e44e 100644 --- a/arch/xtensa/include/asm/mmu_context.h +++ b/arch/xtensa/include/asm/mmu_context.h @@ -17,6 +17,7 @@ #include #include +#include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88cf8..6acc4313363e1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index fa32c32fa1c2bc..a9b9882a9a7723 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "vi_structs.h" diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 318ec5267bdfe1..86ecd3ea6a4bd1 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d19662f635b1cc..5846c47c8d55e8 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 36bd904946bd34..0b5c43f7e020da 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9b4688ab1d8e0f..bee1a36bc2ec4e 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 35043a8c452905..8e4dc7ab584c2d 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "kernfs-internal.h" diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index d81b0ba9921fb5..2ef16bf258267e 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_DRM_DEBUG_MM diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 452c9799318cf0..f6841c19c913ef 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include 
#include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1cf7941bb9464c..d32e3932b2e337 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_MM_H #include +#include #include #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 109adc0e9cb990..e865d8bfb881eb 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 11e0ab57748ada..3e88b35ac1571c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,7 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ -#include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index e6d470a5f75af8..2008aa999976a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 9745cfffcb6948..8d4678edcc0eb0 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include /* * Returns true if the task does not share ->mm with another thread/process. From 9164bb4a18dfa592cd0aca455ea57abf89ca4526 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: [PATCH 0538/1911] sched/headers: Prepare to move 'init_task' and 'init_thread_union' from to Update all usage sites first. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/kasan_init.c | 1 + arch/frv/mm/init.c | 1 + arch/metag/mm/init.c | 1 + arch/nios2/kernel/setup.c | 1 + arch/powerpc/kernel/paca.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/um/kernel/um_arch.c | 2 ++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + fs/namespace.c | 2 ++ include/linux/sched/signal.h | 1 + init/init_task.c | 1 + kernel/delayacct.c | 1 + lib/is_single_threaded.c | 1 + mm/vmacache.c | 1 + 16 files changed, 18 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 952e2c0dabd51e..42274bda0ccb5a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 201d918e75759d..55d1e920554368 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "kasan: " fmt #include #include +#include #include #include #include diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 88a15974352857..328f0a2923168c 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index c0ec116b3993a3..188d4d9fbed4d9 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a3fa80d1aacc2d..6e57ffa5db2769 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/paca.c 
b/arch/powerpc/kernel/paca.c index fa20060ff7a52e..dfc479df9634e2 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 3a952a4f796582..d4dbf08722d68c 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8175a8aa22c7b..4b85acd4020c40 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -11,7 +11,9 @@ #include #include #include +#include #include + #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7e2ca966386df..f2fd8fefc5899d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0493c17b8a516f..8d63d7a104c3c4 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 131cd7b94f4711..cc1375eff88c75 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "pnode.h" #include "internal.h" diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7e10a78245234a..320eca0446cd05 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -5,5 +5,6 @@ #include #include #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/init/init_task.c b/init/init_task.c index 53d4ce942a887f..66787e30a4191b 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 6605496569914d..c94135fc269844 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 8d4678edcc0eb0..9c7d89df40ed9b 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include /* diff --git a/mm/vmacache.c b/mm/vmacache.c index 4355d34c68a681..7ffa0ee341b5da 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -2,6 +2,7 @@ * Copyright (C) 2014 Davidlohr Bueso. */ #include +#include #include #include From b2d091031075ac9a1598e3cc3a29c28f02e64c0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: [PATCH 0539/1911] sched/headers: Prepare to use instead of in We don't actually need the full rculist.h header in sched.h anymore, we will be able to include the smaller rcupdate.h header instead. But first update code that relied on the implicit header inclusion. 
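The class of fixup this requires looks like the following sketch (a hypothetical file, not one touched by the patch): code that uses RCU list iteration but previously got rculist.h for free via sched.h now spells out the include explicitly, since sched.h will only pull in the smaller rcupdate.h.

/* Illustrative example only -- file and function names are made up. */
#include <linux/sched.h>	/* will no longer imply <linux/rculist.h> */
#include <linux/rculist.h>	/* so RCU list users include it directly  */

struct foo {
	struct list_head node;
	int val;
};

static int sum_foos(struct list_head *head)
{
	struct foo *f;
	int sum = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(f, head, node)	/* needs <linux/rculist.h> */
		sum += f->val;
	rcu_read_unlock();

	return sum;
}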
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kvm/book3s_mmu_hpte.c | 1 + arch/x86/kvm/irq_comm.c | 2 ++ arch/x86/kvm/page_track.c | 2 ++ drivers/md/bcache/btree.c | 2 ++ drivers/misc/vmw_vmci/vmci_event.c | 1 + drivers/nvme/target/admin-cmd.c | 2 ++ drivers/nvme/target/core.c | 2 ++ drivers/s390/cio/qdio_thinint.c | 2 ++ drivers/scsi/libfc/fc_disc.c | 2 ++ drivers/scsi/libfc/fc_rport.c | 2 ++ include/linux/dmar.h | 2 +- include/linux/pid.h | 2 +- include/linux/rhashtable.h | 2 +- include/linux/sched/signal.h | 1 + include/net/bluetooth/hci_core.h | 2 ++ kernel/trace/trace_events_hist.c | 1 + kernel/trace/trace_events_trigger.c | 1 + kernel/trace/trace_kprobe.c | 1 + kernel/trace/trace_uprobe.c | 1 + lib/bug.c | 1 + lib/rhashtable.c | 1 + net/mac80211/mesh_plink.c | 2 ++ net/mac802154/llsec.c | 2 ++ security/apparmor/policy.c | 1 + security/tomoyo/domain.c | 2 ++ security/tomoyo/group.c | 2 ++ security/tomoyo/util.c | 2 ++ 27 files changed, 41 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 5a1ab1250a056f..905a934c1ef469 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index b96d3893f121c7..6825cd36d13b7c 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 4a1c13eaa51833..37942e419c32e5 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 384559af310e46..450d0e848ae436 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include /* diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c index 8449516d6ac648..84258a48029d41 100644 --- a/drivers/misc/vmw_vmci/vmci_event.c +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "vmci_driver.h" #include "vmci_event.h" diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 94e524fea5687b..a7bcff45f4376d 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include + #include #include #include "nvmet.h" diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5267ce20c12d48..11b0a0a5f661b5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -14,6 +14,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include + #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 8ad98a902a91f1..c61164f4528e12 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 6103231104dadb..fd501f8dbb1107 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ 
b/drivers/scsi/libfc/fc_disc.c @@ -36,6 +36,8 @@ #include #include #include +#include + #include #include diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index c991f3b822f885..b44c3136eb5181 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -65,6 +65,8 @@ #include #include #include +#include + #include #include diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a5e..e8ffba1052d3ac 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include struct acpi_dmar_header; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e55d..4d179316e43108 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include +#include enum pid_type { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a8459100e..092292b6675e2c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 320eca0446cd05..c6958a53fef3e3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90708f68cc024e..95ccc1eef55845 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,6 +26,8 @@ #define __HCI_CORE_H #include +#include + #include #include diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a197..1c21d0e2a145a6 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "tracing_map.h" #include "trace.h" diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 6721a1e89f39c7..f2ac9d44f6c4b1 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "trace.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eadd96ef772f78..5f688cc724f00a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace_probe.h" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f4379e772171dc..a7581fec96818e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "trace_probe.h" diff --git a/lib/bug.c b/lib/bug.c index bc3656e944d29b..06edbbef062322 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -45,6 +45,7 @@ #include #include #include +#include extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c5b9b9351cec8a..f8635fd5744259 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fcba70e57073f3..953d71e784a9ab 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -9,6 +9,8 @@ #include #include #include 
+#include + #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 6a3e1c2181d3a9..1e1c9b20bab7f4 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 462c5d36b87157..def1fbd6bdfd81 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 838ffa78cfdac1..00d223e9fb37ca 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -5,8 +5,10 @@ */ #include "common.h" + #include #include +#include /* Variables definitions.*/ diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c index 50092534ec5408..944ad77d8fbac2 100644 --- a/security/tomoyo/group.c +++ b/security/tomoyo/group.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /** diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 5fe3679137aeb7..848317fea704fe 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /* Lock for protecting policy. */ From f719ff9bcee2a422647790f12d53d3755f47c727 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: [PATCH 0540/1911] sched/headers: Prepare to move the task_lock()/unlock() APIs to But first update the code that uses these facilities with the new header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/math-emu/dsemul.c | 1 + block/blk-ioc.c | 1 + block/ioprio.c | 1 + drivers/staging/android/ion/ion.c | 1 + drivers/staging/lustre/lustre/ptlrpc/sec.c | 1 + fs/proc/internal.h | 1 + fs/proc/proc_net.c | 1 + fs/proc_namespace.c | 2 ++ include/linux/cpuset.h | 1 + ipc/namespace.c | 1 + kernel/cgroup/cpuset.c | 1 + kernel/sched/debug.c | 1 + kernel/utsname.c | 1 + mm/mempolicy.c | 1 + mm/mmu_context.c | 1 + net/core/net_namespace.c | 2 ++ net/core/netclassid_cgroup.c | 2 ++ net/core/netprio_cgroup.c | 2 ++ 18 files changed, 22 insertions(+) diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 6664908514b334..b6bfd36253694f 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c87b4c31c..6bfa3967533761 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "blk.h" diff --git a/block/ioprio.c b/block/ioprio.c index 4b9ab8367ddaa5..0c47a00f92a852 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 2c3ffbcbd621a5..f45115fce4eb7c 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ion.h" #include "ion_priv.h" diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index 49f34fd655c3cb..366f2ce20f5ebb 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -40,6 
+40,7 @@ #include #include #include +#include #include "../include/obd.h" #include "../include/obd_class.h" diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 550e8b183abece..26a6daf02185ce 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -15,6 +15,7 @@ #include #include #include +#include struct ctl_table_header; struct mempolicy; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e0446..5cbc65d7a1e138 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 3f1190d1899153..b5713fefb4c1b5 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -10,6 +10,8 @@ #include #include #include +#include + #include "proc/internal.h" /* only for get_proc_task() in ->open() */ #include "pnode.h" diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index c608c39cb16141..611fce58d67039 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/ipc/namespace.c b/ipc/namespace.c index 1f1d713ac19c4e..b4d80f9f724673 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "util.h" diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a5c46db2855c4d..0f41292be0fb7d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e865d8bfb881eb..38f019324f1aaf 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 06585ad296ffc2..913fe4336d2b75 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -17,6 +17,7 @@ #include #include #include +#include static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 18e0105810dfd8..75b2745bac4145 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 8888b124a386c5..3e612ae748e966 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3c4bbec3971309..652468ff65b79d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 11fce17274f6ce..6ae56037bb1336 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 756637dc7a5769..0f9275ee559581 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include From 32ef5517c298042ed58408545f475df43afe1f24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: [PATCH 0541/1911] sched/headers: Prepare to move cputime functionality from into Introduce a trivial, mostly empty header to prepare for the moving of cputime 
functionality out of sched.h. Update all code that relies on these facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/ia64/kernel/time.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/s390/kernel/idle.c | 2 +- arch/s390/kernel/vtime.c | 2 +- arch/x86/kernel/apm_32.c | 1 + arch/x86/kvm/hyperv.c | 2 ++ drivers/isdn/mISDN/stack.c | 1 + drivers/s390/cio/cio.c | 2 +- fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/proc/array.c | 1 + fs/proc/stat.c | 2 +- include/linux/sched/cputime.h | 7 +++++++ kernel/acct.c | 2 ++ kernel/delayacct.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/cputime.c | 2 +- kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/sys.c | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 1 + kernel/tsacct.c | 1 + 25 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 include/linux/sched/cputime.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 73446baa632e1e..0b961093ca5cac 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 144f9db7a87665..aa7be020a9042b 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 37833c5dc27424..07b90725855e3b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index fb07a70820af42..9340b2a07935de 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e8116775..c14fc902991272 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index dc04b30cbd6052..5a414545e8a390 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -219,6 +219,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f701d443072770..ebae57ac59024a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -28,6 +28,8 @@ #include #include +#include + #include #include diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 696f22fd5ab478..8b7faea2ddf88b 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "core.h" diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index de6fccc1312485..1b350665c82332 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 92c00a13b28bd6..5075fd5c62c86d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git 
a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6103a8149ccd33..cf93a4fad01218 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index f3169b58af381c..88c355574aa0af 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b95556e036bb54..bd4e55f4aa20b2 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 00000000000000..6ed4fe43de2893 --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include +#include + +#endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/kernel/acct.c b/kernel/acct.c index ca9cb55b585599..5b1284370367aa 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include /* sector_div */ #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c94135fc269844..4a1c33416b6a2d 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 771c33fc995250..e126ebf2400c22 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index c87b6f03c710a7..916e78004b8fe4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 54031fa681e260..f3778e2b46c8dc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "sched.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b1f1c844383739..76e3af3f39f4a9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 2008aa999976a9..7e59ebc2c25e66 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 7f1ecd2e41c11f..7ff6d1b10cecac 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index f6b961c5e58cf1..087d6a1279b833 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a2475a9f57d8fa..4513ad16a253f6 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index d9a03b80b75d1d..370724b4539185 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include From 
3905f9ad455e0fd2ddb557566c5561b4a3027c07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: [PATCH 0542/1911] sched/headers: Prepare to move sched_info_on() and force_schedstat_enabled() from to But first update usage sites with the new header dependency. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kvm/cpuid.c | 2 ++ arch/x86/kvm/x86.c | 2 ++ fs/proc/base.c | 1 + kernel/latencytop.c | 1 + kernel/profile.c | 2 ++ 5 files changed, 8 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1d155cc56629a7..efde6cc5087518 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2a4b11274b04f..1faf620a6fdc20 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -54,6 +54,8 @@ #include #include #include +#include + #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 5914fed3712def..2dae60075f6ef8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 545839b838d691..96b4179cee6a76 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index f67ce0aa6bc449..9aa2a4445b0d2a 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include From 5c2c5c5514393e31859439ef83a34051d7c68332 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:24:31 +0100 Subject: [PATCH 0543/1911] sched/headers, vfs/execve: Prepare to move the do_execve*() prototypes from to But first update the usage sites with the new header dependency. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- init/main.c | 1 + kernel/kmod.c | 1 + 2 files changed, 2 insertions(+) diff --git a/init/main.c b/init/main.c index c2d337a136d1a5..ca9f53fb4125b2 100644 --- a/init/main.c +++ b/init/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index ac5f5c2d098d92..563f97e2be3618 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include From 3f8c24529b42fc5044c2a44bdb8ba69aec2bee37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:31:22 +0100 Subject: [PATCH 0544/1911] sched/headers: Prepare to move kstack_end() from to But first update the usage sites with the new header dependency. 
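kstack_end() tells a stack walker whether it has reached the end of the current kernel stack; the traps/stacktrace users updated below rely on it in roughly this way (a simplified sketch, not code lifted from any of those files):

#include <linux/printk.h>
#include <linux/sched/task_stack.h>	/* kstack_end(), once the move lands */

/* Dump raw stack words from 'sp' up to the end of the current kernel stack. */
static void dump_raw_stack(unsigned long *sp)
{
	while (!kstack_end(sp))
		pr_cont(" %lx", *sp++);
	pr_cont("\n");
}
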
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m32r/kernel/traps.c | 2 ++ arch/metag/kernel/stacktrace.c | 1 + arch/openrisc/kernel/traps.c | 1 + arch/um/kernel/sysrq.c | 1 + arch/xtensa/kernel/traps.c | 1 + mm/slab.c | 1 + 6 files changed, 7 insertions(+) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 7c05bb3935978c..832bb2bb29bd38 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -15,7 +15,9 @@ #include #include #include +#include #include + #include #include diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c index 4f806d66a58c6b..91ffc4b75c332b 100644 --- a/arch/metag/kernel/stacktrace.c +++ b/arch/metag/kernel/stacktrace.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 2354ab88d9484e..803e9e756f7785 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 34ae555c3e700a..a76295f7ede9cd 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index d8bb2e62393e54..c82c43bff2968c 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/slab.c b/mm/slab.c index bd63450a9b167f..807d86c7690886 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -116,6 +116,7 @@ #include #include #include +#include #include From 61855b6b03df9b6a15bd265c2c3ae7b5e23da312 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:35:41 +0100 Subject: [PATCH 0545/1911] sched/headers: Prepare to move exit_files() and exit_itimers() from to But first update the usage site. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/time/posix-timers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1e6623d7675019..50a6a47020dea9 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include From 1777e4635507265ba53d8dc4cd248e7d7c306fa0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: [PATCH 0546/1911] sched/headers: Prepare to move _init() prototypes from to But first introduce a trivial header and update usage sites. 
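The trivial header introduced below follows the same interim pattern as the other new <linux/sched/*.h> headers in this series; in shape it is just a forwarding wrapper (the include target is assumed from that convention, since at this stage the header only points back at the monolithic one):

#ifndef _LINUX_SCHED_INIT_H
#define _LINUX_SCHED_INIT_H

/*
 * Interim forwarding header: for now it only pulls in the monolithic
 * <linux/sched.h>, so callers can switch their include line right away;
 * the actual declarations are moved in by later patches.
 */
#include <linux/sched.h>

#endif /* _LINUX_SCHED_INIT_H */
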
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/setup.c | 1 + arch/m32r/kernel/traps.c | 1 + arch/s390/kernel/setup.c | 1 + include/linux/cpu.h | 2 ++ include/linux/sched/init.h | 6 ++++++ init/main.c | 1 + kernel/sched/sched.h | 1 + 7 files changed, 13 insertions(+) create mode 100644 include/linux/sched/init.h diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index b2b4f521618002..63bef6fc0f3ee4 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 832bb2bb29bd38..647dd94a0c399f 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3de07004c02e72..911dc0b49be05b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 21f9c74496e75e..f92081234afd97 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,6 +30,8 @@ struct cpu { extern void boot_cpu_init(void); extern void boot_cpu_state_init(void); +extern void cpu_init(void); +extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h new file mode 100644 index 00000000000000..15e46313e55eab --- /dev/null +++ b/include/linux/sched/init.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_INIT_H +#define _LINUX_SCHED_INIT_H + +#include + +#endif /* _LINUX_SCHED_INIT_H */ diff --git a/init/main.c b/init/main.c index ca9f53fb4125b2..eae2f15657c62c 100644 --- a/init/main.c +++ b/init/main.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 76e3af3f39f4a9..5cbf92214ad892 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include From 0881e7bd341e2158b314596bcf2059e88e68f04e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:30:50 +0100 Subject: [PATCH 0547/1911] sched/headers: Prepare to move the get_task_struct()/put_task_struct() and related APIs from to But first update usage sites with the new header dependency. 
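get_task_struct()/put_task_struct() pin a task_struct so it cannot be freed while a driver still holds a pointer to it; the sites updated below typically use them like this (a sketch with an assumed context structure, not code from any of these drivers):

#include <linux/rcupdate.h>
#include <linux/sched/task.h>	/* get_task_struct()/put_task_struct(), post-move */

/* Assumed example context; only the refcounting pattern matters here. */
struct foo_ctx {
	struct task_struct __rcu *owner;
};

static struct task_struct *foo_get_owner(struct foo_ctx *ctx)
{
	struct task_struct *p;

	rcu_read_lock();
	p = rcu_dereference(ctx->owner);
	if (p)
		get_task_struct(p);	/* caller drops it with put_task_struct() */
	rcu_read_unlock();

	return p;
}
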
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/dma/dmatest.c | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 + drivers/infiniband/core/umem_odp.c | 1 + drivers/infiniband/hw/mlx4/main.c | 1 + drivers/infiniband/hw/mlx5/main.c | 1 + drivers/md/persistent-data/dm-block-manager.c | 1 + drivers/misc/cxl/main.c | 2 ++ drivers/net/xen-netback/interface.c | 1 + drivers/oprofile/buffer_sync.c | 1 + drivers/tty/tty_ldsem.c | 1 + drivers/usb/usbip/usbip_common.h | 1 + kernel/irq/manage.c | 1 + 12 files changed, 13 insertions(+) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index c9297605058c1a..54d581d407aa72 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index ae2882b64b8253..fd56f92f3469a0 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "etnaviv_drv.h" #include "etnaviv_gem.h" diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 7279f259494ffc..cb2742b548bbed 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 56da8f0d71bbc7..fba94df28cf1b1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4b1ec3ff152ad9..4dc0a8785fe0d2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,6 +42,7 @@ #endif #include #include +#include #include #include #include diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0863905dee028c..8589e0a140686e 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -13,6 +13,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "block manager" diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index cc1706a92aceb6..b0b6ed31918ef2 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include #include diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index a2d326760a7274..829b26cd4549a4 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -31,6 +31,7 @@ #include "common.h" #include +#include #include #include #include diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index eb0c35994b5401..ac27f3d3fbb42b 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "oprofile_stats.h" diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 4f6b1b10b53784..52b7baef4f7a80 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/drivers/usb/usbip/usbip_common.h 
b/drivers/usb/usbip/usbip_common.h index 9f490375ac9236..f8573a52e41a56 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #define USBIP_VERSION "1.0.0" diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 09740952e4de13..a4afe5cc5af182 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include From 50d34394cee68dd12c5e01fff073d1167700bfce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: [PATCH 0548/1911] sched/headers: Prepare to remove the include from Update files that depend on the magic.h inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/dax/dax.c | 1 + drivers/virtio/virtio_balloon.c | 1 + include/linux/sched/task_stack.h | 1 + mm/zsmalloc.c | 1 + security/integrity/evm/evm_main.c | 2 ++ 5 files changed, 6 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index b75c77254fdb56..8d9829ff2a784d 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9d2738e9217f10..a610061fabf6ed 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * Balloon device works in 4K page units. So each page is pointed to by diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 933cd49824c234..d2d578e85f7dab 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_TASK_STACK_H #include +#include #endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7b1fb6c8c21d4..b7ee9c34dbd678 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index e2ed498c0f5f59..063d38aef64e71 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include "evm.h" From b69339ba109549beeaf45c45c52ac7025bfcd954 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:15:03 +0100 Subject: [PATCH 0549/1911] sched/headers: Prepare to remove spurious inclusion dependencies In the following patches we are going to remove various headers from sched.h and other headers that sched.h includes. To make those patches build cleanly prepare the scene by adding dependencies to various files that learned to rely on those to-be-removed dependencies. These changes all make sense standalone: they add a header for a data type that a particular .c or .h file is using. 
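Each of these fixups has the same shape: a file that only built because <linux/sched.h> happened to drag in some other header gains a direct include for what it really uses. Roughly (the header and the type here are illustrative, not taken from the patch):

/*
 * Before: this file compiled only because <linux/sched.h> happened to
 * pull in the header defining struct completion (illustrative example).
 * After: the dependency is spelled out, so the file keeps building once
 * the indirect include is removed from <linux/sched.h>.
 */
#include <linux/sched.h>
#include <linux/completion.h>

static DECLARE_COMPLETION(foo_setup_done);
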
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/misc/vmw_vmci/vmci_resource.c | 1 + include/linux/sched.h | 1 + include/sound/control.h | 1 + include/target/target_core_base.h | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index 9a53a30de445cf..1ab6e8737a5f09 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vmci_resource.h" #include "vmci_driver.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index ea05116bc3c2b0..b5c6d602dfe9b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/sound/control.h b/include/sound/control.h index 21d047f229a1b5..bd7246de58e7c4 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -22,6 +22,7 @@ * */ +#include #include #define snd_kcontrol_chip(kcontrol) ((kcontrol)->private_data) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 878560e60c7527..8be9ba73383d52 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -5,6 +5,7 @@ #include /* enum dma_data_direction */ #include /* struct percpu_ida */ #include /* struct semaphore */ +#include #define TARGET_CORE_VERSION "v5.0" From a60b9eda67beac2318fa159545ba7d022f780736 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0550/1911] sched/headers: Move scheduler topology interfaces to The vast majority of sched.h users does not require the topology types and interfaces, so split them out into . This reduces the size of linux/sched.h by ~6%. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 212 -------------------------------- include/linux/sched/topology.h | 213 +++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 212 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b5c6d602dfe9b4..55480782ce7f30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -960,12 +960,6 @@ enum cpu_idle_type { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - /* * Wake-queues are lists of tasks with a pending wakeup, whose * callers have already marked the task as woken internally, @@ -1016,214 +1010,8 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. 
*/ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif - -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. 
*/ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; - -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - -/* Allocate an array of sched domains, for partition_sched_domains(). 
*/ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); - -bool cpus_share_cache(int this_cpu, int that_cpu); - -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); - -#define SDTL_OVERLAP 0x01 - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; - -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif -}; - -extern void set_sched_topology(struct sched_domain_topology_level *tl); extern void wake_up_if_idle(int cpu); -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -#endif /* !CONFIG_SMP */ - - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 184b56b8aa64ff..b3550b36e65d69 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -5,4 +5,217 @@ #include +/* + * sched-domains (multiprocessor balancing) declarations: + */ +#ifdef CONFIG_SMP + +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ + +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + +#ifdef CONFIG_SCHED_SMT +static inline int cpu_smt_flags(void) +{ + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + +extern int arch_asym_cpu_priority(int cpu); + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + +extern int sched_domain_level_max; + +struct sched_group; + +struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; +}; + +struct sched_domain { + 
/* These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; + unsigned int forkexec_idx; + unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ + int flags; /* See SD_* */ + int level; + + /* Runtime fields. */ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + + u64 avg_scan_cost; /* select_idle_sibling */ + +#ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; +#endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; + struct sched_domain_shared *shared; + + unsigned int span_weight; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long span[0]; +}; + +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new); + +/* Allocate an array of sched domains, for partition_sched_domains(). 
*/ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + +bool cpus_share_cache(int this_cpu, int that_cpu); + +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; + struct sched_group **__percpu sg; + struct sched_group_capacity **__percpu sgc; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{ +} + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + +#endif /* !CONFIG_SMP */ + #endif /* _LINUX_SCHED_TOPOLOGY_H */ From b768917d2c08edc4b5616c86a569e524d190434b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:51:00 +0100 Subject: [PATCH 0551/1911] sched/headers: Move the 'cpu_idle_type' enum from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/idle.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 55480782ce7f30..8a8252e9e0c797 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -943,13 +943,6 @@ static inline int sched_info_on(void) void force_schedstat_enabled(void); #endif -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES -}; - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index a3cf79b86986c1..6aabc9968cba70 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -3,4 +3,11 @@ #include +enum cpu_idle_type { + CPU_IDLE, + CPU_NOT_IDLE, + CPU_NEWLY_IDLE, + CPU_MAX_IDLE_TYPES +}; + #endif /* _LINUX_SCHED_IDLE_H */ From 4437722b0486c36dc90a1adf584fca4cea6cf1de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:58:52 +0100 Subject: [PATCH 0552/1911] sched/headers: Move the wake_up_if_idle() prototype to No need to clutter with this rarely used prototype. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/sched/idle.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a8252e9e0c797..c30705c3e55321 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1003,8 +1003,6 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -extern void wake_up_if_idle(int cpu); - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 6aabc9968cba70..72b6b6ab635473 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -10,4 +10,6 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +extern void wake_up_if_idle(int cpu); + #endif /* _LINUX_SCHED_IDLE_H */ From 5dbe91de599423d243738a205367506444ebb4a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:47:28 +0100 Subject: [PATCH 0553/1911] sched/headers: Move idle polling methods to Further reduce the size of by moving these APIs: tsk_is_polling() __current_set_polling() current_set_polling_and_test() __current_clr_polling() current_clr_polling_and_test() current_clr_polling() Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 76 -------------------------------------- include/linux/sched/idle.h | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c30705c3e55321..4f512e337584c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3176,82 +3176,6 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. 
- * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 72b6b6ab635473..66ee32f87f0b31 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -12,4 +12,80 @@ enum cpu_idle_type { extern void wake_up_if_idle(int cpu); +/* + * Idle thread specific functions to determine the need_resched + * polling state. + */ +#ifdef TIF_POLLING_NRFLAG +static inline int tsk_is_polling(struct task_struct *p) +{ + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +} + +static inline void __current_set_polling(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) +{ + clear_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +#else +static inline int tsk_is_polling(struct task_struct *p) { return 0; } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +#endif + +static inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + #endif /* _LINUX_SCHED_IDLE_H */ From eb61baf69871b9836783a81bc451189edb0d9de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 17:09:06 +0100 Subject: [PATCH 0554/1911] sched/headers: Move the wake-queue types and interfaces from sched.h into Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 54 +++--------------------------------- include/linux/sched/wake_q.h | 47 +++++++++++++++++++++++++++++++ ipc/msg.c | 1 - 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f512e337584c2..5cda7cf09505d4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -953,56 +953,6 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. 
- * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; - -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; - -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } - -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} - -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); - struct io_context; /* See blkdev.h */ @@ -1234,6 +1184,10 @@ enum perf_event_task_context { perf_nr_task_contexts, }; +struct wake_q_node { + struct wake_q_node *next; +}; + /* Track pages that require TLB flushes */ struct tlbflush_unmap_batch { /* diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 6383b35e4ebae6..d03d8a9047dcb5 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -1,6 +1,53 @@ #ifndef _LINUX_SCHED_WAKE_Q_H #define _LINUX_SCHED_WAKE_Q_H +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The DEFINE_WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. 
+ */ + #include +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define DEFINE_WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/ipc/msg.c b/ipc/msg.c index ecc387e573f6d2..104926dc72be4e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include From 5689810360c2e88ce1619e8bcfa859852f9a1d1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Feb 2017 12:07:18 +0100 Subject: [PATCH 0555/1911] sched/headers: Move scheduler clock interfaces to Move the sched_clock interfaces into a separate header file, to reduce the size of sched.h. Include in all files that made use of one of the Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 100 ------------------------------------ include/linux/sched/clock.h | 98 +++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 100 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5cda7cf09505d4..b42d2f9d673a22 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2190,103 +2190,6 @@ static inline void calc_load_exit_idle(void) { } #define cpu_relax_yield() cpu_relax() #endif -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - -static inline void sched_clock_tick(void) -{ -} - -static inline void clear_sched_clock_stable(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} - -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); - -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! 
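As the comments moved below spell out, cpu_clock()/local_clock() are the interfaces meant for general use: fast, per-CPU, nanosecond-resolution, with bounded drift between CPUs. A typical timing sketch (the workload function is an assumption):

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/sched/clock.h>	/* local_clock(), after this patch */

extern void foo_do_probe(void);		/* assumed workload, defined elsewhere */

/* Time a short section, in nanoseconds, against the local CPU clock. */
static void foo_time_probe(void)
{
	u64 t0, delta_ns;

	t0 = local_clock();
	foo_do_probe();
	delta_ns = local_clock() - t0;

	pr_debug("probe took %llu ns\n", (unsigned long long)delta_ns);
}
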
# - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} -#endif - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} -#endif - extern unsigned long long task_sched_runtime(struct task_struct *task); @@ -2297,9 +2200,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - #ifdef CONFIG_HOTPLUG_CPU extern void idle_task_exit(void); #else diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 7cb7d79b2cf9af..ac12f71d359cd6 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -3,4 +3,102 @@ #include +/* + * Do not use outside of architecture code which knows its limitations. + * + * sched_clock() has no promise of monotonicity or bounded drift between + * CPUs, use (which you should not) requires disabling IRQs. + * + * Please use one of the three interfaces below. + */ +extern unsigned long long notrace sched_clock(void); + +/* + * See the comment in kernel/sched/clock.c + */ +extern u64 running_clock(void); +extern u64 sched_clock_cpu(int cpu); + + +extern void sched_clock_init(void); + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void clear_sched_clock_stable(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} +#else +extern void sched_clock_init_late(void); +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +extern int sched_clock_stable(void); +extern void clear_sched_clock_stable(void); + +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} +#endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. 
+ */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + #endif /* _LINUX_SCHED_CLOCK_H */ From 47913d4ebd99c827c82c4f29eb282a119c3f2aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:00:26 +0100 Subject: [PATCH 0556/1911] sched/headers, delayacct: Move the 'struct task_delay_info' definition from to The 'struct task_delay_info' definition does not have to be in sched.h, because task_struct only has a pointer to it. So move it to to reduce the size of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 39 ++++----------------------------------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 00e60f79a9cc7e..6b769cbd10001a 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,7 +30,43 @@ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ + + u64 freepages_start; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ +}; +#endif +#include +#include + +#ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index b42d2f9d673a22..7d6998858fa381 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -893,39 +893,7 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ - - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ - - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. 
- */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ +struct task_delay_info; static inline int sched_info_on(void) { @@ -1612,9 +1580,10 @@ struct task_struct { struct page_frag task_frag; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif + #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif From e2d1e2aec572a2138dea74d53be54a1406d419c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:07:51 +0100 Subject: [PATCH 0557/1911] sched/headers: Move various ABI definitions to Move scheduler ABI types (struct sched_attr, struct sched_param, etc.) into the new UAPI header. This further reduces the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 70 +------------------------------- include/uapi/linux/sched/types.h | 68 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 68 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d6998858fa381..5c481906e835c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,11 +5,6 @@ #include - -struct sched_param { - int sched_priority; -}; - #include /* for HZ */ #include @@ -64,69 +59,8 @@ struct sched_param { #include -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. 
- * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +struct sched_attr; +struct sched_param; struct futex_pi_state; struct robust_list_head; diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index d162d315f4b5ad..307acbc82d800f 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -3,4 +3,72 @@ #include +struct sched_param { + int sched_priority; +}; + +#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ + +/* + * Extended scheduling parameters data structure. + * + * This is needed because the original struct sched_param can not be + * altered without introducing ABI issues with legacy applications + * (e.g., in sched_getparam()). + * + * However, the possibility of specifying more than just a priority for + * the tasks may be useful for a wide variety of application fields, e.g., + * multimedia, streaming, automation and control, and many others. + * + * This variant (sched_attr) is meant at describing a so-called + * sporadic time-constrained task. In such model a task is specified by: + * - the activation period or minimum instance inter-arrival time; + * - the maximum (or average, depending on the actual scheduling + * discipline) computation time of all instances, a.k.a. runtime; + * - the deadline (relative to the actual activation time) of each + * instance. + * Very briefly, a periodic (sporadic) task asks for the execution of + * some specific computation --which is typically called an instance-- + * (at most) every period. Moreover, each instance typically lasts no more + * than the runtime and must be completed by time instant t equal to + * the instance activation time + the deadline. + * + * This is reflected by the actual fields of the sched_attr structure: + * + * @size size of the structure, for fwd/bwd compat. + * + * @sched_policy task's scheduling policy + * @sched_flags for customizing the scheduler behaviour + * @sched_nice task's nice value (SCHED_NORMAL/BATCH) + * @sched_priority task's static priority (SCHED_FIFO/RR) + * @sched_deadline representative of the task's deadline + * @sched_runtime representative of the task's runtime + * @sched_period representative of the task's period + * + * Given this task model, there are a multiplicity of scheduling algorithms + * and policies, that can be used to ensure all the tasks will make their + * timing constraints. + * + * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the + * only user of this new interface. 
More information about the algorithm + * available in the scheduling class file or in Documentation/. + */ +struct sched_attr { + u32 size; + + u32 sched_policy; + u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + u32 sched_priority; + + /* SCHED_DEADLINE */ + u64 sched_runtime; + u64 sched_deadline; + u64 sched_period; +}; + #endif /* _UAPI_LINUX_SCHED_TYPES_H */ From dea38c74cb9205341f52b8d8ae18f61247a43ea8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:25:37 +0100 Subject: [PATCH 0558/1911] sched/headers: Move loadavg related definitions from to Move these bits to , to reduce the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 27 --------------------------- include/linux/sched/loadavg.h | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c481906e835c9..78adf7a0cac17e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,31 +71,6 @@ struct blk_plug; struct filename; struct nameidata; -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<>= FSHIFT; - extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -106,8 +81,6 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); -extern void calc_global_load(unsigned long ticks); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index 3ae74be327e8cf..c392e28ce0ac79 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -3,4 +3,31 @@ #include +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. 
+ */ +extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<>= FSHIFT; + +extern void calc_global_load(unsigned long ticks); + #endif /* _LINUX_SCHED_LOADAVG_H */ From de8f1c77313d8c908b2897f268d466c13df161d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:41:24 +0100 Subject: [PATCH 0559/1911] sched/headers: Move autogroup APIs into Further reduce the size of sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 18 ------------------ include/linux/sched/autogroup.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 78adf7a0cac17e..04262072923008 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2092,24 +2092,6 @@ static inline void wake_up_nohz_cpu(int cpu) { } extern u64 scheduler_tick_max_deferment(void); #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index d745f22e57dccf..586bdf37330dde 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -3,4 +3,25 @@ #include +struct signal_struct; +struct seq_file; + +#ifdef CONFIG_SCHED_AUTOGROUP +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#endif + #endif /* _LINUX_SCHED_AUTOGROUP_H */ From 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 
Mar 2017 11:24:35 +0100 Subject: [PATCH 0560/1911] sched/fair: Make select_idle_cpu() more aggressive Kitsunyan reported desktop latency issues on his Celeron 887 because of commit: 1b568f0aabf2 ("sched/core: Optimize SCHED_SMT") ... even though his CPU doesn't do SMT. The effect of running the SMT code on a !SMT part is basically a more aggressive select_idle_cpu(). Removing the avg condition fixed things for him. I also know FB likes this test gone, even though other workloads like having it. For now, take it out by default, until we get a better idea. Reported-by: kitsunyan Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Mason Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- kernel/sched/features.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce48..b3ee10dd3e85e4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5797,7 +5797,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f84..1b3c8189b28656 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are From 0ba87bb27d66b78e278167ac7e20c66520b8a612 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 10:51:47 +0100 Subject: [PATCH 0561/1911] sched/core: Fix pick_next_task() for RT,DL Pavan noticed that the following commit: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") ... broke RT,DL balancing by robbing them of the opportinty to do new-'idle' balancing when their last runnable task (on that runqueue) goes away. Reported-by: Pavan Kondeti Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b4998..6699d43a88430a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3273,10 +3273,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. 
*/ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 15:53:38 +0100 Subject: [PATCH 0562/1911] sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface Wanpeng Li reported that since the following commit: acb04058de49 ("sched/clock: Fix hotplug crash") ... KVM always runs with unstable sched-clock even though KVM's kvm_clock _is_ stable. The problem is that we've tied clear_sched_clock_stable() to the TSC state, and overlooked that sched_clock() is a paravirt function. Solve this by doing two things: - tie the sched_clock() stable state more clearly to the TSC stable state for the normal (!paravirt) case. - only call clear_sched_clock_stable() when we mark TSC unstable when we use native_sched_clock(). The first means we can actually run with stable sched_clock in more situations then before, which is good. And since commit: 12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method") ... this should be reliable. Since any detection of TSC fail now results in marking the TSC unstable. Reported-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- arch/x86/kernel/cpu/centaur.c | 2 -- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 4 ---- arch/x86/kernel/cpu/transmeta.c | 2 -- arch/x86/kernel/tsc.c | 35 ++++++++++++++++++++++----------- 7 files changed, 23 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95fed..30d924ae5c3465 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. 
*/ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c448..bad8ff078a214f 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5e0..9d98e2e15d54d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48e3..31e679238e8d0b 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb5536e..2388bafe5c37e2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b99983935..6a9ad73a2c5468 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992ef..911129fda2f988 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - 
disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } From 2b232e0c3b3a09f3e33750aa20e314f1b80e5361 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 09:40:11 +0000 Subject: [PATCH 0563/1911] locking/ww_mutex: Replace cpu_relax() with cond_resched() for tests When busy-spinning on a ww_mutex_trylock(), we depend upon the other thread advancing and releasing the lock. This can not happen on a single CPU unless we relinquish it: [ ] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:1:18] ... [ ] Call Trace: [ ] mutex_trylock() [ ] test_mutex_work+0x31/0x56 [ ] process_one_work+0x1b4/0x2f9 [ ] worker_thread+0x1b0/0x27c [ ] kthread+0xd1/0xd3 [ ] ret_from_fork+0x19/0x30 Reported-by: Fengguang Wu Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f2a5fec17395 ("locking/ww_mutex: Begin kselftests for ww_mutex") Link: http://lkml.kernel.org/r/20170228094011.2595-1-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f5c..3eb39c588397db 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); From 7fb4a2cea6b18dab56d609530d077f168169ed6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 16:23:30 +0100 Subject: [PATCH 0564/1911] locking/lockdep: Add nest_lock integrity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boqun reported that hlock->references can overflow. Add a debug test for that to generate a clear error when this happens. Without this, lockdep is likely to report a mysterious failure on unlock. Reported-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e98..c0ee8607c11e48 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } From 857811a37129f5d2ba162d7be3986eff44724014 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 1 Mar 2017 23:01:38 +0800 Subject: [PATCH 0565/1911] locking/ww_mutex: Adjust the lock number for stress test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because there are only 12 bits in held_lock::references, so we only support 4095 nested lock held in the same time, adjust the lock number for ww_mutex stress test to kill one lockdep splat: [ ] [ BUG: bad unlock balance detected! ] [ ] kworker/u2:0/5 is trying to release lock (ww_class_mutex) at: [ ] ww_mutex_unlock() [ ] but there are no more locks to release! ... Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Fengguang Wu Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301150138.hdixnmafzfsox7nn@tardis.cn.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3eb39c588397db..6b7abb334ca602 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: [PATCH 0566/1911] x86/hpet: Prevent might sleep splat on resume Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc83..89ff7af2de508b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } From 1657b8f84ed9fc1d2a100671f1d42d6286f20073 Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Fri, 24 Feb 2017 23:30:03 +0100 Subject: [PATCH 0567/1911] ath10k: search SMBIOS for OEM board file extension Board Data File (BDF) is loaded upon driver boot-up procedure. The right board data file is identified, among others, by device and sybsystem ids. The problem, however, can occur when the (default) board data file cannot fulfill with the vendor requirements and it is necessary to use a different board data file. To solve the issue QCA uses SMBIOS type 0xF8 to store Board Data File Name Extension to specify the extension/variant name. The driver will take the extension suffix into consideration and will load the right (non-default) board data file if necessary. If it is unnecessary to use extension board data file, please leave the SMBIOS field blank and default configuration will be used. Example: If a default board data file for a specific board is identified by a string "bus=pci,vendor=168c,device=003e,subsystem-vendor=1028, subsystem-device=0310" then the OEM specific data file, if used, could be identified by variant suffix: "bus=pci,vendor=168c,device=003e,subsystem-vendor=1028, subsystem-device=0310,variant=DE_1AB" If board data file name extension is set but board-2.bin does not contain board data file for the variant, the driver will fallback to the default board data file not to break backward compatibility. This was first applied in commit f2593cb1b291 ("ath10k: Search SMBIOS for OEM board file extension") but later reverted in commit 005c3490e9db ("Revert "ath10k: Search SMBIOS for OEM board file extension"". This patch is now otherwise the same as commit f2593cb1b291 except the regression fixed. 
Signed-off-by: Waldemar Rymarkiewicz Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 101 ++++++++++++++++++++++++- drivers/net/wireless/ath/ath10k/core.h | 19 +++++ 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index dd902b43f8f775..0a8e29e9a0ebc7 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "core.h" @@ -711,6 +713,72 @@ static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) return 0; } +static void ath10k_core_check_bdfext(const struct dmi_header *hdr, void *data) +{ + struct ath10k *ar = data; + const char *bdf_ext; + const char *magic = ATH10K_SMBIOS_BDF_EXT_MAGIC; + u8 bdf_enabled; + int i; + + if (hdr->type != ATH10K_SMBIOS_BDF_EXT_TYPE) + return; + + if (hdr->length != ATH10K_SMBIOS_BDF_EXT_LENGTH) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "wrong smbios bdf ext type length (%d).\n", + hdr->length); + return; + } + + bdf_enabled = *((u8 *)hdr + ATH10K_SMBIOS_BDF_EXT_OFFSET); + if (!bdf_enabled) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not found.\n"); + return; + } + + /* Only one string exists (per spec) */ + bdf_ext = (char *)hdr + hdr->length; + + if (memcmp(bdf_ext, magic, strlen(magic)) != 0) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant magic does not match.\n"); + return; + } + + for (i = 0; i < strlen(bdf_ext); i++) { + if (!isascii(bdf_ext[i]) || !isprint(bdf_ext[i])) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant name contains non ascii chars.\n"); + return; + } + } + + /* Copy extension name without magic suffix */ + if (strscpy(ar->id.bdf_ext, bdf_ext + strlen(magic), + sizeof(ar->id.bdf_ext)) < 0) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", + bdf_ext); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "found and validated bdf variant smbios_type 0x%x bdf %s\n", + ATH10K_SMBIOS_BDF_EXT_TYPE, bdf_ext); +} + +static int ath10k_core_check_smbios(struct ath10k *ar) +{ + ar->id.bdf_ext[0] = '\0'; + dmi_walk(ath10k_core_check_bdfext, ar); + + if (ar->id.bdf_ext[0] == '\0') + return -ENODATA; + + return 0; +} + static int ath10k_download_and_run_otp(struct ath10k *ar) { u32 result, address = ar->hw_params.patch_load_addr; @@ -1020,6 +1088,23 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, case ATH10K_BD_IE_BOARD: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname); + if (ret == -ENOENT && ar->id.bdf_ext[0] != '\0') { + /* try default bdf if variant was not found */ + char *s, *v = ",variant="; + char boardname2[100]; + + strlcpy(boardname2, boardname, + sizeof(boardname2)); + + s = strstr(boardname2, v); + if (s) + *s = '\0'; /* strip ",variant=%s" */ + + ret = ath10k_core_parse_bd_ie_board(ar, data, + ie_len, + boardname2); + } + if (ret == -ENOENT) /* no match found, continue */ break; @@ -1057,6 +1142,9 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, static int ath10k_core_create_board_name(struct ath10k *ar, char *name, size_t name_len) { + /* strlen(',variant=') + strlen(ar->id.bdf_ext) */ + char variant[9 + ATH10K_SMBIOS_BDF_EXT_STR_LENGTH] = { 0 }; + if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-board-id=%d", @@ -1066,12 +1154,15 @@ static int ath10k_core_create_board_name(struct ath10k *ar, char *name, goto out; } + if 
(ar->id.bdf_ext[0] != '\0') + scnprintf(variant, sizeof(variant), ",variant=%s", + ar->id.bdf_ext); + scnprintf(name, name_len, - "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x", + "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x%s", ath10k_bus_str(ar->hif.bus), ar->id.vendor, ar->id.device, - ar->id.subsystem_vendor, ar->id.subsystem_device); - + ar->id.subsystem_vendor, ar->id.subsystem_device, variant); out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using board name '%s'\n", name); @@ -2128,6 +2219,10 @@ static int ath10k_core_probe_fw(struct ath10k *ar) goto err_free_firmware_files; } + ret = ath10k_core_check_smbios(ar); + if (ret) + ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not set.\n"); + ret = ath10k_core_fetch_board_file(ar); if (ret) { ath10k_err(ar, "failed to fetch board file: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 757242ef52ac14..88d14be7fcceb4 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -69,6 +69,23 @@ #define ATH10K_NAPI_BUDGET 64 #define ATH10K_NAPI_QUOTA_LIMIT 60 +/* SMBIOS type containing Board Data File Name Extension */ +#define ATH10K_SMBIOS_BDF_EXT_TYPE 0xF8 + +/* SMBIOS type structure length (excluding strings-set) */ +#define ATH10K_SMBIOS_BDF_EXT_LENGTH 0x9 + +/* Offset pointing to Board Data File Name Extension */ +#define ATH10K_SMBIOS_BDF_EXT_OFFSET 0x8 + +/* Board Data File Name Extension string length. + * String format: BDF__\0 + */ +#define ATH10K_SMBIOS_BDF_EXT_STR_LENGTH 0x20 + +/* The magic used by QCA spec */ +#define ATH10K_SMBIOS_BDF_EXT_MAGIC "BDF_" + struct ath10k; enum ath10k_bus { @@ -798,6 +815,8 @@ struct ath10k { bool bmi_ids_valid; u8 bmi_board_id; u8 bmi_chip_id; + + char bdf_ext[ATH10K_SMBIOS_BDF_EXT_STR_LENGTH]; } id; int fw_api; From c96d0a1c47abd5c4fa544dcedb5fac4d020ac58b Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Wed, 1 Mar 2017 10:58:20 -0300 Subject: [PATCH 0568/1911] crypto: vmx - Use skcipher for cbc fallback Cc: stable@vger.kernel.org #4.10 Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_cbc.c | 47 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 94ad5c0adbcbd3..72a26eb4e95466 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -27,11 +27,12 @@ #include #include #include +#include #include "aesp8-ppc.h" struct p8_aes_cbc_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; }; @@ -39,7 +40,7 @@ struct p8_aes_cbc_ctx { static int p8_aes_cbc_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -47,8 +48,9 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -56,11 +58,12 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - 
crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); + - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -71,7 +74,7 @@ static void p8_aes_cbc_exit(struct crypto_tfm *tfm) struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -91,7 +94,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -103,15 +106,14 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_encrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); @@ -144,15 +146,14 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); From 5839f555fa576be57371686265206398d9ea1480 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Wed, 1 Mar 2017 11:00:00 -0300 Subject: [PATCH 0569/1911] crypto: vmx - Use skcipher for xts fallback Cc: stable@vger.kernel.org #4.10 Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_xts.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 24353ec336c5bc..6adc9290557a4a 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -28,11 +28,12 @@ #include #include #include +#include #include "aesp8-ppc.h" struct p8_aes_xts_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; struct aes_key tweak_key; @@ -41,7 +42,7 @@ struct p8_aes_xts_ctx { static int p8_aes_xts_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -49,8 +50,8 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = 
- crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -58,11 +59,11 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -73,7 +74,7 @@ static void p8_aes_xts_exit(struct crypto_tfm *tfm) struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -98,7 +99,7 @@ static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -113,15 +114,14 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : - crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = enc? 
crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); From 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Feb 2017 16:43:36 +0100 Subject: [PATCH 0570/1911] orangefs: Use RCU for destroy_inode freeing of inodes must be RCU-delayed on all filesystems Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Al Viro --- fs/orangefs/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c48859f16e7b1f..67c24351a67f8d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) return &orangefs_inode->vfs_inode; } +static void orangefs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); +} + static void orangefs_destroy_inode(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode) "%s: deallocated %p destroying inode %pU\n", __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(orangefs_inode_cache, orangefs_inode); + call_rcu(&inode->i_rcu, orangefs_i_callback); } /* From e53aff45c490ea04aa5d9e3cffa65b6b54397455 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 1 Mar 2017 15:15:07 -0700 Subject: [PATCH 0571/1911] selftests: lib.mk Fix individual test builds In commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT"), added support to generate compile targets in a user specified directory. OUTPUT variable controls the location which is undefined when tests are built in the test directory or with "make -C tools/testing/selftests/x86". make -C tools/testing/selftests/x86/ make: Entering directory '/lkml/linux_4.11/tools/testing/selftests/x86' Makefile:44: warning: overriding recipe for target 'clean' ../lib.mk:51: warning: ignoring old recipe for target 'clean' gcc -m64 -o /single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall single_step_syscall.c -lrt -ldl /usr/bin/ld: cannot open output file /single_step_syscall_64: Permission denied collect2: error: ld returned 1 exit status Makefile:50: recipe for target '/single_step_syscall_64' failed make: *** [/single_step_syscall_64] Error 1 make: Leaving directory '/lkml/linux_4.11/tools/testing/selftests/x86' Same failure with "cd tools/testing/selftests/x86/;make" run. Fix this with a change to lib.mk to define OUTPUT to be the pwd when MAKELEVEL is 0. This covers both cases mentioned above. Reported-by: Ingo Molnar Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index ce96d80ad64f4d..775c589ac3c0a2 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -2,6 +2,10 @@ # Makefile can operate with or without the kbuild infrastructure. 
CC := $(CROSS_COMPILE)gcc +ifeq (0,$(MAKELEVEL)) +OUTPUT := $(shell pwd) +endif + TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) From 59f082e464ae0533813d5a1c3149b22563da93d7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:14 -0800 Subject: [PATCH 0572/1911] blk-mq: allocate blk_mq_tags and requests in correct node blk_mq_tags/requests of specific hardware queue are mostly used in specific cpus, which might not be in the same numa node as disk. For example, a nvme card is in node 0. half hardware queue will be used by node 0, the other node 1. Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e6b064e533979..d84c66fb37b78a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1713,16 +1713,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int reserved_tags) { struct blk_mq_tags *tags; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, - set->numa_node, + tags = blk_mq_init_tags(nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1730,7 +1734,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->static_rqs) { kfree(tags->rqs); blk_mq_free_tags(tags); @@ -1750,6 +1754,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, { unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; INIT_LIST_HEAD(&tags->page_list); @@ -1771,7 +1780,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, this_order--; do { - page = alloc_pages_node(set->numa_node, + page = alloc_pages_node(node, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) @@ -1804,7 +1813,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, rq, hctx_idx, i, - set->numa_node)) { + node)) { tags->static_rqs[i] = NULL; goto fail; } From 27ddb689909cd0bab30524a5f720ae3a3e55acac Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:15 -0800 Subject: [PATCH 0573/1911] PCI: add an API to get node from vector Next patch will use the API to get the node from vector for nvme device Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Acked-by: Bjorn Helgaas Signed-off-by: Jens Axboe --- drivers/pci/msi.c | 16 ++++++++++++++++ include/linux/pci.h | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 980eaf58828180..d571bc33068651 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1298,6 +1298,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } EXPORT_SYMBOL(pci_irq_get_affinity); +/** + * pci_irq_get_node - return 
the numa node of a particular msi vector + * @pdev: PCI device to operate on + * @vec: device-relative interrupt vector index (0-based). + */ +int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + const struct cpumask *mask; + + mask = pci_irq_get_affinity(pdev, vec); + if (mask) + return local_memory_node(cpu_to_node(cpumask_first(mask))); + return dev_to_node(&pdev->dev); +} +EXPORT_SYMBOL(pci_irq_get_node); + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 282ed32244ce79..eb3da1a04e6cdc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + return first_online_node; +} #endif static inline int From d3af3ecdc62c46fa67ce7a681f173acb1d750e33 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:16 -0800 Subject: [PATCH 0574/1911] nvme: allocate nvme_queue in correct node nvme_queue is per-cpu queue (mostly). Allocating it in node where blk-mq will use it. Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 57a1af52b06e66..eee8f8426ff234 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1038,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth) + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); + struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, + node); if (!nvmeq) return NULL; @@ -1217,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq = dev->queues[0]; if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); if (!nvmeq) return -ENOMEM; } @@ -1309,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { - if (!nvme_alloc_queue(dev, i, dev->q_depth)) { + /* vector == qid - 1, match nvme_create_queue */ + if (!nvme_alloc_queue(dev, i, dev->q_depth, + pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; } From 415b806de5576b656f3ff94366589af9a161d0c8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 10:04:39 -0700 Subject: [PATCH 0575/1911] blk-mq-sched: Allocate sched reserved tags as specified in the original queue tagset Signed-off-by: Sagi Grimberg Modified by me to also check at driver tag allocation time if the original request was reserved, so we can be sure to allocate a properly reserved tag at that point in time, too. 
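
A tiny standalone sketch of the convention this change relies on (simplified stand-in names, not the kernel API): reserved tags occupy the low indices [0, nr_reserved_tags), so both halves of the fix reduce to the same comparison — size the scheduler tag set with the original set's reserved count, and when converting a scheduler tag into a driver tag, check whether it falls in that reserved range and request a reserved driver tag if so.

/* Hypothetical sketch, not the blk-mq structures or flags. */
#include <stdbool.h>
#include <stdio.h>

struct tag_set {
        unsigned int nr_tags;           /* total tags in the set */
        unsigned int nr_reserved_tags;  /* reserved slice at the front */
};

#define ALLOC_RESERVED 0x1              /* hypothetical allocation flag */

static bool tag_is_reserved(const struct tag_set *t, unsigned int tag)
{
        return tag < t->nr_reserved_tags;
}

/* Decide the driver-tag allocation flags from the scheduler tag. */
static unsigned int driver_alloc_flags(const struct tag_set *sched_tags,
                                       unsigned int sched_tag)
{
        return tag_is_reserved(sched_tags, sched_tag) ? ALLOC_RESERVED : 0;
}

int main(void)
{
        /* Scheduler tags sized with the original set's reserved count. */
        struct tag_set sched = { .nr_tags = 64, .nr_reserved_tags = 2 };

        printf("sched tag 1 -> flags 0x%x\n", driver_alloc_flags(&sched, 1));
        printf("sched tag 9 -> flags 0x%x\n", driver_alloc_flags(&sched, 9));
        return 0;
}
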
Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 3 ++- block/blk-mq-tag.c | 2 +- block/blk-mq-tag.h | 6 ++++++ block/blk-mq.c | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 98c7b061781e55..46ca965fff5cbf 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -454,7 +454,8 @@ int blk_mq_sched_setup(struct request_queue *q) */ ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); + hctx->sched_tags = blk_mq_alloc_rq_map(set, i, + q->nr_requests, set->reserved_tags); if (!hctx->sched_tags) { ret = -ENOMEM; break; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 54c84363c1b238..e48bc2c72615de 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -181,7 +181,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { - if (tag >= tags->nr_reserved_tags) { + if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 63497423c5cd32..5cb51e53cc0353 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -85,4 +85,10 @@ static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, hctx->tags->rqs[tag] = rq; } +static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, + unsigned int tag) +{ + return tag < tags->nr_reserved_tags; +} + #endif diff --git a/block/blk-mq.c b/block/blk-mq.c index d84c66fb37b78a..4df5fb42c74fe5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -852,6 +852,9 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, return true; } + if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) + data.flags |= BLK_MQ_REQ_RESERVED; + rq->tag = blk_mq_get_tag(&data); if (rq->tag >= 0) { if (blk_mq_tag_busy(data.hctx)) { From 6d2809d51a5079f01a416d91dd63b0766cb685d0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 10:28:27 -0800 Subject: [PATCH 0576/1911] blk-mq: make blk_mq_alloc_request_hctx() allocate a scheduler request blk_mq_alloc_request_hctx() allocates a driver request directly, unlike its blk_mq_alloc_request() counterpart. It also crashes because it doesn't update the tags->rqs map. Fix it by making it allocate a scheduler request. 
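For reference, blk_mq_alloc_request_hctx() is the interface a driver uses when a command has to go out on one specific hardware queue (per-queue setup commands and the like). A hypothetical caller, sketched under the assumption that such allocations are non-blocking, looks roughly like this; the foo_* name is invented.

#include <linux/blk-mq.h>

static struct request *foo_alloc_on_hctx(struct request_queue *q,
					 unsigned int hctx_idx)
{
	/*
	 * Returns ERR_PTR(-EXDEV) when the hardware context has no CPUs
	 * mapped to it, so the caller can skip that queue.
	 */
	return blk_mq_alloc_request_hctx(q, 0 /* read */, BLK_MQ_REQ_NOWAIT,
					 hctx_idx);
}

With this fix the returned request carries a scheduler tag whenever an I/O scheduler is attached, and the tags->rqs map is kept consistent for it just as for requests from blk_mq_alloc_request().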
Reported-by: Sagi Grimberg Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq-sched.c | 11 +++++------ block/blk-mq.c | 33 +++++++++++++++------------------ 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 46ca965fff5cbf..5697b23412a191 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -110,15 +110,14 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, struct blk_mq_alloc_data *data) { struct elevator_queue *e = q->elevator; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; struct request *rq; blk_queue_enter_live(q); - ctx = blk_mq_get_ctx(q); - hctx = blk_mq_map_queue(q, ctx->cpu); - - blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx); + data->q = q; + if (likely(!data->ctx)) + data->ctx = blk_mq_get_ctx(q); + if (likely(!data->hctx)) + data->hctx = blk_mq_map_queue(q, data->ctx->cpu); if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; diff --git a/block/blk-mq.c b/block/blk-mq.c index 4df5fb42c74fe5..85a7047d8b03d7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -273,10 +273,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, unsigned int flags, unsigned int hctx_idx) { - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; + struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; - struct blk_mq_alloc_data alloc_data; + unsigned int cpu; int ret; /* @@ -299,25 +298,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, * Check if the hardware context is actually mapped to anything. * If not tell the caller that it should skip this queue. */ - hctx = q->queue_hw_ctx[hctx_idx]; - if (!blk_mq_hw_queue_mapped(hctx)) { - ret = -EXDEV; - goto out_queue_exit; + alloc_data.hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) { + blk_queue_exit(q); + return ERR_PTR(-EXDEV); } - ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + cpu = cpumask_first(alloc_data.hctx->cpumask); + alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); - if (!rq) { - ret = -EWOULDBLOCK; - goto out_queue_exit; - } - - return rq; + rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); -out_queue_exit: + blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); - return ERR_PTR(ret); + + if (!rq) + return ERR_PTR(-EWOULDBLOCK); + + return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); From 59748398992c9c3e9d600e56cb2a5c0c546fe129 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 09:47:54 -0800 Subject: [PATCH 0577/1911] blk-mq: kill blk_mq_set_alloc_data() Nothing is using it anymore. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index 24b2256186f33f..088ced003c13d7 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -146,16 +146,6 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; -static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, - struct request_queue *q, unsigned int flags, - struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx) -{ - data->q = q; - data->flags = flags; - data->ctx = ctx; - data->hctx = hctx; -} - static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->flags & BLK_MQ_REQ_INTERNAL) From 562bef4259776c19cb2423d43af1f99183910a4d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 09:47:55 -0800 Subject: [PATCH 0578/1911] blk-mq: move update of tags->rqs to __blk_mq_alloc_request() No functional difference, it just makes a little more sense to update the tag map where we actually allocate the tag. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq-sched.c | 2 -- block/blk-mq.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 5697b23412a191..09af8ff18719a4 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -134,8 +134,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, rq = __blk_mq_alloc_request(data, op); } else { rq = __blk_mq_alloc_request(data, op); - if (rq) - data->hctx->tags->rqs[rq->tag] = rq; } if (rq) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 85a7047d8b03d7..94593c6282d848 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -234,6 +234,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, } rq->tag = tag; rq->internal_tag = -1; + data->hctx->tags->rqs[rq->tag] = rq; } blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); From 6a8a21546507a3ec88e81c2ec927a3fb63efa8ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2017 11:47:22 -0500 Subject: [PATCH 0579/1911] nbd: stop leaking sockets This was introduced in the multi-connection patch, we've been leaking socket's ever since. Fixes: 9561a7a ("nbd: add multi-connection support") cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0bf2b21a62cb77..c7e93f62366fb1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -689,8 +689,10 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) nbd->num_connections) { int i; - for (i = 0; i < nbd->num_connections; i++) + for (i = 0; i < nbd->num_connections; i++) { + sockfd_put(nbd->socks[i]->sock); kfree(nbd->socks[i]); + } kfree(nbd->socks); nbd->socks = NULL; nbd->num_connections = 0; From 6bae363ee3057a14eec93440826813603559273a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:10 -0500 Subject: [PATCH 0580/1911] blk-mq: Export blk_mq_freeze_queue_wait Drivers can start a freeze, so this provides a way to wait for frozen. 
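For context, the exported wait pairs up with blk_mq_freeze_queue_start() and blk_mq_unfreeze_queue() so a driver can quiesce a queue in stages instead of calling the all-in-one blk_mq_freeze_queue(). A minimal sketch of the intended pattern, with an invented foo_* helper name:

#include <linux/blk-mq.h>

static void foo_reconfigure(struct request_queue *q)
{
	blk_mq_freeze_queue_start(q);	/* stop new requests from entering */
	blk_mq_freeze_queue_wait(q);	/* wait until nothing is in flight */

	/* ... safely update queue or device state here ... */

	blk_mq_unfreeze_queue(q);	/* let I/O flow again */
}

Splitting the steps is what lets a caller start freezes on many queues first and only then wait for each of them, which is how the nvme patch later in this series uses it; the next patch also adds a timed variant, blk_mq_freeze_queue_wait_timeout(), for callers that need to bound the wait.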
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- include/linux/blk-mq.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 94593c6282d848..8da2c04bb88f47 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -75,10 +75,11 @@ void blk_mq_freeze_queue_start(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); -static void blk_mq_freeze_queue_wait(struct request_queue *q) +void blk_mq_freeze_queue_wait(struct request_queue *q) { wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); /* * Guarantee no request is in use, so we can change any data structure of diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 001d30d727c56c..8dacf680c8516d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); From f91328c40a559362b6e7b7bfee01ca17fda87592 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:11 -0500 Subject: [PATCH 0581/1911] blk-mq: Provide freeze queue timeout A driver may wish to take corrective action if queued requests do not complete within a set time. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8da2c04bb88f47..a5e66a7a3506f9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -81,6 +81,15 @@ void blk_mq_freeze_queue_wait(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout) +{ + return wait_event_timeout(q->mq_freeze_wq, + percpu_ref_is_zero(&q->q_usage_counter), + timeout); +} +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); + /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8dacf680c8516d..b296a900611790 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -246,6 +246,8 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); From 302ad8cc09339ea261eef58a8d5f4a116a8ffda5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:12 -0500 Subject: [PATCH 0582/1911] nvme: Complete all stuck requests If the nvme driver is shutting down its controller, the drievr will not start the queues up again, preventing blk-mq's hot CPU notifier from making forward progress. To fix that, this patch starts a request_queue freeze when the driver resets a controller so no new requests may enter. 
The driver will wait for frozen after IO queues are restarted to ensure the queue reference can be reinitialized when nvme requests to unfreeze the queues. If the driver is doing a safe shutdown, the driver will wait for the controller to successfully complete all inflight requests so that we don't unnecessarily fail them. Once the controller has been disabled, the queues will be restarted to force remaining entered requests to end in failure so that blk-mq's hot cpu notifier may progress. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 47 ++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 4 ++++ drivers/nvme/host/pci.c | 33 +++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25ec4e58522058..9b3b57fef446dc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_kill_queues); +void nvme_unfreeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_unfreeze_queue(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_unfreeze); + +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); + if (timeout <= 0) + break; + } + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); + +void nvme_wait_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_wait(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze); + +void nvme_start_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_start(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_start_freeze); + void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a3da1e90b99dbf..2aa20e3e5675bf 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); +void nvme_unfreeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); +void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index eee8f8426ff234..26a5fd05fe88aa 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1675,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i, queues; - u32 csts = -1; + bool dead = true; + struct pci_dev *pdev = to_pci_dev(dev->dev); del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); - if 
(pci_is_enabled(to_pci_dev(dev->dev))) { - nvme_stop_queues(&dev->ctrl); - csts = readl(dev->bar + NVME_REG_CSTS); + if (pci_is_enabled(pdev)) { + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + if (dev->ctrl.state == NVME_CTRL_LIVE) + nvme_start_freeze(&dev->ctrl); + dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || + pdev->error_state != pci_channel_io_normal); } + /* + * Give the controller a chance to complete all entered requests if + * doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + nvme_stop_queues(&dev->ctrl); + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); - if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { + if (dead) { /* A device might become IO incapable very soon during * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. @@ -1704,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); + + /* + * The driver will not be starting up queues again if shutting down so + * must flush all entered requests to their failed completion to avoid + * deadlocking blk-mq hot-cpu notifier. + */ + if (shutdown) + nvme_start_queues(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); } @@ -1826,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work) nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); nvme_dev_add(dev); + nvme_unfreeze(&dev->ctrl); } if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { From e02898b423802b1f3a3aaa7f16e896da069ba8f7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Mar 2017 10:42:38 -0800 Subject: [PATCH 0583/1911] loop: fix LO_FLAGS_PARTSCAN hang loop_reread_partitions() needs to do I/O, but we just froze the queue, so we end up waiting forever. This can easily be reproduced with losetup -P. Fix it by moving the reread to after we unfreeze the queue. 
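For readers following the fix, the underlying rule is simple: while a queue is frozen, nothing that submits I/O to that same queue may run, or it will block forever waiting on the freeze we are still holding. Below is a condensed, hypothetical sketch of the corrected shape of the update path; the foo_* helper is a stand-in for the real settings update, and the rescan step is only indicated by a comment.

#include <linux/blk-mq.h>

/* Placeholder for the real settings update; hypothetical. */
static int foo_apply_settings(void)
{
	return 0;
}

static int foo_set_status(struct request_queue *q)
{
	int err;

	blk_mq_freeze_queue(q);
	err = foo_apply_settings();	/* must not submit I/O to q */
	blk_mq_unfreeze_queue(q);

	if (!err) {
		/*
		 * A loop_reread_partitions()-style rescan goes here: it
		 * submits I/O to this same queue, so it may only run after
		 * the unfreeze, and only when the update succeeded.
		 */
	}

	return err;
}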
Fixes: ecdd09597a57 ("block/loop: fix race between I/O and set_status") Reported-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4b52a16903298c..132c9f371dcec4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) exit: blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + return err; } From a5a79d00017c9eee68a9bcb40d5dfd6f45f17461 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Mar 2017 16:50:13 +0100 Subject: [PATCH 0584/1911] block: Initialize bd_bdi on inode initialization So far we initialized bd_bdi only in bdget(). That is fine for normal bdev inodes however for the special case of the root inode of blockdev_superblock that function is never called and thus bd_bdi is left uninitialized. As a result bdev_evict_inode() may oops doing bdi_put(root->bd_bdi) on that inode as can be seen when doing: mount -t bdev none /mnt Fix the problem by initializing bd_bdi when first allocating the inode and then reinitializing bd_bdi in bdev_evict_inode(). Thanks to syzkaller team for finding the problem. Reported-by: Dmitry Vyukov Fixes: b1d2dc5659b4 ("block: Make blk_get_backing_dev_info() safe without open bdev") Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 77c30f15a02c31..2eca00ec43706b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -870,6 +870,7 @@ static void init_once(void *foo) #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif + bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. 
*/ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -884,8 +885,10 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); - if (bdev->bd_bdi != &noop_backing_dev_info) + if (bdev->bd_bdi != &noop_backing_dev_info) { bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; + } } static const struct super_operations bdev_sops = { @@ -988,7 +991,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2017 12:56:12 +0100 Subject: [PATCH 0585/1911] s390/ipl: always use load normal for CCW-type re-IPL commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") missed to convert one code path to use load normal semantics for re-IPL. Convert the missing code path as well. Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") Reported-by: Michael Holzheu Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cfc5..e545ffe5155ab0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: [PATCH 0586/1911] KVM: s390: Fix guest migration for huge guests resulting in panic While we can technically not run huge page guests right now, we can setup a guest with huge pages. Trying to migrate it will trigger a VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it will happily try to work on non-existing page table entries. With this patch, we always return "dirty" if we encounter a large page when migrating. This at least fixes the immediate problem until we have proper handling for both kind of pages. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Cc: # 3.16+ Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b51..463e5ef02304bb 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. 
+ */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; From 3131d970f0d86bea41811766d55fc638e382daf3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Feb 2017 10:13:38 +0100 Subject: [PATCH 0587/1911] ARM: ux500: resume the second core properly The pen hold/release scheme was copied over to Ux500 from the ARM reference designs like most of these at the time. It is not needed at all, and was mostly removed in commit c00def71efd9 "ARM: ux500: simplify secondary CPU boot". However on the suspend/resume path and hot plug/unplug of CPUs, the .cpu_die() callback was still waiting for the pen to be released which made it spin forever and the second core never come back online after suspend/resume. Fix this by simply replacing the strange custom .cpu_die() with a oneline wfi() just like e.g. the qcom platform does. This fixes the issue and makes the second core come up properly after suspend/resume. As a side effect, this rids us of the completely surplus local setup.h and hotplug.c files, and we just compile this into platsmp.c with everything else SMP. Cc: stable@vger.kernel.org Fixes: c00def71efd9 ("ARM: ux500: simplify secondary CPU boot") Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/mach-ux500/Makefile | 3 --- arch/arm/mach-ux500/cpu-db8500.c | 2 -- arch/arm/mach-ux500/hotplug.c | 37 -------------------------------- arch/arm/mach-ux500/platsmp.c | 9 ++++++-- arch/arm/mach-ux500/setup.h | 16 -------------- 5 files changed, 7 insertions(+), 60 deletions(-) delete mode 100644 arch/arm/mach-ux500/hotplug.c delete mode 100644 arch/arm/mach-ux500/setup.h diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index c2499bff498670..a9a3453548f49c 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -5,7 +5,4 @@ obj-y := pm.o obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o obj-$(CONFIG_SMP) += platsmp.o -obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o - -CFLAGS_hotplug.o += -march=armv7-a diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 24529cf58df60f..28083ef7281958 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -31,8 +31,6 @@ #include #include -#include "setup.h" - #include "db8500-regs.h" static int __init ux500_l2x0_unlock(void) diff --git a/arch/arm/mach-ux500/hotplug.c b/arch/arm/mach-ux500/hotplug.c deleted file mode 100644 index 1cbed0331fd326..00000000000000 --- a/arch/arm/mach-ux500/hotplug.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * License Terms: GNU General Public License v2 - * Based on ARM realview platform - * - * Author: Sundar Iyer - * - */ -#include -#include -#include - -#include - -#include "setup.h" - -/* - * platform-specific code to shutdown a CPU - * - * Called with IRQs disabled - */ -void ux500_cpu_die(unsigned int cpu) -{ - /* directly enter low power state, skipping secure registers */ - for (;;) { - __asm__ __volatile__("dsb\n\t" "wfi\n\t" - : : : "memory"); - if (pen_release == cpu_logical_map(cpu)) { - /* - * OK, proper wakeup, we're done - */ - break; - } - } -} diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index e0ee139fdebfef..19c165aeecd287 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -23,8 +23,6 @@ #include #include -#include "setup.h" 
- #include "db8500-regs.h" /* Magic triggers in backup RAM */ @@ -90,6 +88,13 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) return 0; } +#ifdef CONFIG_HOTPLUG_CPU +void ux500_cpu_die(unsigned int cpu) +{ + wfi(); +} +#endif + static const struct smp_operations ux500_smp_ops __initconst = { .smp_prepare_cpus = ux500_smp_prepare_cpus, .smp_boot_secondary = ux500_boot_secondary, diff --git a/arch/arm/mach-ux500/setup.h b/arch/arm/mach-ux500/setup.h deleted file mode 100644 index 988e7c77068dc5..00000000000000 --- a/arch/arm/mach-ux500/setup.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) 2009 ST-Ericsson. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * These symbols are needed for board-specific files to call their - * own cpu-specific files - */ -#ifndef __ASM_ARCH_SETUP_H -#define __ASM_ARCH_SETUP_H - -extern void ux500_cpu_die(unsigned int cpu); - -#endif /* __ASM_ARCH_SETUP_H */ From 474c90156c8dcc2fa815e6716cc9394d7930cb9c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Mar 2017 12:17:22 -0800 Subject: [PATCH 0588/1911] give up on gcc ilog2() constant optimizations gcc-7 has an "optimization" pass that completely screws up, and generates the code expansion for the (impossible) case of calling ilog2() with a zero constant, even when the code gcc compiles does not actually have a zero constant. And we try to generate a compile-time error for anybody doing ilog2() on a constant where that doesn't make sense (be it zero or negative). So now gcc7 will fail the build due to our sanity checking, because it created that constant-zero case that didn't actually exist in the source code. There's a whole long discussion on the kernel mailing about how to work around this gcc bug. The gcc people themselevs have discussed their "feature" in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72785 but it's all water under the bridge, because while it looked at one point like it would be solved by the time gcc7 was released, that was not to be. So now we have to deal with this compiler braindamage. And the only simple approach seems to be to just delete the code that tries to warn about bad uses of ilog2(). So now "ilog2()" will just return 0 not just for the value 1, but for any non-positive value too. It's not like I can recall anybody having ever actually tried to use this function on any invalid value, but maybe the sanity check just meant that such code never made it out in public. Reported-by: Laura Abbott Cc: John Stultz , Cc: Thomas Gleixner Cc: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/log2.h | 13 ++----------- tools/include/linux/log2.h | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index ef3d4f67118ce0..c373295f359fa5 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -15,12 +15,6 @@ #include #include -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 
62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index 41446668ccce18..d5677d39c1e4c8 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -12,12 +12,6 @@ #ifndef _TOOLS_LINUX_LOG2_H #define _TOOLS_LINUX_LOG2_H -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -78,7 +72,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -141,10 +135,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ From e3330039ea28dc199e3b2da993895ff742a91adf Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 27 Feb 2017 16:07:43 -0800 Subject: [PATCH 0589/1911] ipv6: check for ip6_null_entry in __ip6_del_rt_siblings() Andrey reported a NULL pointer deref bug in ipv6_route_ioctl() -> ip6_route_del() -> __ip6_del_rt_siblings() code path. This is because ip6_null_entry is returned in this path since ip6_null_entry is kinda default for a ipv6 route table root node. Quote from David Ahern: ip6_null_entry is the root of all ipv6 fib tables making it integrated into the table ... We should ignore any attempt of trying to delete it, like we do in __ip6_del_rt() path and several others. Reported-by: Andrey Konovalov Fixes: 0ae8133586ad ("net: ipv6: Allow shorthand delete of all nexthops in multipath route") Cc: David Ahern Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d94f1dfa54c842..43ca90d50ae9b3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2169,10 +2169,13 @@ int ip6_del_rt(struct rt6_info *rt) static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; + struct net *net = info->nl_net; struct sk_buff *skb = NULL; struct fib6_table *table; - int err; + int err = -ENOENT; + if (rt == net->ipv6.ip6_null_entry) + goto out_put; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); @@ -2184,7 +2187,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) if (skb) { u32 seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(info->nl_net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -2198,17 +2201,18 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) rt6i_siblings) { err = fib6_del(sibling, info); if (err) - goto out; + goto out_unlock; } } err = fib6_del(rt, info); -out: +out_unlock: write_unlock_bh(&table->tb6_lock); +out_put: ip6_rt_put(rt); if (skb) { - rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); } return err; From 9aea7779b764a11e357d3c74af6aee3cf90f2045 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 28 Feb 2017 18:08:55 +0100 Subject: [PATCH 0590/1911] drivers: net: xgene: Fix crash on DT systems On DT systems the driver require a clock, but the probe just print a warning and continue, leading to a crash when resetting the device. To fix this crash and properly handle probe deferals only ignore the missing clock if DT isn't used or if the clock doesn't exist. Signed-off-by: Alban Bedel Acked-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index e536301acfdec9..b3568c453b1451 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1749,6 +1749,12 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { + /* Abort if the clock is defined but couldn't be retrived. + * Always abort if the clock is missing on DT system as + * the driver can't cope with this case. + */ + if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node) + return PTR_ERR(pdata->clk); /* Firmware may have set up the clock already. */ dev_info(dev, "clocks have been setup already\n"); } From 402168b4c2dc0734b8fbd282eff77da0275c5129 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:02:51 -0600 Subject: [PATCH 0591/1911] amd-xgbe: Stop the PHY before releasing interrupts Some configurations require the use of the hardware's MDIO support to communicate with external PHYs. The MDIO commands indicate completion through the device interrupt. When bringing down the device the interrupts were released before stopping the external PHY, resulting in MDIO command timeouts. Move the stopping of the PHY to before the releasing of the interrupts. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 3aa457c8ca21d3..248f60d171a5a0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1131,12 +1131,12 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); + phy_if->phy_stop(pdata); + xgbe_free_irqs(pdata); xgbe_napi_disable(pdata, 1); - phy_if->phy_stop(pdata); - hw_if->exit(pdata); channel = pdata->channel; From b42c6761fd1651f564491b53016046c9ebf0b2a9 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:03:01 -0600 Subject: [PATCH 0592/1911] amd-xgbe: Be sure to set MDIO modes on device (re)start The MDIO register mode is set when the device is probed. But when the device is brought down and then back up, the MDIO register mode has been reset. Be sure to reset the mode during device startup and only change the mode of the address specified. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 22 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index a7d16db5c4b21d..937f37a5dcb2cd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1323,7 +1323,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, enum xgbe_mdio_mode mode) { - unsigned int reg_val = 0; + unsigned int reg_val = XGMAC_IOREAD(pdata, MAC_MDIOCL22R); switch (mode) { case XGBE_MDIO_MODE_CL22: diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 9d8c953083b4ef..04804cbb7dc1cd 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -875,6 +875,16 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) !phy_data->sfp_phy_avail) return 0; + /* Set the proper MDIO mode for the PHY */ + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, + phy_data->phydev_mode); + if (ret) { + netdev_err(pdata->netdev, + "mdio port/clause not compatible (%u/%u)\n", + phy_data->mdio_addr, phy_data->phydev_mode); + return ret; + } + /* Create and connect to the PHY device */ phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45)); @@ -2722,6 +2732,18 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) if (ret) return ret; + /* Set the proper MDIO mode for the re-driver */ + if (phy_data->redrv && !phy_data->redrv_if) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, + XGBE_MDIO_MODE_CL22); + if (ret) { + netdev_err(pdata->netdev, + "redriver mdio port not compatible (%u)\n", + phy_data->redrv_addr); + return ret; + } + } + /* Start in highest supported mode */ xgbe_phy_set_mode(pdata, phy_data->start_mode); From 2697ea5a859b83ca49511dcfd98daf42584eb3cf Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:03:10 -0600 Subject: [PATCH 0593/1911] amd-xgbe: Don't overwrite SFP PHY mod_absent settings If an SFP module is not present, xgbe_phy_sfp_phy_settings() should return after applying the default settings. 
Currently there is no return statement and the default settings are overwritten. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 04804cbb7dc1cd..e707c49cc55a78 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -716,6 +716,8 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; pdata->phy.advertising = pdata->phy.supported; + + return; } pdata->phy.advertising &= ~ADVERTISED_Autoneg; From 7b36a7189fc320f0b783dd51bd1f541db56cfbdd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Mar 2017 13:59:08 -0700 Subject: [PATCH 0594/1911] block: don't call ioc_exit_icq() with the queue lock held for blk-mq For legacy scheduling, we always call ioc_exit_icq() with both the ioc and queue lock held. This poses a problem for blk-mq with scheduling, since the queue lock isn't what we use in the scheduler. And since we don't need the queue lock held for ioc exit there, don't grab it and leave any extra locking up to the blk-mq scheduler. Reported-by: Paolo Valente Tested-by: Paolo Valente Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-ioc.c | 44 +++++++++++++++++++++++++++++++------------- block/blk-sysfs.c | 2 -- block/elevator.c | 2 -- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c87b4c31c..0d7c96c0ba71c1 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -36,8 +36,8 @@ static void icq_free_icq_rcu(struct rcu_head *head) } /* - * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for - * mq. + * Exit an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. */ static void ioc_exit_icq(struct io_cq *icq) { @@ -54,7 +54,10 @@ static void ioc_exit_icq(struct io_cq *icq) icq->flags |= ICQ_EXITED; } -/* Release an icq. Called with both ioc and q locked. */ +/* + * Release an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. + */ static void ioc_destroy_icq(struct io_cq *icq) { struct io_context *ioc = icq->ioc; @@ -62,7 +65,6 @@ static void ioc_destroy_icq(struct io_cq *icq) struct elevator_type *et = q->elevator->type; lockdep_assert_held(&ioc->lock); - lockdep_assert_held(q->queue_lock); radix_tree_delete(&ioc->icq_tree, icq->q->id); hlist_del_init(&icq->ioc_node); @@ -222,24 +224,40 @@ void exit_io_context(struct task_struct *task) put_io_context_active(ioc); } +static void __ioc_clear_queue(struct list_head *icq_list) +{ + unsigned long flags; + + while (!list_empty(icq_list)) { + struct io_cq *icq = list_entry(icq_list->next, + struct io_cq, q_node); + struct io_context *ioc = icq->ioc; + + spin_lock_irqsave(&ioc->lock, flags); + ioc_destroy_icq(icq); + spin_unlock_irqrestore(&ioc->lock, flags); + } +} + /** * ioc_clear_queue - break any ioc association with the specified queue * @q: request_queue being cleared * - * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. + * Walk @q->icq_list and exit all io_cq's. 
*/ void ioc_clear_queue(struct request_queue *q) { - lockdep_assert_held(q->queue_lock); + LIST_HEAD(icq_list); - while (!list_empty(&q->icq_list)) { - struct io_cq *icq = list_entry(q->icq_list.next, - struct io_cq, q_node); - struct io_context *ioc = icq->ioc; + spin_lock_irq(q->queue_lock); + list_splice_init(&q->icq_list, &icq_list); - spin_lock(&ioc->lock); - ioc_destroy_icq(icq); - spin_unlock(&ioc->lock); + if (q->mq_ops) { + spin_unlock_irq(q->queue_lock); + __ioc_clear_queue(&icq_list); + } else { + __ioc_clear_queue(&icq_list); + spin_unlock_irq(q->queue_lock); } } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 002af836aa87d8..c44b321335f3eb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -815,9 +815,7 @@ static void blk_release_queue(struct kobject *kobj) blkcg_exit_queue(q); if (q->elevator) { - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); elevator_exit(q->elevator); } diff --git a/block/elevator.c b/block/elevator.c index ac1c9f481a9895..01139f549b5be7 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -983,9 +983,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old_registered) elv_unregister_queue(q); - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); } /* allocate, init and register new elevator */ From 11bd44f62d86115796109b0349e6e191bc99b45a Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 1 Mar 2017 11:18:53 +0530 Subject: [PATCH 0595/1911] cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number to 1.16.33.0. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index 5fdaa16426c50e..fa376444e57c56 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x1A +#define T4FW_VERSION_MICRO 0x21 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x1A +#define T5FW_VERSION_MICRO 0x21 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x1A +#define T6FW_VERSION_MICRO 0x21 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 From f1304f7ba3981e71dcf2ac7db92949eeab49b1bf Mon Sep 17 00:00:00 2001 From: Peter Downs Date: Wed, 1 Mar 2017 01:01:17 -0800 Subject: [PATCH 0596/1911] openvswitch: actions: fixed a brace coding style warning Fixed a brace coding style warning reported by checkpatch.pl Signed-off-by: Peter Downs Signed-off-by: David S. 
Miller --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b1beb2b94ec76c..c82301ce3fffb6 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -796,9 +796,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, unsigned long orig_dst; struct rt6_info ovs_rt; - if (!v6ops) { + if (!v6ops) goto err; - } prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); From d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Mar 2017 16:35:07 -0300 Subject: [PATCH 0597/1911] dccp: Unlock sock before calling sk_free() The code where sk_clone() came from created a new socket and locked it, but then, on the error path didn't unlock it. This problem stayed there for a long while, till b0691c8ee7c2 ("net: Unlock sock before calling sk_free()") fixed it, but unfortunately the callers of sk_clone() (now sk_clone_locked()) were not audited and the one in dccp_create_openreq_child() remained. Now in the age of the syskaller fuzzer, this was finally uncovered, as reported by Dmitry: ---- 8< ---- I've got the following report while running syzkaller fuzzer on 86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") [ BUG: held lock freed! ] 4.10.0+ #234 Not tainted ------------------------- syz-executor6/6898 is freeing memory ffff88006286cac0-ffff88006286d3b7, with a lock still held there! (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 5 locks held by syz-executor6/6898: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 #1: (rcu_read_lock){......}, at: [] inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 #2: (rcu_read_lock){......}, at: [] __skb_unlink include/linux/skbuff.h:1767 [inline] #2: (rcu_read_lock){......}, at: [] __skb_dequeue include/linux/skbuff.h:1783 [inline] #2: (rcu_read_lock){......}, at: [] process_backlog+0x264/0x730 net/core/dev.c:4835 #3: (rcu_read_lock){......}, at: [] ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 #4: (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling sk_free()"). Reported-by: Dmitry Vyukov Cc: Cong Wang Cc: Eric Dumazet Cc: Gerrit Renker Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/minisocks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 53eddf99e4f6eb..d20d948a98ed3c 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } From 94352d45092c23874532221b4d1e4721df9d63df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Mar 2017 16:35:08 -0300 Subject: [PATCH 0598/1911] net: Introduce sk_clone_lock() error path routine When handling problems in cloning a socket with the sk_clone_locked() function we need to perform several steps that were open coded in it and its callers, so introduce a routine to avoid this duplication: sk_free_unlock_clone(). Cc: Cong Wang Cc: Dmitry Vyukov Cc: Eric Dumazet Cc: Gerrit Renker Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/net-ui6laqkotycunhtmqryl9bfx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 1 + net/core/sock.c | 16 +++++++++++----- net/dccp/minisocks.c | 6 +----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 9ccefa5c548786..5e5997654db645 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1526,6 +1526,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); diff --git a/net/core/sock.c b/net/core/sock.c index e7d74940e8637d..f6fd79f33097f3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); - sk_free(newsk); + sk_free_unlock_clone(newsk); newsk = NULL; goto out; } @@ -1592,6 +1588,16 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) } EXPORT_SYMBOL_GPL(sk_clone_lock); +void sk_free_unlock_clone(struct sock *sk) +{ + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + sk->sk_destruct = NULL; + bh_unlock_sock(sk); + sk_free(sk); +} +EXPORT_SYMBOL_GPL(sk_free_unlock_clone); + void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d20d948a98ed3c..e267e6f4c9a556 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -119,11 +119,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, * Activate features: initialise CCIDs, sequence windows etc. 
*/ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); - sk_free(newsk); + sk_free_unlock_clone(newsk); return NULL; } dccp_init_xmit_timers(newsk); From 113285b473824922498d07d7f82459507b9792eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Mar 2017 13:26:04 -0700 Subject: [PATCH 0599/1911] blk-mq: ensure that bd->last is always set correctly When drivers are called with a request in blk-mq, blk-mq flags the state such that the driver knows if this is the last request in this call chain or not. The driver can then use that information to defer kicking off IO until bd->last is true. However, with blk-mq and scheduling, we need to allocate a driver tag for a request before it can be issued. If we fail to allocate such a tag, we could end up in the situation where the last request issued did not have bd->last == true set. This can then cause a driver hang. This fixes a hang with virtio-blk, which uses bd->last as a hint on whether to kick the queue or not. Reported-by: Chris Mason Tested-by: Chris Mason Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a5e66a7a3506f9..e797607dab89d9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -876,12 +876,9 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, return false; } -static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, - struct request *rq) +static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, + struct request *rq) { - if (rq->tag == -1 || rq->internal_tag == -1) - return; - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; @@ -891,6 +888,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } +static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + __blk_mq_put_driver_tag(hctx, rq); +} + +static void blk_mq_put_driver_tag(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx; + + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); + __blk_mq_put_driver_tag(hctx, rq); +} + /* * If we fail getting a driver tag because all the driver tags are already * assigned and on the dispatch list, BUT the first entry does not have a @@ -1000,7 +1017,19 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) bd.rq = rq; bd.list = dptr; - bd.last = list_empty(list); + + /* + * Flag last if we have no more requests, or if we have more + * but can't assign a driver tag to it. + */ + if (list_empty(list)) + bd.last = true; + else { + struct request *nxt; + + nxt = list_first_entry(list, struct request, queuelist); + bd.last = !blk_mq_get_driver_tag(nxt, NULL, false); + } ret = q->mq_ops->queue_rq(hctx, &bd); switch (ret) { @@ -1008,7 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) queued++; break; case BLK_MQ_RQ_QUEUE_BUSY: - blk_mq_put_driver_tag(hctx, rq); + blk_mq_put_driver_tag_hctx(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; @@ -1038,6 +1067,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * that is where we will continue on next queue run. 
*/ if (!list_empty(list)) { + /* + * If we got a driver tag for the next request already, + * free it again. + */ + rq = list_first_entry(list, struct request, queuelist); + blk_mq_put_driver_tag(rq); + spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); From 7db92362d2fee5887f6b0c41653b8c9f8f5d6020 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 1 Mar 2017 13:29:48 -0800 Subject: [PATCH 0600/1911] tcp: fix potential double free issue for fastopen_req tp->fastopen_req could potentially be double freed if a malicious user does the following: 1. Enable TCP_FASTOPEN_CONNECT sockopt and do a connect() on the socket. 2. Call connect() with AF_UNSPEC to disconnect the socket. 3. Make this socket a listening socket by calling listen(). 4. Accept incoming connections and generate child sockets. All child sockets will get a copy of the pointer of fastopen_req. 5. Call close() on all sockets. fastopen_req will get freed multiple times. Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index da385ae997a3d6..cf4555581282c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1110,9 +1110,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags, 1); - inet->defer_connect = 0; - *copied = tp->fastopen_req->copied; - tcp_free_fastopen_req(tp); + /* fastopen_req could already be freed in __inet_stream_connect + * if the connection times out or gets rst + */ + if (tp->fastopen_req) { + *copied = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + } return err; } @@ -2318,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); From 332524eaf739296bfc86c26d1b86a8c0040d7818 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Feb 2017 21:42:30 +0100 Subject: [PATCH 0601/1911] ARM: deconfig: fix the moxart defconfig The moxart defconfig wasn't even building a kernel for Moxart, it was building a kernel for V4T on the nothing platform. Switch to MULTI_V4 and keep the right drivers, update a few selections. Now it (presumably) builds a minimalist Moxart kernel again. 
Cc: Jonas Jensen Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/configs/moxart_defconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index a3cb76cfb8282f..b2ddd534867fcb 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -18,9 +18,8 @@ CONFIG_EMBEDDED=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_ARCH_MULTI_V4T=y +CONFIG_ARCH_MULTI_V4=y # CONFIG_ARCH_MULTI_V7 is not set -CONFIG_KEYBOARD_GPIO_POLLED=y CONFIG_ARCH_MOXART=y CONFIG_MACH_UC7112LX=y CONFIG_PREEMPT=y @@ -94,12 +93,10 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y CONFIG_GPIO_MOXART=y -CONFIG_POWER_SUPPLY=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y @@ -107,10 +104,13 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_MOXART_WDT=y # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y -CONFIG_MMC_SDHCI_MOXART=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_MOXART=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y From be12502e2e64854dbe0a2ddff6d26ec1143d6890 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Mar 2017 17:24:47 -0800 Subject: [PATCH 0602/1911] drivers: net: ethernet: remove incorrect __exit markups Even if bus is not hot-pluggable, devices can be unbound from the driver via sysfs, so we should not be using __exit annotations on remove() methods. The only exception is drivers registered with platform_driver_probe() which specifically disables sysfs bind/unbind attributes. Signed-off-by: Dmitry Torokhov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/declance.c | 30 +++++++++++----------- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 +-- drivers/net/ethernet/faraday/ftgmac100.c | 4 +-- drivers/net/ethernet/faraday/ftmac100.c | 4 +-- drivers/net/ethernet/seeq/sgiseeq.c | 4 +-- drivers/net/ethernet/sgi/meth.c | 4 +-- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 76e5fc7adff519..6c98901f1b8970 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1276,18 +1276,6 @@ static int dec_lance_probe(struct device *bdev, const int type) return ret; } -static void __exit dec_lance_remove(struct device *bdev) -{ - struct net_device *dev = dev_get_drvdata(bdev); - resource_size_t start, len; - - unregister_netdev(dev); - start = to_tc_dev(bdev)->resource.start; - len = to_tc_dev(bdev)->resource.end - start + 1; - release_mem_region(start, len); - free_netdev(dev); -} - /* Find all the lance cards on the system and initialize them */ static int __init dec_lance_platform_probe(void) { @@ -1320,7 +1308,7 @@ static void __exit dec_lance_platform_remove(void) #ifdef CONFIG_TC static int dec_lance_tc_probe(struct device *dev); -static int __exit dec_lance_tc_remove(struct device *dev); +static int dec_lance_tc_remove(struct device *dev); static const struct tc_device_id dec_lance_tc_table[] = { { "DEC ", "PMAD-AA " }, @@ -1334,7 +1322,7 @@ static struct tc_driver dec_lance_tc_driver = { .name = "declance", .bus = &tc_bus_type, .probe = dec_lance_tc_probe, - .remove = __exit_p(dec_lance_tc_remove), + .remove = dec_lance_tc_remove, }, }; @@ -1346,7 +1334,19 @@ static int dec_lance_tc_probe(struct device *dev) return status; } -static int __exit dec_lance_tc_remove(struct device *dev) +static void dec_lance_remove(struct device *bdev) +{ + struct net_device *dev = dev_get_drvdata(bdev); + resource_size_t start, len; + + unregister_netdev(dev); + start = to_tc_dev(bdev)->resource.start; + len = to_tc_dev(bdev)->resource.end - start + 1; + release_mem_region(start, len); + free_netdev(dev); +} + +static int dec_lance_tc_remove(struct device *dev) { put_device(dev); dec_lance_remove(dev); diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 89d4feba1a9aea..55c8e25b43d9ad 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2617,7 +2617,7 @@ static int sbmac_probe(struct platform_device *pldev) return err; } -static int __exit sbmac_remove(struct platform_device *pldev) +static int sbmac_remove(struct platform_device *pldev) { struct net_device *dev = platform_get_drvdata(pldev); struct sbmac_softc *sc = netdev_priv(dev); @@ -2634,7 +2634,7 @@ static int __exit sbmac_remove(struct platform_device *pldev) static struct platform_driver sbmac_driver = { .probe = sbmac_probe, - .remove = __exit_p(sbmac_remove), + .remove = sbmac_remove, .driver = { .name = sbmac_string, }, diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 262587240c86e5..928b0df2b8e033 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1456,7 +1456,7 @@ static int ftgmac100_probe(struct platform_device *pdev) return err; } -static int __exit ftgmac100_remove(struct platform_device *pdev) +static int ftgmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftgmac100 *priv; @@ -1483,7 
+1483,7 @@ MODULE_DEVICE_TABLE(of, ftgmac100_of_match); static struct platform_driver ftgmac100_driver = { .probe = ftgmac100_probe, - .remove = __exit_p(ftgmac100_remove), + .remove = ftgmac100_remove, .driver = { .name = DRV_NAME, .of_match_table = ftgmac100_of_match, diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index c0ddbbe6c22689..6ac336b546e6c2 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1156,7 +1156,7 @@ static int ftmac100_probe(struct platform_device *pdev) return err; } -static int __exit ftmac100_remove(struct platform_device *pdev) +static int ftmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftmac100 *priv; @@ -1176,7 +1176,7 @@ static int __exit ftmac100_remove(struct platform_device *pdev) static struct platform_driver ftmac100_driver = { .probe = ftmac100_probe, - .remove = __exit_p(ftmac100_remove), + .remove = ftmac100_remove, .driver = { .name = DRV_NAME, }, diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index ed34196028b8e8..70347720fdf98a 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -807,7 +807,7 @@ static int sgiseeq_probe(struct platform_device *pdev) return err; } -static int __exit sgiseeq_remove(struct platform_device *pdev) +static int sgiseeq_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sgiseeq_private *sp = netdev_priv(dev); @@ -822,7 +822,7 @@ static int __exit sgiseeq_remove(struct platform_device *pdev) static struct platform_driver sgiseeq_driver = { .probe = sgiseeq_probe, - .remove = __exit_p(sgiseeq_remove), + .remove = sgiseeq_remove, .driver = { .name = "sgiseeq", } diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 69d2d30e5ef13b..ea55abd62ec709 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -854,7 +854,7 @@ static int meth_probe(struct platform_device *pdev) return 0; } -static int __exit meth_remove(struct platform_device *pdev) +static int meth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -866,7 +866,7 @@ static int __exit meth_remove(struct platform_device *pdev) static struct platform_driver meth_driver = { .probe = meth_probe, - .remove = __exit_p(meth_remove), + .remove = meth_remove, .driver = { .name = "meth", } From 9d6acb3bc9058d1fd7a5297d71f14213679bb4bd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Mar 2017 20:48:39 -0800 Subject: [PATCH 0603/1911] ipv6: ignore null_entry in inet6_rtm_getroute() too Like commit 1f17e2f2c8a8 ("net: ipv6: ignore null_entry on route dumps"), we need to ignore null entry in inet6_rtm_getroute() too. Return -ENETUNREACH here to sync with IPv4 behavior, as suggested by David. Fixes: a1a22c1206 ("net: ipv6: Keep nexthop of multipath route on admin down") Reported-by: Dmitry Vyukov Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 43ca90d50ae9b3..229bfcc451ef50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3632,6 +3632,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } + if (rt == net->ipv6.ip6_null_entry) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); From 152669bd3cd2407d6f556009b95ee249c0c1a462 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 2 Mar 2017 13:00:53 +0000 Subject: [PATCH 0604/1911] netvsc: fix use-after-free in netvsc_change_mtu() 'nvdev' is freed in rndis_filter_device_remove -> netvsc_device_remove -> free_netvsc_device, so we mustn't access it, before it's re-created in rndis_filter_device_add -> netvsc_device_add. Signed-off-by: Dexuan Cui Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2d3cdb026a9959..bc05c895d9589d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -859,15 +859,22 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; + memset(&device_info, 0, sizeof(device_info)); + device_info.ring_size = ring_size; + device_info.num_chn = nvdev->num_chn; + device_info.max_num_vrss_chns = nvdev->num_chn; + ndevctx->start_remove = true; rndis_filter_device_remove(hdev, nvdev); + /* 'nvdev' has been freed in rndis_filter_device_remove() -> + * netvsc_device_remove () -> free_netvsc_device(). + * We mustn't access it before it's re-created in + * rndis_filter_device_add() -> netvsc_device_add(). + */ + ndev->mtu = mtu; - memset(&device_info, 0, sizeof(device_info)); - device_info.ring_size = ring_size; - device_info.num_chn = nvdev->num_chn; - device_info.max_num_vrss_chns = nvdev->num_chn; rndis_filter_device_add(hdev, &device_info); out: From 31c05415f5b471fd333fe42629788364faea8e0d Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 2 Mar 2017 12:24:36 -0800 Subject: [PATCH 0605/1911] bonding: use ETH_MAX_MTU as max mtu This restores the ability of setting bond device's mtu to 9000. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") Reported-by: daznis@gmail.com Reported-by: Brad Campbell Cc: Jarod Wilson Signed-off-by: Cong Wang Signed-off-by: Jay Vosburgh Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6321f12630c8c5..8a4ba8b88e52f9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4179,6 +4179,7 @@ void bond_setup(struct net_device *bond_dev) /* Initialize the device entry points */ ether_setup(bond_dev); + bond_dev->max_mtu = ETH_MAX_MTU; bond_dev->netdev_ops = &bond_netdev_ops; bond_dev->ethtool_ops = &bond_ethtool_ops; From 9f674e48c13dcbc31ac903433727837795b81efe Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Thu, 2 Mar 2017 10:50:20 +0000 Subject: [PATCH 0606/1911] xen-netback: Use GFP_ATOMIC to allocate hash Allocation of new_hash, inside xenvif_new_hash(), always happen in softirq context, so use GFP_ATOMIC instead of GFP_KERNEL for new hash allocation. Signed-off-by: Anoob Soman Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5dddc54ba27..3c4c58b9fe76ed 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -39,7 +39,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, unsigned long flags; bool found; - new = kmalloc(sizeof(*entry), GFP_KERNEL); + new = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!new) return; From f0712928be1a66c99c35f871b4df7fa23ec1574a Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 22 Feb 2017 14:47:17 +0100 Subject: [PATCH 0607/1911] CIFS: use DFS pathnames in SMB2+ Create requests When connected to a DFS capable share, the client must set the SMB2_FLAGS_DFS_OPERATIONS flag in the SMB2 header and use DFS path names: "\\" *without* leading \\. Sources: [MS-SMB2] 3.2.5.5 Receiving an SMB2 TREE_CONNECT Response > TreeConnect.IsDfsShare MUST be set to TRUE, if the SMB2_SHARE_CAP_DFS > bit is set in the Capabilities field of the response. [MS-SMB2] 3.2.4.3 Application Requests Opening a File > If TreeConnect.IsDfsShare is TRUE, the SMB2_FLAGS_DFS_OPERATIONS flag > is set in the Flags field. [MS-SMB2] 2.2.13 SMB2 CREATE Request, NameOffset: > If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of the SMB2 > header, the file name includes a prefix that will be processed during > DFS name normalization as specified in section 3.3.5.9. Otherwise, the > file name is relative to the share that is identified by the TreeId in > the SMB2 header. 
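To make the name normalization concrete, here is a small runnable userspace illustration of the layout the patch builds, in plain char rather than the null-terminated, 8-byte-aligned UTF-16 the kernel code produces. The server/share/path strings are invented; the kernel-side equivalent is alloc_path_with_tree_prefix() in the diff below.

#include <stdio.h>
#include <string.h>

/* Build "server\share\path" from a tree name of the form "\\server\share"
 * and a share-relative path: drop the leading "\\" and insert one
 * separator between the share and the path. */
static int build_dfs_name(char *out, size_t outlen,
			  const char *treename, const char *path)
{
	if (strncmp(treename, "\\\\", 2) != 0)
		return -1;			/* tree name must start with "\\" */
	treename += 2;				/* skip the leading "\\" */
	if ((size_t)snprintf(out, outlen, "%s\\%s", treename, path) >= outlen)
		return -1;			/* truncated */
	return 0;
}

int main(void)
{
	char name[256];

	if (build_dfs_name(name, sizeof(name),
			   "\\\\fileserver\\projects", "docs\\spec.txt") == 0)
		printf("name sent in SMB2 CREATE: %s\n", name);
	return 0;
}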
Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 96 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2fd93eeed15a78..2069431b32e368 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1528,6 +1528,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, return 0; } +static int +alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, + const char *treename, const __le16 *path) +{ + int treename_len, path_len; + struct nls_table *cp; + const __le16 sep[] = {cpu_to_le16('\\'), cpu_to_le16(0x0000)}; + + /* + * skip leading "\\" + */ + treename_len = strlen(treename); + if (treename_len < 2 || !(treename[0] == '\\' && treename[1] == '\\')) + return -EINVAL; + + treename += 2; + treename_len -= 2; + + path_len = UniStrnlen((wchar_t *)path, PATH_MAX); + + /* + * make room for one path separator between the treename and + * path + */ + *out_len = treename_len + 1 + path_len; + + /* + * final path needs to be null-terminated UTF16 with a + * size aligned to 8 + */ + + *out_size = roundup((*out_len+1)*2, 8); + *out_path = kzalloc(*out_size, GFP_KERNEL); + if (!*out_path) + return -ENOMEM; + + cp = load_nls_default(); + cifs_strtoUTF16(*out_path, treename, treename_len, cp); + UniStrcat(*out_path, sep); + UniStrcat(*out_path, path); + unload_nls(cp); + + return 0; +} + int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, @@ -1576,30 +1621,49 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, req->ShareAccess = FILE_SHARE_ALL_LE; req->CreateDisposition = cpu_to_le32(oparms->disposition); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); - uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; - /* do not count rfc1001 len field */ - req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 4; - - /* MUST set path len (NameLength) to 0 opening root of share */ - req->NameLength = cpu_to_le16(uni_path_len - 2); /* -1 since last byte is buf[0] which is sent below (path) */ iov[0].iov_len--; - if (uni_path_len % 8 != 0) { - copy_size = uni_path_len / 8 * 8; - if (copy_size < uni_path_len) - copy_size += 8; - - copy_path = kzalloc(copy_size, GFP_KERNEL); - if (!copy_path) - return -ENOMEM; - memcpy((char *)copy_path, (const char *)path, - uni_path_len); + + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); + + /* [MS-SMB2] 2.2.13 NameOffset: + * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of + * the SMB2 header, the file name includes a prefix that will + * be processed during DFS name normalization as specified in + * section 3.3.5.9. Otherwise, the file name is relative to + * the share that is identified by the TreeId in the SMB2 + * header. 
+ */ + if (tcon->share_flags & SHI1005_FLAGS_DFS) { + int name_len; + + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + rc = alloc_path_with_tree_prefix(©_path, ©_size, + &name_len, + tcon->treeName, path); + if (rc) + return rc; + req->NameLength = cpu_to_le16(name_len * 2); uni_path_len = copy_size; path = copy_path; + } else { + uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; + /* MUST set path len (NameLength) to 0 opening root of share */ + req->NameLength = cpu_to_le16(uni_path_len - 2); + if (uni_path_len % 8 != 0) { + copy_size = roundup(uni_path_len, 8); + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; + } } iov[1].iov_len = uni_path_len; From 9d49640a21bffd730a6ebf2a0032e022f7caf84a Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:16:49 +0100 Subject: [PATCH 0608/1911] CIFS: implement get_dfs_refer for SMB2+ in SMB2+ the get_dfs_refer operation uses a FSCTL. The request can be made on any Tree Connection according to the specs. Since Samba only accepted it on an IPC connection until recently, try that first. https://lists.samba.org/archive/samba-technical/2017-February/118859.html 3.2.4.20.3 Application Requests DFS Referral Information: > The client MUST search for an existing Session and TreeConnect to any > share on the server identified by ServerName for the user identified by > UserCredentials. If no Session and TreeConnect are found, the client > MUST establish a new Session and TreeConnect to IPC$ on the target > server as described in section 3.2.4.2 using the supplied ServerName and > UserCredentials. Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 8 ++++ 2 files changed, 109 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eba00cd3bd16f3..b360c381b00ec9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1104,6 +1104,103 @@ smb2_new_lease_key(struct cifs_fid *fid) generate_random_uuid(fid->lease_key); } +static int +smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, + const char *search_name, + struct dfs_info3_param **target_nodes, + unsigned int *num_of_nodes, + const struct nls_table *nls_codepage, int remap) +{ + int rc; + __le16 *utf16_path = NULL; + int utf16_path_len = 0; + struct cifs_tcon *tcon; + struct fsctl_get_dfs_referral_req *dfs_req = NULL; + struct get_dfs_referral_rsp *dfs_rsp = NULL; + u32 dfs_req_size = 0, dfs_rsp_size = 0; + + cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name); + + /* + * Use any tcon from the current session. Here, the first one. 
+ */ + spin_lock(&cifs_tcp_ses_lock); + tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, + tcon_list); + if (tcon) + tcon->tc_count++; + spin_unlock(&cifs_tcp_ses_lock); + + if (!tcon) { + cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n", + ses); + rc = -ENOTCONN; + goto out; + } + + utf16_path = cifs_strndup_to_utf16(search_name, PATH_MAX, + &utf16_path_len, + nls_codepage, remap); + if (!utf16_path) { + rc = -ENOMEM; + goto out; + } + + dfs_req_size = sizeof(*dfs_req) + utf16_path_len; + dfs_req = kzalloc(dfs_req_size, GFP_KERNEL); + if (!dfs_req) { + rc = -ENOMEM; + goto out; + } + + /* Highest DFS referral version understood */ + dfs_req->MaxReferralLevel = DFS_VERSION; + + /* Path to resolve in an UTF-16 null-terminated string */ + memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len); + + do { + /* try first with IPC */ + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_DFS_GET_REFERRALS, + true /* is_fsctl */, true /* use_ipc */, + (char *)dfs_req, dfs_req_size, + (char **)&dfs_rsp, &dfs_rsp_size); + if (rc == -ENOTCONN) { + /* try with normal tcon */ + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_DFS_GET_REFERRALS, + true /* is_fsctl */, false /*use_ipc*/, + (char *)dfs_req, dfs_req_size, + (char **)&dfs_rsp, &dfs_rsp_size); + } + } while (rc == -EAGAIN); + + if (rc) { + cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); + goto out; + } + + rc = parse_dfs_referrals(dfs_rsp, dfs_rsp_size, + num_of_nodes, target_nodes, + nls_codepage, remap, search_name, + true /* is_unicode */); + if (rc) { + cifs_dbg(VFS, "parse error in smb2_get_dfs_refer rc=%d\n", rc); + goto out; + } + + out: + if (tcon) { + spin_lock(&cifs_tcp_ses_lock); + tcon->tc_count--; + spin_unlock(&cifs_tcp_ses_lock); + } + kfree(utf16_path); + kfree(dfs_req); + kfree(dfs_rsp); + return rc; +} #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2283,6 +2380,7 @@ struct smb_version_operations smb20_operations = { .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, + .get_dfs_refer = smb2_get_dfs_refer, }; struct smb_version_operations smb21_operations = { @@ -2364,6 +2462,7 @@ struct smb_version_operations smb21_operations = { .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, + .get_dfs_refer = smb2_get_dfs_refer, }; struct smb_version_operations smb30_operations = { @@ -2455,6 +2554,7 @@ struct smb_version_operations smb30_operations = { .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, + .get_dfs_refer = smb2_get_dfs_refer, }; #ifdef CONFIG_CIFS_SMB311 @@ -2547,6 +2647,7 @@ struct smb_version_operations smb311_operations = { .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, + .get_dfs_refer = smb2_get_dfs_refer, }; #endif /* CIFS_SMB311 */ diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c03b252501a155..18700fd25a0b3f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -695,6 +695,14 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* See MS-DFSC 2.2.2 */ +struct fsctl_get_dfs_referral_req { + __le16 MaxReferralLevel; + __u8 RequestFileName[]; +} __packed; + +/* DFS response is 
struct get_dfs_refer_rsp */ + /* See MS-SMB2 2.2.31.3 */ struct network_resiliency_req { __le32 Timeout; From 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 08:05:56 +0100 Subject: [PATCH 0609/1911] block: Move bdi_unregister() to del_gendisk() Commit 6cd18e711dd8 "block: destroy bdi before blockdev is unregistered." moved bdi unregistration (at that time through bdi_destroy()) from blk_release_queue() to blk_cleanup_queue() because it needs to happen before blk_unregister_region() call in del_gendisk() for MD. SCSI though will free up the device number from sd_remove() called through a maze of callbacks from device_del() in __scsi_remove_device() before blk_cleanup_queue() and thus similar races as described in 6cd18e711dd8 can happen for SCSI as well as reported by Omar [1]. Moving bdi_unregister() to del_gendisk() works for MD and fixes the problem for SCSI since del_gendisk() gets called from sd_remove() before freeing the device number. This also makes device_add_disk() (calling bdi_register_owner()) more symmetric with del_gendisk(). [1] http://marc.info/?l=linux-block&m=148554717109098&w=2 Tested-by: Lekshmi Pillai Acked-by: Tejun Heo Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/genhd.c | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index b9e857f4afe85f..1086dac8724c99 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,7 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_unregister(q->backing_dev_info); put_disk_devt(q->disk_devt); /* @q is and will stay empty, shutdown and put */ diff --git a/block/genhd.c b/block/genhd.c index 2f444b87a5f244..b26a5ea115d00b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -681,6 +681,11 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); + /* + * Unregister bdi before releasing device numbers (as they can get + * reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: [PATCH 0610/1911] powerpc: Emulation support for load/store instructions on LE emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. 
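As a quick illustration of the calling convention involved, a caller hands emulate_step() a pt_regs and a raw instruction word and treats a return value of 1 as "emulated, NIP already advanced"; anything else means fall back to hardware single-stepping. The function below mirrors the test_ld() case from the selftest patch that follows and borrows its TEST_LD() macro; it is illustrative only, not part of this fix.

#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/ptrace.h>
#include <asm/sstep.h>

/* TEST_LD() comes from the emulate_step selftest added below */
static void __init check_ld_emulation(void)
{
	struct pt_regs regs;
	unsigned long val = 0x23;

	memset(&regs, 0, sizeof(regs));
	regs.gpr[3] = (unsigned long)&val;

	/* ld r5, 0(r3): load the doubleword at r3 into r5 */
	if (emulate_step(&regs, TEST_LD(5, 3, 0)) == 1 && regs.gpr[5] == val)
		pr_info("ld emulated in software\n");
	else
		pr_info("ld not emulated; caller falls back to single-stepping\n");
}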
Cc: stable@vger.kernel.org # v3.18+ Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c636000..9c542ec70c5bc8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:43 +0530 Subject: [PATCH 0611/1911] powerpc: emulate_step() tests for load/store instructions Add new selftest that test emulate_step for Normal, Floating Point, Vector and Vector Scalar - load/store instructions. Test should run at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set. Sample log: emulate_step_test: ld : PASS emulate_step_test: lwz : PASS emulate_step_test: lwzx : PASS emulate_step_test: std : PASS emulate_step_test: ldarx / stdcx. 
: PASS emulate_step_test: lfsx : PASS emulate_step_test: stfsx : PASS emulate_step_test: lfdx : PASS emulate_step_test: stfdx : PASS emulate_step_test: lvx : PASS emulate_step_test: stvx : PASS emulate_step_test: lxvd2x : PASS emulate_step_test: stxvd2x : PASS Signed-off-by: Ravi Bangoria [mpe: Drop start/complete lines, make it all __init] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 7 + arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/test_emulate_step.c | 434 ++++++++++++++++++++++++++ 3 files changed, 442 insertions(+) create mode 100644 arch/powerpc/lib/test_emulate_step.c diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacbe5..e7d6d86563eeda 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d0d..2b5e09020cfe37 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 00000000000000..2534c14475546a --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include +#include +#include + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. 
+ */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx 
***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + 
/* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: [PATCH 0612/1911] powerpc/64: Invalidate process table caching after setting process table The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d5f..01c94f141164ae 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001 From: "Gautham R. 
Shenoy" Date: Mon, 27 Feb 2017 11:10:07 +0530 Subject: [PATCH 0613/1911] powerpc/powernv: Fix bug due to labeling ambiguity in power_enter_stop Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") added additional code in power_enter_stop() to distinguish between stop requests whose PSSCR had ESL=EC=1 from those which did not. When ESL=EC=1, we do a forward-jump to a location labelled by "1", which had the code to handle the ESL=EC=1 case. Unfortunately just a couple of instructions before this label, is the macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its expansion. As a result, the current code can result in directly executing stop instruction for deep stop requests with PSSCR ESL=EC=1, without saving the hypervisor state. Fix this BUG by labeling the location that handles ESL=EC=1 case with a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion a la .Lxx from Anton Blanchard). While at it, rename the label "2" labelling the location of the code handling entry into deep stop states with ".Lhandle_deep_stop". For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro to an not-so commonly used value in order to avoid similar mishaps in the future. Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 4 ++-- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb44..155731557c9bc0 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c063..99572873667707 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Sun, 26 Feb 2017 11:38:39 +0530 Subject: [PATCH 0614/1911] selftest/powerpc: Fix false failures for skipped tests Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed. test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL] The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed. 
This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows: test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS] Signed-off-by: Sachin Sant Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048dfe8..66d31de60b9ae9 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: [PATCH 0615/1911] powerpc: Fix compiling a BE kernel with a powerpc64le toolchain GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c1d..19b0d1a8195930 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 16 Feb 2017 09:11:29 -0600 Subject: [PATCH 0616/1911] powerpc/booke: Fix boot crash due to null hugepd On 32-bit book-e machines, hugepd_ok() no longer takes into account null hugepd values, causing this 
crash at boot: Unable to handle kernel paging request for data at address 0x80000000 ... NIP [c0018378] follow_huge_addr+0x38/0xf0 LR [c001836c] follow_huge_addr+0x2c/0xf0 Call Trace: follow_huge_addr+0x2c/0xf0 (unreliable) follow_page_mask+0x40/0x3e0 __get_user_pages+0xc8/0x450 get_user_pages_remote+0x8c/0x250 copy_strings+0x110/0x390 copy_strings_kernel+0x2c/0x50 do_execveat_common+0x478/0x630 do_execve+0x2c/0x40 try_to_run_init_process+0x18/0x60 kernel_init+0xbc/0x110 ret_from_kernel_thread+0x5c/0x64 This impacts all nxp (ex-freescale) 32-bit booke platforms. This was caused by the change of hugepd_t.pd from signed to unsigned, and the update to the nohash version of hugepd_ok(). Previously hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas now we need to explicitly check that the value is not zero and also that PD_HUGE is *clear*. This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte() because on 32-bit we use pgtable-nopud.h, which causes the pgd_none() check to be always false. Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Madalin-Cristian Bucur Signed-off-by: Laurentiu Tudor [mpe: Flesh out change log details.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a385276329..e5805ad78e127b 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } From 68e21be2916b359fd8afb536c1911dc014cfd03e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:08:20 +0100 Subject: [PATCH 0617/1911] sched/headers: Move task->mm handling methods to Move the following task->mm helper APIs into a new header file, , to further reduce the size and complexity of . 
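As a sketch of what a consumer looks like after the split, code that pins a task's address space now reaches these helpers through the new sched/mm header rather than through sched.h itself. The function below is invented for illustration; only the helpers and the mm_struct field are real.

#include <linux/sched.h>
#include <linux/sched/mm.h>

/* Illustrative only: report a task's mapped-page count. */
static unsigned long task_total_vm(struct task_struct *tsk)
{
	struct mm_struct *mm = get_task_mm(tsk);	/* takes an mm_users reference */
	unsigned long total = 0;

	if (mm) {
		total = mm->total_vm;
		mmput(mm);				/* drop the mm_users reference */
	}
	return total;
}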
Here are how the APIs are used in various kernel files: # mm_alloc(): arch/arm/mach-rpc/ecard.c fs/exec.c include/linux/sched/mm.h kernel/fork.c # __mmdrop(): arch/arc/include/asm/mmu_context.h include/linux/sched/mm.h kernel/fork.c # mmdrop(): arch/arm/mach-rpc/ecard.c arch/m68k/sun3/mmu_emu.c arch/x86/mm/tlb.c drivers/gpu/drm/amd/amdkfd/kfd_process.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/hw/hfi1/file_ops.c drivers/vfio/vfio_iommu_spapr_tce.c fs/exec.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/mmu_notifier.h include/linux/sched/mm.h kernel/fork.c kernel/futex.c kernel/sched/core.c mm/khugepaged.c mm/ksm.c mm/mmu_context.c mm/mmu_notifier.c mm/oom_kill.c virt/kvm/kvm_main.c # mmdrop_async_fn(): include/linux/sched/mm.h # mmdrop_async(): include/linux/sched/mm.h kernel/fork.c # mmget_not_zero(): fs/userfaultfd.c include/linux/sched/mm.h mm/oom_kill.c # mmput(): arch/arc/include/asm/mmu_context.h arch/arc/kernel/troubleshoot.c arch/frv/mm/mmu-context.c arch/powerpc/platforms/cell/spufs/context.c arch/sparc/include/asm/mmu_context_32.h drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/core/uverbs_main.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/exec.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/events/uprobes.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/oom_kill.c mm/process_vm_access.c mm/rmap.c mm/swapfile.c mm/util.c virt/kvm/async_pf.c # mmput_async(): include/linux/sched/mm.h kernel/fork.c mm/oom_kill.c # get_task_mm(): arch/arc/kernel/troubleshoot.c arch/powerpc/platforms/cell/spufs/context.c drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/util.c # mm_access(): fs/proc/base.c include/linux/sched/mm.h kernel/fork.c mm/process_vm_access.c # mm_release(): arch/arc/include/asm/mmu_context.h fs/exec.c include/linux/sched/mm.h include/uapi/linux/sched.h kernel/exit.c kernel/fork.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/smp.c | 2 +- arch/arc/kernel/smp.c | 2 +- 
arch/arm/kernel/smp.c | 2 +- arch/arm64/kernel/smp.c | 2 +- arch/blackfin/mach-common/smp.c | 2 +- arch/hexagon/kernel/smp.c | 2 +- arch/ia64/kernel/setup.c | 2 +- arch/m32r/kernel/setup.c | 2 +- arch/metag/kernel/smp.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/s390/kernel/processor.c | 2 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/smp.c | 2 +- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/sun4m_smp.c | 2 +- arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/tile/kernel/smpboot.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/xtensa/kernel/smp.c | 1 + include/linux/sched.h | 95 --------------------------------- include/linux/sched/mm.h | 95 +++++++++++++++++++++++++++++++++ 26 files changed, 120 insertions(+), 116 deletions(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index acb4b146a60795..9fc560459ebd64 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index b8e8d394448137..f46267153ec2e9 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b724cff7ad60d0..572a8df1b7662d 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 83c0a839a6adaa..ef1caae02110ee 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 3ac98252d2996e..b32ddab7966c95 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index 1f63e91a353bba..5dbc15549e011d 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 63bef6fc0f3ee4..23e3fd61e335e4 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index b18bc0bd654470..1a9e977287e615 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index cab2aa64ca826a..232a12bf3f999e 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1a9366a157cb22..c7d17cfb32f678 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 67b452b41ff6a6..63365106ea1907 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fce17789c6751a..46f89e66a273bc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bf78545238315a..928b929a62614a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,11 +8,13 @@ #include #include +#include #include #include #include #include #include + #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fe68de6c746c44..e359ec67586982 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 4abf119c129ccc..c483422ea4d075 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index b99d33797e1df0..db7acf27bea2e0 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 15052d364e046a..b3bc0ac757cc11 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 7b55c50eabe55a..af93b50e3ce430 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 633c4cf6fdb0bf..5547fcb1d72df5 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 2de72a49308ad2..466d4aed06c771 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -9,7 +9,7 @@ * I hate traps on the sparc, grrr... 
*/ -#include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 4ff4c35f76b2ac..196ee5eb4d489b 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index f3fdd0c39b1278..869c22e5756145 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2fd8fefc5899d..b11b38c3b0bde1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fd894eaa63f3ad..932d64689bacbb 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 04262072923008..d29bbe0ee41fab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,101 +2379,6 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) return sp; } -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. 
- */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, struct task_struct *, unsigned long); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d32e3932b2e337..be1ae55f5ab979 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -5,4 +5,99 @@ #include #include +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. 
+ */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + #endif /* _LINUX_SCHED_MM_H */ From 11701c6768367294c5086738d49196192aaf3d60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:21:47 +0100 Subject: [PATCH 0618/1911] sched/headers: Move task->mm coredumping related defines and methods from to This further reduces the size and complexity of . These are the definitions and APIs that are moved: # MMF_*: fs/binfmt_elf.c fs/binfmt_elf_fdpic.c fs/exec.c fs/proc/base.c include/linux/khugepaged.h include/linux/ksm.h include/linux/sched/coredump.h kernel/events/uprobes.c kernel/fork.c mm/huge_memory.c mm/khugepaged.c mm/ksm.c mm/memory.c mm/oom_kill.c # SUID_DUMP_*: arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c kernel/sysctl.c # get_dumpable(): arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c # set_dumpable(): fs/exec.c include/linux/sched/coredump.h kernel/cred.c kernel/sys.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 68 --------------------------------- include/linux/sched/coredump.h | 69 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 68 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d29bbe0ee41fab..7934cd0acbc78d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -361,74 +361,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. 
- */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ab81e564e07697..f46912aa4f4c03 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -2,5 +2,74 @@ #define _LINUX_SCHED_COREDUMP_H #include +#include + +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + +/* mm flags */ + +/* for SUID_DUMP_* above */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) + +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. 
+ */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ From c3edc4010e9d102eb7b8f17d15c2ebc425fed63c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 08:35:14 +0100 Subject: [PATCH 0619/1911] sched/headers: Move task_struct::signal and task_struct::sighand types and accessors into MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task_struct::signal and task_struct::sighand are pointers, which would normally make it straightforward to not define those types in sched.h. That is not so, because the types are accompanied by a myriad of APIs (macros and inline functions) that dereference them. Split the types and the APIs out of sched.h and move them into a new header, . With this change sched.h does not know about 'struct signal' and 'struct sighand' anymore, trying to put accessors into sched.h as a test fails the following way: ./include/linux/sched.h: In function ‘test_signal_types’: ./include/linux/sched.h:2461:18: error: dereferencing pointer to incomplete type ‘struct signal_struct’ ^ This reduces the size and complexity of sched.h significantly. Update all headers and .c code that relied on getting the signal handling functionality from to include . The list of affected files in the preparatory patch was partly generated by grepping for the APIs, and partly by doing coverage build testing, both all[yes|mod|def|no]config builds on 64-bit and 32-bit x86, and an array of cross-architecture builds. Nevertheless some (trivial) build breakage is still expected related to rare Kconfig combinations and in-flight patches to various kernel code, but most of it should be handled by this patch. 
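To make the dependency behind the quoted build error concrete, here is a minimal stand-alone C sketch (the *_demo names are illustrative, not from the kernel): a bare forward declaration is enough for sched.h to keep the task->signal pointer member, while any accessor that dereferences it has to live next to the full type definition in the new signal header:

	/* sched.h side: a forward declaration suffices for pointer members. */
	struct signal_struct;

	struct task_struct_demo {
		struct signal_struct *signal;
	};

	/*
	 * sched/signal.h side: the full definition and the accessors that
	 * dereference it belong together; without the definition in scope the
	 * compiler reports "dereferencing pointer to incomplete type".
	 */
	struct signal_struct {
		int nr_threads;
		/* ... */
	};

	static inline int get_nr_threads_demo(struct task_struct_demo *tsk)
	{
		return tsk->signal->nr_threads;
	}

This mirrors the real get_nr_threads() accessor moved by the patch below; the same reasoning applies to every signal_struct/sighand_struct helper in the diff.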
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/nwfpe/fpmodule.c | 2 +- arch/sh/kernel/cpu/sh4/fpu.c | 3 +- drivers/net/tap.c | 2 +- include/linux/sched.h | 499 +--------------------------------- include/linux/sched/signal.h | 502 +++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup-v1.c | 1 + mm/vmalloc.c | 2 +- net/smc/af_smc.c | 2 + net/smc/smc_clc.c | 2 + net/smc/smc_close.c | 2 + net/smc/smc_rx.c | 2 + net/smc/smc_tx.c | 2 + 12 files changed, 520 insertions(+), 501 deletions(-) diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c index ec717c190e2c79..1365e865084370 100644 --- a/arch/arm/nwfpe/fpmodule.c +++ b/arch/arm/nwfpe/fpmodule.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 69ab4d3c8d4149..95fd2dcb83da3b 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -10,8 +10,7 @@ * * FIXME! These routines have not been tested for big endian case. */ -#include -#include +#include #include #include #include diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 35b55a2fa1a154..4d4173d25dd0af 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 7934cd0acbc78d..c1586104d4c0b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,9 @@ struct blk_plug; struct filename; struct nameidata; +struct signal_struct; +struct sighand_struct; + extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -361,13 +364,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - struct pacct_struct { int ac_flag; long ac_exitcode; @@ -485,195 +481,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. 
The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. 
- */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - /* * Some day this will be a full-fledged user tracking system.. */ @@ -2126,190 +1933,8 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. 
- */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, @@ -2338,10 +1963,8 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); extern void do_group_exit(int); @@ -2376,81 +1999,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. 
Also @@ -2471,25 +2019,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) @@ -2862,28 +2391,6 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c6958a53fef3e3..53fe5450f431f9 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,4 +8,506 @@ #include #include +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * NOTE! "signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. 
+ */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. 
+ */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, + const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. 
For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(¤t->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. 
+ */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index fc34bcf2329f4d..08d2cb605101b4 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmalloc.c b/mm/vmalloc.c index be93949b488599..b4024d688f3869 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5d4208ad029e27..85837ab90e8916 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index cc6b6f8651ebc0..e41f594a1e1d0c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 03dfcc6b76614a..67a71d170bedb4 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -9,6 +9,8 @@ */ #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 5d1878732f4647..c4ef9a4ec56971 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 6e73b28915ea6d..69a0013dd25cec 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include "smc.h" From bcbb6a5bf7df6e37ba652d1f426eab042ec4f56b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:22:42 +0100 Subject: [PATCH 0620/1911] sched/headers: Move 'struct user_struct' definition and APIs to the new header 'struct user_struct' was added to sched.h historically, but it's actually 
entirely independent of task_struct and of scheduler details, so move it to its own header. Fix up .c files using those facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 53 ------------------------------------- include/linux/sched/user.h | 54 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c1586104d4c0b6..71efbcafaa31e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern void io_schedule(void); void __noreturn do_task_dead(void); struct nsproxy; -struct user_namespace; #ifdef CONFIG_MMU extern void arch_pick_mmap_layout(struct mm_struct *mm); @@ -481,49 +480,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; - -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(kuid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - - struct backing_dev_info; struct reclaim_state; @@ -1908,15 +1864,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - #include extern void xtime_update(unsigned long ticks); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 83238e5ddadda0..40c6363da5ef74 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -3,4 +3,58 @@ #include +struct key; + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif +#ifdef CONFIG_EPOLL + atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +#endif +#ifdef CONFIG_POSIX_MQUEUE + /* protected by mq_lock */ + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? 
*/ +#endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + +#ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ + struct key *session_keyring; /* UID's default session keyring */ +#endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + kuid_t uid; + +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) + atomic_long_t locked_vm; +#endif +}; + +extern int uids_sysfs_init(void); + +extern struct user_struct *find_user(kuid_t); + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(kuid_t); +static inline struct user_struct *get_uid(struct user_struct *u) +{ + atomic_inc(&u->__count); + return u; +} +extern void free_uid(struct user_struct *); + #endif /* _LINUX_SCHED_USER_H */ From d151b27d3f86589308cd8c891fdfd2db5f8e80d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:17:23 +0100 Subject: [PATCH 0621/1911] sched/headers: Move softlockup detector watchdog methods to These methods don't belong into , they are neither directly related to task_struct or are scheduler functionality. Put them next to the other watchdog methods in . ( Arguably that header's name is a misnomer, and this patch makes it more so - but it should be renamed in another patch. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 37 ------------------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0a3fadc32693a9..aa3cd087827038 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,43 @@ #include #include +#ifdef CONFIG_LOCKUP_DETECTOR +extern void touch_softlockup_watchdog_sched(void); +extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); +extern void touch_all_softlockup_watchdogs(void); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; +void lockup_detector_init(void); +#else +static inline void touch_softlockup_watchdog_sched(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +static inline void touch_softlockup_watchdog_sync(void) +{ +} +static inline void touch_all_softlockup_watchdogs(void) +{ +} +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. 
Each lockup detector has its dedicated bit - diff --git a/include/linux/sched.h b/include/linux/sched.h index 71efbcafaa31e7..8a1d296c53a0f4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -286,43 +286,6 @@ extern int sched_cpu_dying(unsigned int cpu); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) From 8d88460edc05224b8d7ae3372173153876a02825 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:06:10 +0100 Subject: [PATCH 0622/1911] sched/headers: Move 'struct pacct_struct' and 'struct cpu_itimer' form to These structures are actually part of 'struct signal', so move them to where they belong. This further decreases the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 ------------- include/linux/sched/signal.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a1d296c53a0f4..8bf111efe98e41 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -326,19 +326,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 53fe5450f431f9..30e7ceed3ef62d 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -19,6 +19,22 @@ struct sighand_struct { wait_queue_head_t signalfd_wqh; }; +/* + * Per-process accounting stats: + */ +struct pacct_struct { + int ac_flag; + long ac_exitcode; + unsigned long ac_mem; + u64 ac_utime, ac_stime; + unsigned long ac_minflt, ac_majflt; +}; + +struct cpu_itimer { + u64 expires; + u64 incr; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always From 7284c6d419b5a6ae9806927f1fd4f0cfd19a16f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:14:19 +0100 Subject: [PATCH 0623/1911] sched/headers: Move the cpufreq interfaces to No need to have this in the generic header. 
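For orientation, a minimal sketch of the governor-side usage these relocated declarations serve; the my_gov_* names are invented for illustration, only the update_util hook API and the SCHED_CPUFREQ_IOWAIT flag come from the header being created here:

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sched/cpufreq.h>

static DEFINE_PER_CPU(struct update_util_data, my_gov_data);

static void my_gov_update(struct update_util_data *data, u64 time,
                          unsigned int flags)
{
        if (flags & SCHED_CPUFREQ_IOWAIT)
                pr_debug("iowait boost hint at %llu\n", time);
}

static void my_gov_start(int cpu)
{
        cpufreq_add_update_util_hook(cpu, &per_cpu(my_gov_data, cpu),
                                     my_gov_update);
}

static void my_gov_stop(int cpu)
{
        cpufreq_remove_update_util_hook(cpu);
}
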
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ----------------- include/linux/sched/cpufreq.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8bf111efe98e41..aae79706ec4f47 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2288,21 +2288,4 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 07ed9664fa201e..2bdcfbfc4c30ec 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -3,4 +3,25 @@ #include +/* + * Interface between cpufreq drivers and the scheduler: + */ + +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); +}; + +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); +void cpufreq_remove_update_util_hook(int cpu); +#endif /* CONFIG_CPU_FREQ */ + #endif /* _LINUX_SCHED_CPUFREQ_H */ From 4240c8bf877f1145571106a2934c5cea0b51b178 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:18:24 +0100 Subject: [PATCH 0624/1911] sched/headers: Move more mm_struct related functionality from to Neither the mmap_layout nor the mm_update_next_owner() methods need to be in - move them to the more appropriate header. 
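As a rough, illustrative sketch (not part of the patch): the per-architecture code consuming these prototypes now only needs <linux/sched/mm.h>; TASK_UNMAPPED_BASE comes from the architecture's own headers, and the simplified bottom-up layout below stands in for the real arch implementations:

#include <linux/mm_types.h>
#include <linux/sched/mm.h>

/* Simplified legacy bottom-up layout, as several architectures implement it. */
void arch_pick_mmap_layout(struct mm_struct *mm)
{
        mm->mmap_base = TASK_UNMAPPED_BASE;
        mm->get_unmapped_area = arch_get_unmapped_area;
}
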
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 --------------------- include/linux/sched/mm.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index aae79706ec4f47..1ddb82be6b5024 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -313,19 +313,6 @@ void __noreturn do_task_dead(void); struct nsproxy; -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -2280,12 +2267,4 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - #endif diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index be1ae55f5ab979..93fad9f0f46627 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -100,4 +100,25 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_MEMCG +extern void mm_update_next_owner(struct mm_struct *mm); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} +#endif /* CONFIG_MEMCG */ + +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern unsigned long +arch_get_unmapped_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); +extern unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ From abe722a1c59728c6c0ea4e4d5efcfe397c8abebc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:27:56 +0100 Subject: [PATCH 0625/1911] sched/headers: Move the 'init_mm' declaration from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6daaf0a519683a..28bc710d3467a4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -546,6 +546,8 @@ struct mm_struct { struct work_struct async_put_work; }; +extern struct mm_struct init_mm; + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/sched.h b/include/linux/sched.h index 1ddb82be6b5024..253d8ab6256c76 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1782,8 +1782,6 @@ static inline int kstack_end(void *addr) extern union thread_union init_thread_union; 
extern struct task_struct init_task; -extern struct mm_struct init_mm; - extern struct pid_namespace init_pid_ns; /* From d026ce796cbca3c49678a68bb4a39fb4b9cf8192 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:32:21 +0100 Subject: [PATCH 0626/1911] sched/headers: Move in_vfork() from to The in_vfork() function deals with task->mm, so it better belongs into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 26 -------------------------- include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 253d8ab6256c76..aa60812b4b7a64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1242,32 +1242,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; - - /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. - * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. - */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 93fad9f0f46627..64d83fa8d93aa9 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -121,4 +121,30 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif +static inline bool in_vfork(struct task_struct *tsk) +{ + bool ret; + + /* + * need RCU to access ->real_parent if CLONE_VM was used along with + * CLONE_PARENT. + * + * We check real_parent->mm == tsk->mm because CLONE_VFORK does not + * imply CLONE_VM + * + * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus + * ->real_parent is not necessarily the task doing vfork(), so in + * theory we can't rely on task_lock() if we want to dereference it. + * + * And in this case we can't trust the real_parent->mm == tsk->mm + * check, it can be false negative. But we do not care, if init or + * another oom-unkillable task does this it should blame itself. + */ + rcu_read_lock(); + ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + rcu_read_unlock(); + + return ret; +} + #endif /* _LINUX_SCHED_MM_H */ From 5647028d550c959577527cec9c0f29fbcea029ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:39:17 +0100 Subject: [PATCH 0627/1911] sched/headers: Move the NUMA balancing interfaces from to Split out the interface between the scheduler and the MM which deals with page fault driven NUMA balancing, into the new header. 
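For context, a hedged sketch of the fault-side caller pattern behind these interfaces; the helper below is invented for illustration, the real callers live in mm/memory.c and mm/huge_memory.c:

#include <linux/sched/numa_balancing.h>
#include <linux/topology.h>

static void note_numa_hinting_fault(int last_nid, int page_nid,
                                    int nr_pages, bool migrated)
{
        int flags = 0;

        if (migrated)
                flags |= TNF_MIGRATED;
        if (page_nid == numa_node_id())
                flags |= TNF_FAULT_LOCAL;       /* fault hit the local node */

        task_numa_fault(last_nid, page_nid, nr_pages, flags);
}
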
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ------------------------ include/linux/sched/numa_balancing.h | 40 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index aa60812b4b7a64..fcaea1e7b08a1d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1236,41 +1236,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 999182279f785e..35d5fc77b4be9b 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -1,6 +1,46 @@ #ifndef _LINUX_SCHED_NUMA_BALANCING_H #define _LINUX_SCHED_NUMA_BALANCING_H +/* + * This is the interface between the scheduler and the MM that + * implements memory access pattern based NUMA-balancing: + */ + #include +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 + +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); +extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); +#else +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} +static inline void set_numabalancing_state(bool enabled) +{ +} +static inline void task_numa_free(struct task_struct *p) +{ +} +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} +#endif + #endif /* _LINUX_SCHED_NUMA_BALANCING_H */ From c41cfc6c5ba46050b416c0b0b2621cbe68c4669c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:45:58 +0100 Subject: [PATCH 0628/1911] sched/headers: Move the JOBCTL_ defines and methods from to Only a small fraction of sched.h users actually utilizes these defines, and they are not scheduler functionality in any case, so move them into their separate header. 
(Also make a self-contained header.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 30 ------------------------------ include/linux/sched/jobctl.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index fcaea1e7b08a1d..46ba8f1b5f1f9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1549,36 +1549,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index 228e4644937bf7..016afa0fb3bbce 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -1,4 +1,36 @@ #ifndef _LINUX_SCHED_JOBCTL_H #define _LINUX_SCHED_JOBCTL_H +#include + +struct task_struct; + +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct 
task_struct *task, unsigned long mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask); + #endif /* _LINUX_SCHED_JOBCTL_H */ From 30a1baab81189f01c427c4939610b2dde2dbec37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: [PATCH 0629/1911] sched/headers: Remove various unrelated headers from Remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include because they are either not required, or are already included naturally as part of the remaining headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 46ba8f1b5f1f9d..7752025679c01c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,60 +5,32 @@ #include -#include /* for HZ */ - #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include #include -#include - -#include #include -#include #include #include #include -#include -#include #include #include -#include -#include #include -#include #include #include -#include -#include #include -#include #include #include #include #include #include -#include -#include #include #include #include #include -#include - struct sched_attr; struct sched_param; From 9a07000400c853c57477c2a5138ae9f556c40897 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:10:05 +0100 Subject: [PATCH 0630/1911] sched/headers: Move CONFIG_TASK_XACCT bits from to The CONFIG_TASK_XACCT=y accounting inline functions are only used by fs/read_write.c, so move them into their separate header. 
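For reference, the consumer pattern in fs/read_write.c looks roughly like this once it includes the new header; the function name is hypothetical and error handling is trimmed, only the charging helpers and __vfs_read() are real APIs:

#include <linux/fs.h>
#include <linux/sched/xacct.h>

static ssize_t my_read(struct file *file, char __user *buf,
                       size_t count, loff_t *pos)
{
        ssize_t ret = __vfs_read(file, buf, count, pos);

        if (ret > 0)
                add_rchar(current, ret);        /* bytes read on behalf of the task */
        inc_syscr(current);                     /* one more read syscall issued */
        return ret;
}
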
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 38 --------------------------------- include/linux/sched/xacct.h | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7752025679c01c..1201312e111e09 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2104,44 +2104,6 @@ extern struct task_group root_task_group; extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h index 890f7ce5cd2768..a28156a0d34a12 100644 --- a/include/linux/sched/xacct.h +++ b/include/linux/sched/xacct.h @@ -1,6 +1,48 @@ #ifndef _LINUX_SCHED_XACCT_H #define _LINUX_SCHED_XACCT_H +/* + * Extended task accounting methods: + */ + #include +#ifdef CONFIG_TASK_XACCT +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.rchar += amt; +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.wchar += amt; +} + +static inline void inc_syscr(struct task_struct *tsk) +{ + tsk->ioac.syscr++; +} + +static inline void inc_syscw(struct task_struct *tsk) +{ + tsk->ioac.syscw++; +} +#else +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void inc_syscr(struct task_struct *tsk) +{ +} + +static inline void inc_syscw(struct task_struct *tsk) +{ +} +#endif + #endif /* _LINUX_SCHED_XACCT_H */ From 2a1f062a4acf0be50516ceece92a7182a173d55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: [PATCH 0631/1911] sched/headers: Move signal wakeup & sigpending methods from into This reduces the size of . 
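To illustrate what moves, here is the canonical interruptible wait loop that now gets signal_pending() from <linux/sched/signal.h>; the helper itself is a simplified sketch, not code taken from the tree:

#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>

static int wait_for_event_flag(wait_queue_head_t *wq, int *flag)
{
        DEFINE_WAIT(wait);
        int ret = 0;

        while (!*flag) {
                prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
                if (signal_pending(current)) {  /* moved helper */
                        ret = -ERESTARTSYS;
                        break;
                }
                if (!*flag)
                        schedule();
        }
        finish_wait(wq, &wait);
        return ret;
}
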
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 51 ---------------------------------- include/linux/sched/signal.h | 53 +++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1201312e111e09..1e0400069e95bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1931,37 +1931,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -2028,26 +1997,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - /* * Wrappers for p->thread_info->cpu access. No-op on UP. 
*/ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 30e7ceed3ef62d..b3545fb4333afa 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -3,10 +3,10 @@ #include #include -#include #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: @@ -271,6 +271,57 @@ extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + +static inline int signal_pending(struct task_struct *p) +{ + return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); +} + +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + +static inline int signal_pending_state(long state, struct task_struct *p) +{ + if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) + return 0; + if (!signal_pending(p)) + return 0; + + return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); +} + +/* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. + * This is required every time the blocked sigset_t changes. + * callers must hold sighand->siglock. + */ +extern void recalc_sigpending_and_wake(struct task_struct *t); +extern void recalc_sigpending(void); + +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} + #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on From 74444edaa0b2ef946e846d5ddf1ba90efcd7c200 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:43:54 +0100 Subject: [PATCH 0632/1911] sched/headers: Move the memalloc_noio_*() APIs to In preparation to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 22 ---------------------- include/linux/sched/mm.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e0400069e95bd..dbc3ff04750a15 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1468,28 +1468,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. 
- */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} - -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} - -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} - /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 64d83fa8d93aa9..62dca4e0b4e0e6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -147,4 +147,26 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~(__GFP_IO | __GFP_FS); + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ From 3605df49556ebb7641d1f8ec2e7eeecf4dd734bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:12 +0100 Subject: [PATCH 0633/1911] sched/headers: Move task statistics APIs from to There are a number of task statistics related variables and methods exported via sched.h - collect them into and include it from their usage sites. 
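A short sketch of the kind of usage site that now includes <linux/sched/stat.h> directly, e.g. a proc-style statistics dump; the show routine is hypothetical, the counters are the ones collected into the new header:

#include <linux/sched/stat.h>
#include <linux/seq_file.h>

static int my_sched_stat_show(struct seq_file *m, void *v)
{
        seq_printf(m, "running %lu iowait %lu threads %d forks %lu\n",
                   nr_running(), nr_iowait(), nr_threads, total_forks);
        return 0;
}
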
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/appldata/appldata_base.c | 1 + include/linux/sched.h | 10 ---------- include/linux/sched/stat.h | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5a8dfa22da7c31..ef3fb1b9201f03 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index dbc3ff04750a15..415baf25351758 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,16 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 30fe012aecb018..0d52ceb6d3ae0c 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,4 +3,22 @@ #include +/* + * Various counters maintained by the scheduler and fork(), + * exposed via /proc, sys.c or used by drivers via these APIs. + * + * ( Note that all these values are aquired without locking, + * so they can only be relied on in narrow circumstances. ) + */ + +extern unsigned long total_forks; +extern int nr_threads; +DECLARE_PER_CPU(unsigned long, process_counts); +extern int nr_processes(void); +extern unsigned long nr_running(void); +extern bool single_task_running(void); +extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(int cpu); +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); + #endif /* _LINUX_SCHED_STAT_H */ From 752b3ca734cbc17c0456b846ae886c0ed39c5703 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:12:59 +0100 Subject: [PATCH 0634/1911] sched/headers: Move the NOHZ APIs from to There's a number of NOHZ/dyntics related functionality in , but only a handful of timer files are making use of them. Move them into their own header. This better documents these APIs and unclutters . 
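For reference, a simplified sketch of the timer-side consumer of these declarations, roughly what kernel/time/timer.c does when placing an unpinned timer; the helper name is invented:

#include <linux/sched/nohz.h>
#include <linux/smp.h>

static int pick_timer_target_cpu(bool pinned)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
        /* Steer unpinned timers away from idle CPUs. */
        if (!pinned)
                return get_nohz_timer_target();
#endif
        return smp_processor_id();
}
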
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ---------------------------------- include/linux/sched/nohz.h | 39 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 415baf25351758..5f9bfc603d1981 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,14 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - extern void dump_cpu_task(int cpu); struct seq_file; @@ -204,15 +196,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - /* * Only dump TASK_* tasks. (0 for all tasks) */ @@ -1535,14 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - #ifndef cpu_relax_yield #define cpu_relax_yield() cpu_relax() #endif @@ -1563,16 +1538,6 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } -#endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index fe6caf0dab1054..9471f0736a3a0a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -3,4 +3,43 @@ #include +/* + * This is the interface between the scheduler and nohz/dyntics: + */ + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); +#else +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void nohz_balance_enter_idle(int cpu); +extern void set_cpu_sd_state_idle(void); +extern int get_nohz_timer_target(void); +#else +static inline void nohz_balance_enter_idle(int cpu) { } +static inline void set_cpu_sd_state_idle(void) { } +#endif + +#ifdef CONFIG_NO_HZ_COMMON +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); +#else +static inline void wake_up_nohz_cpu(int 
cpu) { } +#endif + +#ifdef CONFIG_NO_HZ_FULL +extern u64 scheduler_tick_max_deferment(void); +#endif + #endif /* _LINUX_SCHED_NOHZ_H */ From d3d1e320d43a7bad9603acf0214406a1e8795c63 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:16:09 +0100 Subject: [PATCH 0635/1911] sched/headers: Move debugging functions from to Collect the various scheduler and task state debugging APIs scattered around into the new header. In particular the show_regs() and show_stack() prototype affects many files, update them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 36 ----------------------------- include/linux/sched/debug.h | 46 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f9bfc603d1981..76a2e522be29aa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,15 +46,9 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern void dump_cpu_task(int cpu); - struct seq_file; struct cfs_rq; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif /* * Task state bitmask. NOTE! These bits are also @@ -196,25 +190,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} - -extern void show_regs(struct pt_regs *); - -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); @@ -229,17 +204,6 @@ extern int sched_cpu_dying(unsigned int cpu); # define sched_cpu_dying NULL #endif -extern void sched_show_task(struct task_struct *p); - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) - -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; - -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 55bc0cd35fd5c2..853bbef0b47b7a 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -3,4 +3,50 @@ #include +/* + * Various scheduler/task debugging interfaces: + */ + +struct task_struct; + +extern void dump_cpu_task(int cpu); + +/* + * Only dump TASK_* tasks. 
(0 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(0); +} + +struct pt_regs; + +extern void show_regs(struct pt_regs *); + +/* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current + * task), SP is the stack pointer of the first frame that should be shown in the back + * trace (or NULL if the entire call-chain of the task should be shown). + */ +extern void show_stack(struct task_struct *task, unsigned long *sp); + +extern void sched_show_task(struct task_struct *p); + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_set_task(struct task_struct *p); +#endif + +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); + #endif /* _LINUX_SCHED_DEBUG_H */ From 63cc9d6fca8c220c2406f1376100a9acb55197af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:04:39 +0100 Subject: [PATCH 0636/1911] sched/headers, time/timekeeping: Move the xtime_update() prototype from to This was in only for hysterical raisins. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/time.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 76a2e522be29aa..5a4fbc76feb4fd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1579,8 +1579,6 @@ extern struct task_struct *find_task_by_pid_ns(pid_t nr, #include -extern void xtime_update(unsigned long ticks); - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5ce30909d..4f4ab65b6e61e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,6 +167,8 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif +extern void xtime_update(unsigned long ticks); + struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); From 70b8157e61d0143fb44ae9482557d7aca365da3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:11:00 +0100 Subject: [PATCH 0637/1911] sched/headers: Move include from the middle of to the header portion Linux-0.01 already defined 'current' in the middle of sched.h, so this is an ancient historical precedent - but still in a modern kernel it looks a bit weird that we have: #include in the middle of the header. Move it further up. If this was done for some obscure dependency reasons then we'll trigger and document it. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a4fbc76feb4fd..ac0fed4c31307f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,8 @@ #include #include +#include + struct sched_attr; struct sched_param; @@ -1577,8 +1579,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -#include - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); From 0ca0156973a47e689f3bc817e26e15fff3f84eec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 14:52:01 +0100 Subject: [PATCH 0638/1911] sched/headers: Split hotplug CPU interfaces out of into Split the CPU hotplug scheduler APIs out of the common header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 --------------- include/linux/sched/hotplug.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ac0fed4c31307f..25cc0adb3e0830 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -196,15 +196,6 @@ extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); @@ -1498,12 +1489,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 34670ed2489437..c608d3c1ddb872 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -3,4 +3,24 @@ #include +/* + * Scheduler interfaces for hotplug CPU support: + */ + +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + +#ifdef CONFIG_HOTPLUG_CPU +extern void idle_task_exit(void); +#else +static inline void idle_task_exit(void) {} +#endif + #endif /* _LINUX_SCHED_HOTPLUG_H */ From 901b14bd946a8b7ea211105b6207e082ddd36846 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 15:24:12 +0100 Subject: [PATCH 0639/1911] sched/headers: Move task lifetime APIs from to There's a fair amount of task lifetime management (a.k.a fork()/exit()) related APIs in , but only a small fraction of the users of the generic sched.h header make use of them. 
Move these functions to the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 91 ----------------------------------- include/linux/sched/task.h | 97 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 91 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 25cc0adb3e0830..b1677c8db03f5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,28 +165,10 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -#include - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - struct task_struct; -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - extern void sched_init(void); extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); extern cpumask_var_t cpu_isolated_map; @@ -211,8 +193,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - struct nsproxy; /** @@ -1120,24 +1100,6 @@ struct task_struct { */ }; -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1429,21 +1391,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { @@ -1572,45 +1519,11 @@ extern void wake_up_new_task(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); - -extern void release_task(struct task_struct * p); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. 
*/ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); - -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); -#else -static inline void exit_thread(struct task_struct *tsk) -{ -} -#endif extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern void do_group_exit(int); - extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); @@ -1618,10 +1531,6 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 0023c91ff821ee..e93638a035157d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -1,6 +1,103 @@ #ifndef _LINUX_SCHED_TASK_H #define _LINUX_SCHED_TASK_H +/* + * Interface between the scheduler and various task lifetime (fork()/exit()) + * functionality: + */ + #include +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t mmlist_lock; + +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + +extern asmlinkage void schedule_tail(struct task_struct *prev); +extern void init_idle(struct task_struct *idle, int cpu); +extern void init_idle_bootup_task(struct task_struct *idle); + +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; +#endif /* #ifdef CONFIG_TASKS_RCU */ +} + +extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_dead(struct task_struct *p); + +void __noreturn do_task_dead(void); + +extern void proc_caches_init(void); + +extern void release_task(struct task_struct * p); + +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. 
*/ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif +extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif +extern void do_group_exit(int); + +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); +struct task_struct *fork_idle(int); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif + +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + #endif /* _LINUX_SCHED_TASK_H */ From 6bfbaa51ed47774492d83d182a86068cc35aa4c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:37:49 +0100 Subject: [PATCH 0640/1911] sched/headers, RCU: Move rcu_copy_process() from to kernel/fork.c Move rcu_copy_process() into kernel/fork.c, which is the only user of this inline function. This simplifies to the level that does not have to be included in it anymore - which change is done in a subsequent patch. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 15 --------------- kernel/fork.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e93638a035157d..20ed9108f26158 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -25,21 +25,6 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); diff --git a/kernel/fork.c b/kernel/fork.c index 916e78004b8fe4..6c463c80e93de8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1465,6 +1465,21 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) task->pids[type].pid = pid; } +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; +#endif 
/* #ifdef CONFIG_TASKS_RCU */ +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. From c7af7877eeacfeaaf6a1b6f54c481292ef116837 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:01:58 +0100 Subject: [PATCH 0641/1911] sched/core: Move, sort and clean up structure predeclarations Most of the structure predeclarations were at the head of sched.h, but not all of them - there were a number of lines spread around sched.h, in random places. Move them to the head, and also sort them alphabetically. Remove unused entries. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 56 ++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b1677c8db03f5d..5398356e33c7b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -33,24 +33,35 @@ #include -struct sched_attr; -struct sched_param; - -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations: */ +struct audit_context; +struct autogroup; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; +struct cfs_rq; struct filename; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -struct signal_struct; -struct sighand_struct; - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; +struct task_struct; +struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -165,8 +176,6 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -struct task_struct; - extern void sched_init(void); extern void sched_init_smp(void); @@ -193,8 +202,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -struct nsproxy; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -297,10 +304,6 @@ struct thread_group_cputimer { }; #include -struct autogroup; - -struct backing_dev_info; -struct reclaim_state; #ifdef CONFIG_SCHED_INFO struct sched_info { @@ -314,8 +317,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -struct task_delay_info; - static inline int sched_info_on(void) { #ifdef CONFIG_SCHEDSTATS @@ -342,20 +343,12 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -struct io_context; /* See blkdev.h */ - - #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; - struct load_weight { unsigned long weight; u32 inv_weight; @@ -564,7 +557,6 @@ union rcu_special { } b; /* Bits. */ u32 s; /* Set of bits. 
*/ }; -struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, @@ -1125,8 +1117,6 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } -struct pid_namespace; - /* * the helpers to get the task's different pids as they are seen * from various namespaces From d04b0ad37e4b6ac39a56c823ae76ab37cd044dc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:07:57 +0100 Subject: [PATCH 0642/1911] sched/headers: Move the PREEMPT_COUNT defines from to These defines are not really part of the scheduler's driver API, but are related to the preempt count - so move them to . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 21 +++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea25..cae461224948a7 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -55,6 +55,27 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + +/* + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. + * + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + */ +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET + +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 5398356e33c7b8..3b3e31da416eec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -265,27 +265,6 @@ struct task_cputime_atomic { .sum_exec_runtime = ATOMIC64_INIT(0), \ } -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. - * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). - */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET - -/* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: - * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET - * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. From f3ac60671954c8d413532627b1be13a76f394c49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:59:33 +0100 Subject: [PATCH 0643/1911] sched/headers: Move task-stack related APIs from to Split out the task->stack related functionality, which is not really part of the core scheduler APIs. Only keep task_thread_info() because it's used by sched.h. Update the code that uses those facilities. 
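For illustration only (not part of this patch): after the split, a file that needs the stack helpers includes <linux/sched/task_stack.h> explicitly instead of relying on <linux/sched.h> to provide them. The helper below is hypothetical; only the header names and the try_get_task_stack()/put_task_stack()/stack_not_used() calls reflect the interfaces being moved.

    #include <linux/sched.h>
    #include <linux/sched/task_stack.h>

    /* Hypothetical example: approximate peak kernel-stack usage of a task. */
    static unsigned long task_stack_high_watermark(struct task_struct *tsk)
    {
            unsigned long watermark = 0;

            if (!try_get_task_stack(tsk))   /* the stack may already have been freed */
                    return 0;
    #ifdef CONFIG_DEBUG_STACK_USAGE
            /* stack_not_used() reports how many stack bytes were never written */
            watermark = THREAD_SIZE - stack_not_used(tsk);
    #endif
            put_task_stack(tsk);

            return watermark;
    }
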
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/kernel/smp-cps.c | 2 +- arch/mips/sibyte/sb1250/smp.c | 2 +- include/linux/sched.h | 115 +++---------------------------- include/linux/sched/task_stack.h | 104 ++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 108 deletions(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 1d3188c23bb884..6d45f05538c8b3 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 1cf66f5ff23d1a..0a4a2c3982d86d 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3e31da416eec..af9590c8bfb0f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1450,6 +1450,15 @@ union thread_union { unsigned long stack[THREAD_SIZE/sizeof(long)]; }; +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +#elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) +#endif + #ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { @@ -1540,112 +1549,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -#ifdef CONFIG_THREAD_INFO_IN_TASK - -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} - -/* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - -#elif !defined(__HAVE_THREAD_FUNCTIONS) - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -/* - * Return the address of the last usable long on the stack. - * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. - * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. - */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} - -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? 
- task_stack_page(tsk) : NULL; -} - -extern void put_task_stack(struct task_struct *tsk); -#else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} -#endif - -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) -{ - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif -} -#endif -extern void set_task_stack_end_magic(struct task_struct *tsk); - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d2d578e85f7dab..aaa5c2a6a0e9bd 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -1,7 +1,111 @@ #ifndef _LINUX_SCHED_TASK_STACK_H #define _LINUX_SCHED_TASK_STACK_H +/* + * task->stack (kernel stack) handling interfaces: + */ + #include #include +#ifdef CONFIG_THREAD_INFO_IN_TASK + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) + +#define task_stack_page(task) ((void *)(task)->stack) + +static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) +{ + *task_thread_info(p) = *task_thread_info(org); + task_thread_info(p)->task = p; +} + +/* + * Return the address of the last usable long on the stack. + * + * When the stack grows down, this is just above the thread + * info struct. Going any lower will corrupt the threadinfo. + * + * When the stack grows up, this is the highest address. + * Beyond that position, we corrupt data on the next page. + */ +static inline unsigned long *end_of_stack(struct task_struct *p) +{ +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; +#else + return (unsigned long *)(task_thread_info(p) + 1); +#endif +} + +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? 
+ task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} +#endif + +#define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + +extern void thread_stack_cache_init(void); + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else + n++; +# endif + } while (!*n); + +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else + return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif +} +#endif +extern void set_task_stack_end_magic(struct task_struct *tsk); + #endif /* _LINUX_SCHED_TASK_STACK_H */ From 70806b21e4d64f01193a2b64a053b75ff53a2b10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: [PATCH 0644/1911] sched/headers: Move the 'root_task_group' declaration to Also remove the duplicate declaration from . ( That declaration was originally duplicated for dependency hell reasons, but there's no problem including the much smaller header now, to pick up the right prototype. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 -- include/linux/sched.h | 4 ---- include/linux/sched/autogroup.h | 5 +++++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f6841c19c913ef..91d9049f003938 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -151,8 +151,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, diff --git a/include/linux/sched.h b/include/linux/sched.h index af9590c8bfb0f3..4934733bd6cb7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1707,10 +1707,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index 586bdf37330dde..fd6855548d0c7f 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -4,6 +4,7 @@ #include struct signal_struct; +struct task_group; struct seq_file; #ifdef CONFIG_SCHED_AUTOGROUP @@ -24,4 +25,8 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif +#ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; +#endif /* CONFIG_CGROUP_SCHED */ + #endif /* _LINUX_SCHED_AUTOGROUP_H */ From 76960dbf9b235b957d9d730be2c6c2a70f709026 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 
2017 23:43:50 +0100 Subject: [PATCH 0645/1911] signals: Move signal data types from to Separate out just the pure data types - sched.h will be able to use this reduced size header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/signal.h | 54 --------------------------------- include/linux/signal_types.h | 59 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index d1c2b05a7a556c..94ad6eea955035 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -8,24 +8,6 @@ struct task_struct; /* for sysctl */ extern int print_fatal_signals; -/* - * Real Time signals may be queued. - */ - -struct sigqueue { - struct list_head list; - int flags; - siginfo_t info; - struct user_struct *user; -}; - -/* flags values. */ -#define SIGQUEUE_PREALLOC 1 - -struct sigpending { - struct list_head list; - sigset_t signal; -}; #ifndef HAVE_ARCH_COPY_SIGINFO @@ -271,42 +253,6 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct sigaction { -#ifndef __ARCH_HAS_IRIX_SIGACTION - __sighandler_t sa_handler; - unsigned long sa_flags; -#else - unsigned int sa_flags; - __sighandler_t sa_handler; -#endif -#ifdef __ARCH_HAS_SA_RESTORER - __sigrestore_t sa_restorer; -#endif - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -#ifdef __ARCH_HAS_KA_RESTORER - __sigrestore_t ka_restorer; -#endif -}; - -#ifdef CONFIG_OLD_SIGACTION -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; -#endif - -struct ksignal { - struct k_sigaction ka; - siginfo_t info; - int sig; -}; - extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 28799c88f490fe..16d862a3d8f38d 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -1,7 +1,66 @@ #ifndef _LINUX_SIGNAL_TYPES_H #define _LINUX_SIGNAL_TYPES_H +/* + * Basic signal handling related data type definitions: + */ + #include #include +/* + * Real Time signals may be queued. + */ + +struct sigqueue { + struct list_head list; + int flags; + siginfo_t info; + struct user_struct *user; +}; + +/* flags values. 
*/ +#define SIGQUEUE_PREALLOC 1 + +struct sigpending { + struct list_head list; + sigset_t signal; +}; + +struct sigaction { +#ifndef __ARCH_HAS_IRIX_SIGACTION + __sighandler_t sa_handler; + unsigned long sa_flags; +#else + unsigned int sa_flags; + __sighandler_t sa_handler; +#endif +#ifdef __ARCH_HAS_SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +#ifdef __ARCH_HAS_KA_RESTORER + __sigrestore_t ka_restorer; +#endif +}; + +#ifdef CONFIG_OLD_SIGACTION +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; +#endif + +struct ksignal { + struct k_sigaction ka; + siginfo_t info; + int sig; +}; + #endif /* _LINUX_SIGNAL_TYPES_H */ From 9e7d2e44dd88ba7e29c165b6fca428e384afa5a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: [PATCH 0646/1911] mm/headers, sched/headers: Move task related MM types from to Separate all the MM types that are embedded directly in 'struct task_struct' into the header. The goal is to include this header in , not the full header, to reduce the size, complexity and coupling of . (This patch does not change yet.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 49 ------------------------------- include/linux/mm_types_task.h | 55 +++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 28bc710d3467a4..f60f45fe226fca 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,11 +24,6 @@ struct address_space; struct mem_cgroup; -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) -#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -231,17 +226,6 @@ struct page { #endif ; -struct page_frag { - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 offset; - __u32 size; -#else - __u16 offset; - __u16 size; -#endif -}; - #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) @@ -360,18 +344,6 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; -/* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u32 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - struct core_thread { struct task_struct *task; struct core_thread *next; @@ -383,27 +355,6 @@ struct core_state { struct completion startup; }; -enum { - MM_FILEPAGES, /* Resident file mapping pages */ - MM_ANONPAGES, /* Resident anonymous pages */ - MM_SWAPENTS, /* Anonymous swap entries */ - MM_SHMEMPAGES, /* Resident shared memory pages */ - NR_MM_COUNTERS -}; - -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct 
mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 2419d302f03c0b..9526d8b9fe0ebb 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -1,10 +1,65 @@ #ifndef _LINUX_MM_TYPES_TASK_H #define _LINUX_MM_TYPES_TASK_H +/* + * Here are the definitions of the MM data types that are embedded in 'struct task_struct'. + * + * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) + */ + #include #include #include #include +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) + +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + +enum { + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) +#define SPLIT_RSS_COUNTING +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; + +struct page_frag { + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 offset; + __u32 size; +#else + __u16 offset; + __u16 size; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ From 77ba809e8b39b4e384df0433e2bd3dd0907dad29 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: [PATCH 0647/1911] sched/headers: Remove the dependency from Use the freshly introduced, reduced size header instead. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/video/fbdev/auo_k190x.c | 1 + include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/auo_k190x.c b/drivers/video/fbdev/auo_k190x.c index 9580374667ba7c..0d06038324e003 100644 --- a/drivers/video/fbdev/auo_k190x.c +++ b/drivers/video/fbdev/auo_k190x.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 4934733bd6cb7d..3eb284741790c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include From cdc75e9f7b14f29efcf4b162a3c673733e96db79 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: [PATCH 0648/1911] sched/headers: Move 'init_task' and 'init_thread_union' from to 'init_task' is really not part of core scheduler APIs but part of the fork() interface between the scheduler and process management. So move the declarations. 
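A side note on why the forward declarations this patch adds to <linux/sched/task.h> are sufficient: an extern object declaration never needs the type's size or layout, so the new header stays lightweight, and only translation units that actually look inside init_task must include the full <linux/sched.h>. A minimal, hypothetical illustration (not part of the patch):

    #include <linux/sched.h>        /* full struct task_struct for ordinary users */
    #include <linux/sched/task.h>   /* extern struct task_struct init_task; */

    /* Hypothetical predicate: is this the statically allocated boot task? */
    static inline bool is_boot_init_task(const struct task_struct *tsk)
    {
            return tsk == &init_task;
    }
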
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- include/linux/sched/task.h | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 3eb284741790c4..4ab105a2a8f9c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1469,9 +1469,6 @@ static inline int kstack_end(void *addr) } #endif -extern union thread_union init_thread_union; -extern struct task_struct init_task; - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 20ed9108f26158..1be049a18d1bce 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -8,6 +8,9 @@ #include +struct task_struct; +union thread_union; + /* * This serializes "schedule()" and also protects * the run-queue from deletions/modifications (but @@ -17,6 +20,9 @@ extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; +extern union thread_union init_thread_union; +extern struct task_struct init_task; + #ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); #endif /* #ifdef CONFIG_PROVE_RCU */ From 56cd697366b6d6f67acb6c58ac7f3b185d11ef07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: [PATCH 0649/1911] sched/headers: Move the task_lock()/unlock() APIs to The task_lock()/task_unlock() APIs are not realated to core scheduling, they are task lifetime APIs, i.e. they belong into . Move them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 20 -------------------- include/linux/sched/task.h | 20 ++++++++++++++++++++ kernel/cgroup/cgroup-v1.c | 1 + kernel/cgroup/namespace.c | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ab105a2a8f9c4..d481c129a82207 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1526,26 +1526,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 1be049a18d1bce..2be9fde588a71a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -91,4 +91,24 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif +/* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. And ->vfork_done. + * + * Nests both inside and outside of read_lock(&tasklist_lock). 
+ * It must not be nested with write_lock_irq(&tasklist_lock), + * neither inside nor outside. + */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 08d2cb605101b4..abc585858685c4 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index cff7ea62c38f01..96d38dab6fb2f7 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -1,6 +1,6 @@ #include "cgroup-internal.h" -#include +#include #include #include #include From 1050b27c52f615bc0e772b3119881e5304ccde6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: [PATCH 0650/1911] sched/headers: Move cputime functionality from and into Move cputime related functionality out of , as most code that includes does not use that functionality. Move data types that are not included in task_struct directly to the signal definitions, into . Also merge the (small) existing header into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cputime.h | 13 --- include/linux/sched.h | 89 ----------------- include/linux/sched/cputime.h | 182 +++++++++++++++++++++++++++++++++- include/linux/sched/signal.h | 33 ++++++ kernel/sched/stats.h | 111 --------------------- 5 files changed, 214 insertions(+), 214 deletions(-) delete mode 100644 include/linux/cputime.h diff --git a/include/linux/cputime.h b/include/linux/cputime.h deleted file mode 100644 index a691dc4ddc130a..00000000000000 --- a/include/linux/cputime.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_CPUTIME_H -#define __LINUX_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d481c129a82207..2b43f55e495613 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -219,14 +219,6 @@ struct prev_cputime { #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -248,40 +240,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; - -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. 
- * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. - * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. - */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; -}; - #include #ifdef CONFIG_SCHED_INFO @@ -1234,44 +1192,6 @@ static inline void put_task_struct(struct task_struct *t) struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - /* * Per process flags */ @@ -1395,9 +1315,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -extern unsigned long long -task_sched_runtime(struct task_struct *task); - /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP extern void sched_exec(void); @@ -1629,12 +1546,6 @@ static __always_inline bool need_resched(void) return unlikely(tif_need_resched()); } -/* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - /* * Wrappers for p->thread_info->cpu access. No-op on UP. 
*/ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6ed4fe43de2893..4c5b9735c1ae1d 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -2,6 +2,186 @@ #define _LINUX_SCHED_CPUTIME_H #include -#include + +/* + * cputime accounting APIs: + */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include + +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime) +{ + *utime = t->utime; + *stime = t->stime; +} + +static inline u64 task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); +} +#endif + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); + + +/* + * Thread group CPU time accounting. + */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); + + +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +/** + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * + * @tsk: Pointer to target task. + */ +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + /* Check if cputimer isn't running. This is accessed without locking. */ + if (!READ_ONCE(cputimer->running)) + return NULL; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return NULL; + + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif + +/** + * account_group_user_time - Maintain utime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. 
+ */ +static inline void account_group_user_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.utime); +} + +/** + * account_group_system_time - Maintain stime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void account_group_system_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.stime); +} + +/** + * account_group_exec_runtime - Maintain exec runtime for a thread group. + * + * @tsk: Pointer to task structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of the thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); +} + +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + +extern unsigned long long +task_sched_runtime(struct task_struct *task); #endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b3545fb4333afa..2cf446704cd4fc 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -35,6 +35,39 @@ struct cpu_itimer { u64 incr; }; +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } +/** + * struct thread_group_cputimer - thread group interval timer counts + * @cputime_atomic: atomic thread group interval timers. + * @running: true when there are timers running and + * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU timer calculations. + */ +struct thread_group_cputimer { + struct task_cputime_atomic cputime_atomic; + bool running; + bool checking_timer; +}; + /* * NOTE! 
"signal_struct" does not have its own * locking, because a shared signal_struct always diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index bf0da0aa0a1443..d5710651043b62 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -164,114 +164,3 @@ sched_info_switch(struct rq *rq, #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ - -/* - * The following are functions that support scheduler-internal time accounting. - * These functions are generally called at the timer tick. None of this depends - * on CONFIG_SCHEDSTATS. - */ - -/** - * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running - * - * @tsk: Pointer to target task. - */ -#ifdef CONFIG_POSIX_TIMERS -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - - /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) - return NULL; - - /* - * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime - * in __exit_signal(), we won't account to the signal struct further - * cputime consumed by that task, even though the task can still be - * ticking after __exit_signal(). - * - * In order to keep a consistent behaviour between thread group cputime - * and thread group cputimer accounting, lets also ignore the cputime - * elapsing after __exit_signal() in any thread group timer running. - * - * This makes sure that POSIX CPU clocks and timers are synchronized, so - * that a POSIX CPU timer won't expire while the corresponding POSIX CPU - * clock delta is behind the expiring timer value. - */ - if (unlikely(!tsk->sighand)) - return NULL; - - return cputimer; -} -#else -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - return NULL; -} -#endif - -/** - * account_group_user_time - Maintain utime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the utime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void account_group_user_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.utime); -} - -/** - * account_group_system_time - Maintain stime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the stime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void account_group_system_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.stime); -} - -/** - * account_group_exec_runtime - Maintain exec runtime for a thread group. - * - * @tsk: Pointer to task structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of the thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. 
- */ -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); -} From a2d7a7463c38dd14b845721aac3583a26a97c66d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: [PATCH 0651/1911] sched/headers: Move sched_info_on() and force_schedstat_enabled() from to These APIs are not core scheduler but scheduler statistics related. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ---------------- include/linux/sched/stat.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b43f55e495613..707eee099332d9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -254,22 +254,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); -#endif - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 0d52ceb6d3ae0c..d8cedf27083e58 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -21,4 +21,20 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); +static inline int sched_info_on(void) +{ +#ifdef CONFIG_SCHEDSTATS + return 1; +#elif defined(CONFIG_TASK_DELAY_ACCT) + extern int delayacct_on; + return delayacct_on; +#else + return 0; +#endif +} + +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + #endif /* _LINUX_SCHED_STAT_H */ From 28851755990c832d7050d5e1e2c91ed9d9e6fe59 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:24:31 +0100 Subject: [PATCH 0652/1911] sched/headers, vfs/execve: Move the do_execve*() prototypes from to These are not scheduler related. 
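For illustration only (hypothetical caller, not part of this patch): with the move, users of these statistics helpers include <linux/sched/stat.h> directly.

    #include <linux/types.h>
    #include <linux/sched/stat.h>

    /*
     * Hypothetical helper: decide whether per-task sched_info bookkeeping is
     * worth doing, optionally asking for schedstats to be switched on first.
     */
    static bool profiling_wants_sched_info(bool force)
    {
    #ifdef CONFIG_SCHEDSTATS
            if (force)
                    force_schedstat_enabled();
    #endif
            return sched_info_on();
    }
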
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/binfmts.h | 10 ++++++++++ include/linux/sched.h | 8 -------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1303b570b18cc9..05488da3aee9db 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,6 +6,8 @@ #include #include +struct filename; + #define CORENAME_MAX_SIZE 128 /* @@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); +extern int do_execve(struct filename *, + const char __user * const __user *, + const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); + #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 707eee099332d9..5f2267e41fdecc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,14 +1400,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { From 9049863a32fad5d7158318e45f0a41a0f2192c0c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:31:22 +0100 Subject: [PATCH 0653/1911] sched/headers: Move kstack_end() from to This is a task-stack related API, not core scheduler functionality. 
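For illustration only: an in-kernel caller now picks up the do_execve() prototype from <linux/binfmts.h>. The wrapper below is hypothetical and simply mirrors what the execve() syscall entry point does; getname() is assumed to come from <linux/fs.h>.

    #include <linux/binfmts.h>
    #include <linux/fs.h>           /* getname() */

    /* Hypothetical wrapper, modelled on the execve() syscall entry point. */
    static int exec_user_program(const char __user *path,
                                 const char __user *const __user *argv,
                                 const char __user *const __user *envp)
    {
            /* pass getname()'s result straight through, as the syscall does */
            return do_execve(getname(path), argv, envp);
    }
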
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ---------- include/linux/sched/task_stack.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f2267e41fdecc..309eb76b7eab18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1360,16 +1360,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) -{ - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); -} -#endif - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index aaa5c2a6a0e9bd..df6ea6665b310e 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -108,4 +108,14 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); +#ifndef __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + /* Reliable end of stack detection: + * Some APM bios versions misalign the stack + */ + return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); +} +#endif + #endif /* _LINUX_SCHED_TASK_STACK_H */ From 42011db0ed5a9c92b1281e1300eb3d026f3764a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:35:41 +0100 Subject: [PATCH 0654/1911] sched/headers: Move exit_files() and exit_itimers() from to These two functions are task management related, not core scheduler APIs. 
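For illustration only: kstack_end() is typically used by arch backtrace code to walk a kernel stack word by word, which is why it now sits next to the other stack helpers in <linux/sched/task_stack.h>. The dump function below is hypothetical and deliberately simplified.

    #include <linux/printk.h>
    #include <linux/sched/task_stack.h>

    /* Hypothetical, simplified stack dump in the style of arch show_stack(). */
    static void dump_stack_words(unsigned long *sp)
    {
            while (!kstack_end(sp))
                    printk(KERN_DEBUG " %lx\n", *sp++);
    }
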
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ---- include/linux/sched/task.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 309eb76b7eab18..4a03c49e3bcccf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1386,10 +1386,6 @@ extern void wake_up_new_task(struct task_struct *tsk); static inline void kick_process(struct task_struct *tsk) { } #endif -extern void exit_files(struct task_struct *); - -extern void exit_itimers(struct signal_struct *); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2be9fde588a71a..a33a8121da9a2e 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -67,6 +67,9 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void do_group_exit(int); +extern void exit_files(struct task_struct *); +extern void exit_itimers(struct signal_struct *); + extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); From c5a21921d55a2aee9d1e031a78cef6cda3eab78b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: [PATCH 0655/1911] sched/headers: Move _init() prototypes from to trap_init() and cpu_init() belong into , sched_init*() into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- include/linux/sched/init.h | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a03c49e3bcccf..0fb56ae0abc7bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,15 +176,10 @@ struct uts_namespace; /* Task command name length */ #define TASK_COMM_LEN 16 -extern void sched_init(void); -extern void sched_init_smp(void); - extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void cpu_init (void); -extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index 15e46313e55eab..f31d1607585731 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -3,4 +3,11 @@ #include +/* + * Scheduler init related prototypes: + */ + +extern void sched_init(void); +extern void sched_init_smp(void); + #endif /* _LINUX_SCHED_INIT_H */ From 93b5a9a7051e51ce50109046af0235268a261ba0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:53:36 +0100 Subject: [PATCH 0656/1911] sched/headers, timekeeping: Move the timer tick function prototypes to Move the update_process_times() and xtime_update() prototypes to . 
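(The destination header, per the hunks below, is <linux/timekeeping.h>.) For illustration only, a hypothetical legacy-style per-tick interrupt handler of the kind that calls both functions; the handler itself is made up, only the two prototypes being moved are real.

    #include <linux/interrupt.h>
    #include <linux/timekeeping.h>
    #include <asm/irq_regs.h>
    #include <asm/ptrace.h>

    /* Hypothetical periodic-tick handler, modelled on legacy arch timer code. */
    static irqreturn_t example_timer_interrupt(int irq, void *dev_id)
    {
            xtime_update(1);                                  /* advance jiffies/wall time by one tick */
            update_process_times(user_mode(get_irq_regs()));  /* charge the tick to user or kernel time */
            return IRQ_HANDLED;
    }
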
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - include/linux/time.h | 2 -- include/linux/timekeeping.h | 4 ++++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0fb56ae0abc7bf..dc8d94dc140f9b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -180,7 +180,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void update_process_times(int user); extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/time.h b/include/linux/time.h index 4f4ab65b6e61e6..23f0f5ce30909d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,8 +167,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif -extern void xtime_update(unsigned long ticks); - struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d2e804e15c3e46..b598cbc7b57684 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -8,6 +8,10 @@ void timekeeping_init(void); extern int timekeeping_suspended; +/* Architecture timer tick functions: */ +extern void update_process_times(int user); +extern void xtime_update(unsigned long ticks); + /* * Get and set timeofday */ From dcc2dc45f7cf267d37843059ff75b70260596c69 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:05:49 +0100 Subject: [PATCH 0657/1911] sched/headers, mm: Move 'struct tlbflush_unmap_batch' from to Unclutter some more. Also move the CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH condition inside the structure body definition, to remove a pair of #ifdefs from sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types_task.h | 22 ++++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 9526d8b9fe0ebb..136dfdf63ba10e 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -62,4 +63,25 @@ struct page_frag { #endif }; +/* Track pages that require TLB flushes */ +struct tlbflush_unmap_batch { +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed. See set_tlb_ubc_flush_pending(). + */ + struct cpumask cpumask; + + /* True if any bit in cpumask is set */ + bool flush_required; + + /* + * If true then the PTE was dirty when unmapped. The entry must be + * flushed before IO is initiated or a stale TLB entry potentially + * allows an update without redirtying the page. 
+ */ + bool writable; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8d94dc140f9b..8d6b7aa7f3acbe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,25 +484,6 @@ struct wake_q_node { struct wake_q_node *next; }; -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; -}; - struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -895,9 +876,7 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH struct tlbflush_unmap_batch tlb_ubc; -#endif struct rcu_head rcu; From cda66725c1444db67127115d611c982b62b45d8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:30:50 +0100 Subject: [PATCH 0658/1911] sched/headers: Move the get_task_struct()/put_task_struct() and related APIs from to These belong into the task lifetime API header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 -------------- include/linux/sched/task.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d6b7aa7f3acbe..b68358c605604e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1135,20 +1135,6 @@ static inline int is_global_init(struct task_struct *tsk) extern struct pid *cad_pid; -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - /* * Per process flags */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a33a8121da9a2e..3886ae64148f1e 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,6 +75,21 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern void free_task(struct task_struct *tsk); + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; From 6f175fc9536355d8aa5c2d4854848a97c244a031 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:12:45 +0100 Subject: [PATCH 0659/1911] sched/headers: Move 
the sched_exec() prototype to sched_exec() better fits into the task lifetime APIs than into the core scheduler APIs. This reduces the size of a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/task.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b68358c605604e..bd89fc17767c1c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1258,13 +1258,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 3886ae64148f1e..a978d7189cfddd 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -77,6 +77,13 @@ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void free_task(struct task_struct *tsk); +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct(struct task_struct *t); From c03cb28e7c44055cd00d11b8581b9fa46a3e3764 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0660/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index b3550b36e65d69..0d6fceff37bbdc 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H -#include - #include /* From f411229e1dd623a3dee623824d094546789977e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:49:05 +0100 Subject: [PATCH 0661/1911] sched/headers: Remove tsk_is_polling() It's not used by anything. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/idle.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 66ee32f87f0b31..5ca63ebad6b40e 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -17,10 +17,6 @@ extern void wake_up_if_idle(int cpu); * polling state. 
*/ #ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} static inline void __current_set_polling(void) { @@ -59,7 +55,6 @@ static inline bool __must_check current_clr_polling_and_test(void) } #else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } static inline void __current_set_polling(void) { } static inline void __current_clr_polling(void) { } From ea94763950e49d1aa622c59218ee2fc3b434ba6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0662/1911] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index ac12f71d359cd6..4a68c67912078e 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CLOCK_H #define _LINUX_SCHED_CLOCK_H -#include +#include /* * Do not use outside of architecture code which knows its limitations. From cc689c5b352d4a7510b11c975c5c94d4785b37e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0663/1911] sched/headers: Remove and from The file is a self-contained header and users of it either don't need and - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 2 -- include/linux/fault-inject.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6b769cbd10001a..4178d24935477d 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,8 +18,6 @@ #define _LINUX_DELAYACCT_H #include -#include -#include /* * Per-task flags relevant to delay accounting diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f4956d8601c11..728d4e0292aa77 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ +struct kmem_cache; + #ifdef CONFIG_FAILSLAB extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else From 5a2d6880f461faa416c0d329d46a128cf342c1eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 0664/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
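A condensed, hedged sketch of the forward-declaration pattern used in the fault-inject.h hunk (not a complete header, just the relevant lines):

/*
 * A pointer parameter only needs the type name to exist, not its
 * layout, so an opaque 'struct kmem_cache;' declaration replaces a
 * heavyweight include; callers that actually dereference the pointer
 * include <linux/slab.h> themselves.
 */
#include <linux/types.h>	/* gfp_t, bool */

struct kmem_cache;

extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags);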
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/loadavg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index c392e28ce0ac79..4264bc6b2c2767 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_LOADAVG_H #define _LINUX_SCHED_LOADAVG_H -#include - /* * These are the constant used to fake the fixed-point load-average * counting. Some notes: From 5fd73157bdfb57370b69be7c0067f08e610e7daa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 0665/1911] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. Add a 'task_struct' predeclaration to make it build standalone. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/autogroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index fd6855548d0c7f..55cd496df8849d 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SCHED_AUTOGROUP_H #define _LINUX_SCHED_AUTOGROUP_H -#include - struct signal_struct; +struct task_struct; struct task_group; struct seq_file; From b8d6d80b37a98e3910f1b4e799f8ebea88e94bfa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 0666/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include kernel.h and atomic.h. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/mm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 62dca4e0b4e0e6..830953ebb391fa 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H +#include +#include #include #include #include From ae1cc8823204bb938d039afa43c28a84591ada9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 0667/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include . This reduces the size of the header dependency graph. 
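For illustration, a hedged sketch of why the <linux/mm_types.h> dependency is the one worth keeping (the usage below is invented for this note, not taken from the patch):

/*
 * The MMF_* coredump bits declared in <linux/sched/coredump.h> are
 * stored in the flags word of 'struct mm_struct', so users of this
 * header typically need the mm_types definitions as well.
 */
#include <linux/bitops.h>
#include <linux/mm_types.h>
#include <linux/sched/coredump.h>

static void example_dump_anon_private(struct mm_struct *mm)
{
	set_bit(MMF_DUMP_ANON_PRIVATE, &mm->flags);
}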
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/coredump.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index f46912aa4f4c03..69eedcef8f03fb 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -1,7 +1,6 @@ #ifndef _LINUX_SCHED_COREDUMP_H #define _LINUX_SCHED_COREDUMP_H -#include #include #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ From 556725839e543bc2b45bd73121c0b1c1d6c23714 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:31:24 +0100 Subject: [PATCH 0668/1911] sched/headers: Remove unused 'task_can_switch_user()' prototype The function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index bd89fc17767c1c..43ed34ccb53a77 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1509,9 +1509,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif From de8deb0ad79f8a49cea5110c006c8676a11611f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: [PATCH 0669/1911] sched/headers: Remove from If we add then becomes a self-contained header and users of it either don't need or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/user.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 40c6363da5ef74..5d5415e129d436 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -1,7 +1,8 @@ #ifndef _LINUX_SCHED_USER_H #define _LINUX_SCHED_USER_H -#include +#include +#include struct key; From ac4e620967bb72f86371154935aa8b67cf54e225 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:44:31 +0100 Subject: [PATCH 0670/1911] sched/headers: Remove #include from The header does not actually make use of any types or APIs defined in , so remove its inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 43ed34ccb53a77..54eefb2ad603fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,7 +5,6 @@ #include -#include #include #include #include From aa829c7679a13fca667f772db1f0ea03adc29122 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:54:05 +0100 Subject: [PATCH 0671/1911] sched/headers: Remove from It's not used by anything in anymore. This reduces the preprocessed size of and speeds up the build a bit. Also fix code that implicitly relied on headers included by . 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - include/target/target_core_base.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 54eefb2ad603fc..8991e4d7cd0fc9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,7 +28,6 @@ #include #include #include -#include #include diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 8be9ba73383d52..774c29b57e82fd 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -4,6 +4,7 @@ #include /* struct config_group */ #include /* enum dma_data_direction */ #include /* struct percpu_ida */ +#include #include /* struct semaphore */ #include From ed53742d793bd92ed32114081370b199dc94467d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: [PATCH 0672/1911] sched/headers: Remove from Make the file a self-contained header and remove the dependency: users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 2bdcfbfc4c30ec..d2be2ccbb3729d 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CPUFREQ_H #define _LINUX_SCHED_CPUFREQ_H -#include +#include /* * Interface between cpufreq drivers and the scheduler: From 71af2ed5eeea639339e3a1497a0196bab7de4b57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: [PATCH 0673/1911] kasan, sched/headers: Remove from is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7793036eac8062..ceb3fe78a0d39d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,7 +1,6 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H -#include #include struct kmem_cache; From e26512fea5bcd6602dbf02a551ed073cd4529449 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: [PATCH 0674/1911] sched/headers: Remove inclusion from This reduces header dependencies and speeds up the build. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8991e4d7cd0fc9..1be69735cef319 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include From f780d89a0e820a529cf91fb78b52565e1b37b774 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:03:42 +0100 Subject: [PATCH 0675/1911] sched/headers: Remove from This reduces header dependencies. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/sh/include/asm/fpu.h | 2 ++ arch/sh/mm/extable_32.c | 2 ++ include/linux/sched.h | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 09fc2bc8a790af..50921c7cc3f024 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -3,6 +3,8 @@ #ifndef __ASSEMBLY__ +#include + struct task_struct; #ifdef CONFIG_SH_FPU diff --git a/arch/sh/mm/extable_32.c b/arch/sh/mm/extable_32.c index 24a75d315dcbba..940e871bc8169c 100644 --- a/arch/sh/mm/extable_32.c +++ b/arch/sh/mm/extable_32.c @@ -7,6 +7,8 @@ #include #include +#include + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1be69735cef319..9dfa9c113570d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include From 9c6da18109d603a99915b257929c0370c9d3ae40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: [PATCH 0676/1911] sched/headers: Remove from This reduces header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 9dfa9c113570d0..cc5c99a27e21a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include From f0a0eb699967f4f51567b563818bafe4017bef73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: [PATCH 0677/1911] sched/headers: Remove the include from This reduces sched.h header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index cc5c99a27e21a7..ab80596dddfdc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include From dc1995392d79b0eeee147a792482b991f74c1494 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 0678/1911] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. ( Keep the dependency.) This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index d8cedf27083e58..141b74c53fad2d 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_STAT_H #define _LINUX_SCHED_STAT_H -#include +#include /* * Various counters maintained by the scheduler and fork(), From b0145d9b332a5117bd0abbb980ab89586a1d5f6c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 0679/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 9471f0736a3a0a..4995b717500b67 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_NOHZ_H #define _LINUX_SCHED_NOHZ_H -#include - /* * This is the interface between the scheduler and nohz/dyntics: */ From 4f079e98a0db5f067c0981a526ff8938e21c52e2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 0680/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/debug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 853bbef0b47b7a..e0eaee54c5a4c4 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_DEBUG_H #define _LINUX_SCHED_DEBUG_H -#include - /* * Various scheduler/task debugging interfaces: */ From a906086ef35129d522047a296e7c4237af75fdcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 0681/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/hotplug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index c608d3c1ddb872..752ac7e628d72e 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_HOTPLUG_H #define _LINUX_SCHED_HOTPLUG_H -#include - /* * Scheduler interfaces for hotplug CPU support: */ From fae3c30c2d1ae3ff93877f64e5d55e2443d8f82f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 12:00:08 +0100 Subject: [PATCH 0682/1911] sched/headers: Remove the runqueue_is_locked() prototype Remove the runqueue_is_locked() prototype, the function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ab80596dddfdc7..ac98255d00fb2b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -172,8 +172,6 @@ struct uts_namespace; extern cpumask_var_t cpu_isolated_map; -extern int runqueue_is_locked(int cpu); - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX From cd9c513be34ceaae8bf211474b91b6897574efdd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:58 +0100 Subject: [PATCH 0683/1911] sched/headers: Remove from This is a stray header that is not needed by anything in sched.h, so remove it. Update files that relied on the stray inclusion. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/user_namespace.h | 1 + kernel/utsname_sysctl.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ac98255d00fb2b..b361f881fe447f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -226,8 +226,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#include - #ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 08264641b502bd..faa9bfb827dae5 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index c8eac43267e90d..233cd8fc691082 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_SYSCTL From 192c9414516e7178a44f9ed48d47501c4c19771c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: [PATCH 0684/1911] sched/headers: Remove from Instead of including the full , only include the types-only header in , to further decouple the scheduler header from the signal headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b361f881fe447f..905d4f148d6508 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include From 1d1954e038435765a623884b626731784cde768f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:16:15 +0100 Subject: [PATCH 0685/1911] sched/headers: Remove the 'init_pid_ns' prototype from pid.h already defines it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 905d4f148d6508..057b2a85ecc781 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,8 +1301,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -extern struct pid_namespace init_pid_ns; - /* * find a task by one of its numerical ids * From b68070e146b9c2b4ece8d869a4fab9a4f14bbfb4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: [PATCH 0686/1911] sched/headers: Remove from We don't actually need the full rculist.h header anymore, include the smaller rcupdate.h header instead. 
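A hedged illustration of the split between the two headers (hypothetical object, not from the patch): embedding an rcu_head and deferring a free only needs the core API from <linux/rcupdate.h>, while the RCU list iterators such as list_for_each_entry_rcu() are what require the larger <linux/rculist.h>.

#include <linux/kernel.h>	/* container_of() */
#include <linux/rcupdate.h>	/* struct rcu_head, call_rcu() */
#include <linux/slab.h>

struct example_obj {
	int			value;
	struct rcu_head		rcu;
};

static void example_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct example_obj, rcu));
}

static void example_release(struct example_obj *obj)
{
	call_rcu(&obj->rcu, example_free_rcu);	/* free after a grace period */
}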
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 057b2a85ecc781..5720d11f32247a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include From 2c873d55cd838deef8218b6d5fe9bd336cb3113a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:11:03 +0100 Subject: [PATCH 0687/1911] sched/core: Remove unused prefetch_stack() prefetch_stack() is defined by IA64, but not actually used anywhere anymore. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/processor.h | 2 -- arch/ia64/kernel/entry.S | 23 ----------------------- include/linux/sched.h | 6 ------ 3 files changed, 31 deletions(-) diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 03911a33640685..26a63d69c599ad 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -19,8 +19,6 @@ #include #include -#define ARCH_HAS_PREFETCH_SWITCH_STACK - #define IA64_NUM_PHYS_STACK_REG 96 #define IA64_NUM_DBG_REGS 8 diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6f27a663177c43..e7a716b09350d0 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -455,29 +455,6 @@ GLOBAL_ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) -GLOBAL_ENTRY(prefetch_stack) - add r14 = -IA64_SWITCH_STACK_SIZE, sp - add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 - ;; - ld8 r16 = [r15] // load next's stack pointer - lfetch.fault.excl [r14], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault [r16], 128 - br.ret.sptk.many rp -END(prefetch_stack) - /* * Invoke a system call, but do some tracing before and after the call. * We MUST preserve the current register frame throughout this routine diff --git a/include/linux/sched.h b/include/linux/sched.h index 5720d11f32247a..bd0111a06aa259 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -247,12 +247,6 @@ struct sched_info { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - struct load_weight { unsigned long weight; u32 inv_weight; From 5c0d0f36414f9f8a292b42e797f9284b127d79c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:59 +0100 Subject: [PATCH 0688/1911] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/init.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index f31d1607585731..127215045285e6 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_INIT_H #define _LINUX_SCHED_INIT_H -#include - /* * Scheduler init related prototypes: */ From 50ff9d130014f7b19541dbf607a615a72b75b778 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: [PATCH 0689/1911] sched/headers: Remove from It's not used by any of the scheduler methods, but needs it to pick up STACK_END_MAGIC. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/cgroup/cgroup-v1.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index bd0111a06aa259..559be4f1aee554 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index abc585858685c4..56eba9caa632ad 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include From b2d5bfea2d00a0000da18f7667c2b0e2c2f168d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:56:40 +0100 Subject: [PATCH 0690/1911] sched/headers, timers: Remove the include from So we want to simplify 's header dependencies, but one roadblock of that is 's inclusion of sysctl.h, which brings in other, problematic headers. Note that timer.h's inclusion of sysctl.h can be avoided if we pre-declare ctl_table - so do that. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timer.h | 2 +- include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index c7bdf895179c92..e6789b8757d502 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -212,7 +212,7 @@ struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -#include +struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index faa9bfb827dae5..be765234c0a2b9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include From 1a79a72c65b8f1b4f44e166d1fb5cad560461b26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:58:35 +0100 Subject: [PATCH 0691/1911] sched/headers, x86/apic: Remove the header inclusion from We want to simplify 's header dependencies, but one roadblock to that is 's inclusion of pm.h, which brings in other, problematic headers. Remove it, as it appears to be entirely spurious, apic.h does not actually make use of any PM facilities. 
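A hedged sketch of the kind of user that now includes <linux/magic.h> directly (hypothetical check, modelled on the stack-end canary written by set_task_stack_end_magic()):

#include <linux/magic.h>		/* STACK_END_MAGIC */
#include <linux/sched/task_stack.h>	/* end_of_stack() */

static bool example_stack_end_corrupted(struct task_struct *tsk)
{
	return *end_of_stack(tsk) != STACK_END_MAGIC;
}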
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eff8e36aaf7208..730ef65e83934f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -2,7 +2,6 @@ #define _ASM_X86_APIC_H #include -#include #include #include From 283cb90305cf1686594ed537c7a8cb188eba1a4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:59:47 +0100 Subject: [PATCH 0692/1911] sched/headers, hrtimer: Remove the include from In our quest to simplify 's header dependencies, remove the inclusion from - which does not appear to be necessary, as hrtimer.h does not use waitqueues. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e52b427223baa8..249e579ecd4c4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include From ee6a3d19f15b9b10075481088b8d4537f286d7b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:01:09 +0100 Subject: [PATCH 0693/1911] sched/headers: Remove the include from It's used only by a single (rarely used) inline function (task_node(p)), which we can move to . ( Add , because we rely on that. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/mips/sgi-ip27/ip27-smp.c | 1 + include/linux/sched.h | 7 +------ include/linux/sched/topology.h | 7 +++++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f9ae6a8fa7c726..f5ed45e8f44256 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 559be4f1aee554..f9dc9cfaf07948 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include @@ -1454,11 +1454,6 @@ static inline unsigned int task_cpu(const struct task_struct *p) #endif } -static inline int task_node(const struct task_struct *p) -{ - return cpu_to_node(task_cpu(p)); -} - extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 0d6fceff37bbdc..7d065abc7a470d 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H +#include + #include /* @@ -216,4 +218,9 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + #endif /* _LINUX_SCHED_TOPOLOGY_H */ From 7f5f8e8d97d77edf33f2836259d1f19c6f4d94f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:44:12 +0100 Subject: [PATCH 0694/1911] sched/headers: Remove #ifdefs from We can remove two pairs of #ifdefs by defining structures in a smarter way. 
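For illustration, a minimal sketch of the pattern on a made-up config option (CONFIG_EXAMPLE_STATS is hypothetical): keeping the #ifdef inside the structure body means the type always exists, possibly empty, so fields that embed it need no conditional compilation of their own.

struct example_stats {
#ifdef CONFIG_EXAMPLE_STATS
	unsigned long		count;
	unsigned long long	total_delay;
#endif
};

struct example_task {
	struct example_stats	stats;	/* no matching #ifdef pair needed here */
};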
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f9dc9cfaf07948..341f5792fbe060 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -224,8 +224,8 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#ifdef CONFIG_SCHED_INFO struct sched_info { +#ifdef CONFIG_SCHED_INFO /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ unsigned long long run_delay; /* time spent waiting on a runqueue */ @@ -233,8 +233,8 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -}; #endif /* CONFIG_SCHED_INFO */ +}; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair @@ -309,8 +309,8 @@ struct sched_avg { unsigned long load_avg, util_avg; }; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics { +#ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; @@ -342,8 +342,8 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; -}; #endif +}; struct sched_entity { struct load_weight load; /* for load-balancing */ @@ -358,9 +358,7 @@ struct sched_entity { u64 nr_migrations; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics statistics; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED int depth; @@ -530,9 +528,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; -#endif struct list_head tasks; #ifdef CONFIG_SMP From 5eca1c10cbaa9c366c18ca79f81f21c731e3dcc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 22:06:35 +0100 Subject: [PATCH 0695/1911] sched/headers: Clean up Now that dependencies have been sorted out, do various trivial cleanups: - remove unnecessary structure predeclarations - fix various typos - update comments where necessary - remove pointless comments - use consistent types - tabulate consistently - use a consistent comment style - clean up the header section a bit - use a consistent style of a single field per line - remove line-breaks where they make the code look worse - etc ... No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1222 ++++++++++++++++++++++------------------- 1 file changed, 651 insertions(+), 571 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 341f5792fbe060..d67eee84fd430f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,38 +1,38 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include +/* + * Define 'struct task_struct' and provide the main scheduler + * APIs (schedule(), wakeup variants, etc.) 
+ */ -#include -#include +#include -#include -#include -#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include #include +#include #include - #include -#include -#include -#include #include +#include +#include +#include +#include -#include - -/* task_struct member predeclarations: */ +/* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -struct autogroup; struct backing_dev_info; struct bio_list; struct blk_plug; struct cfs_rq; -struct filename; struct fs_struct; struct futex_pi_state; struct io_context; @@ -52,8 +52,6 @@ struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; -struct task_struct; -struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -65,50 +63,53 @@ struct uts_namespace; * modifying one set can't modify the other one by * mistake. */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 -#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 - -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - -/* Convenience macros for the sake of set_current_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) + +/* Used in tsk->state: */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* Used in tsk->exit_state: */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* Used in tsk->state again: */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" + +/* Convenience macros for the sake of set_current_state: */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up(): */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state(): */ +#define TASK_REPORT (TASK_RUNNING | 
TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) + +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) + +#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -158,26 +159,24 @@ struct uts_namespace; * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - +#define __set_current_state(state_value) do { current->state = (state_value); } while (0) +#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) #endif -/* Task command name length */ -#define TASK_COMM_LEN 16 +/* Task command name length: */ +#define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; +extern cpumask_var_t cpu_isolated_map; extern void scheduler_tick(void); -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -extern signed long schedule_timeout_idle(signed long timeout); +#define MAX_SCHEDULE_TIMEOUT LONG_MAX + +extern long schedule_timeout(long timeout); +extern long schedule_timeout_interruptible(long timeout); +extern long schedule_timeout_killable(long timeout); +extern long schedule_timeout_uninterruptible(long timeout); +extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); @@ -197,9 +196,9 @@ extern void io_schedule(void); */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - u64 utime; - u64 stime; - raw_spinlock_t lock; + u64 utime; + u64 stime; + raw_spinlock_t lock; #endif }; @@ -214,25 +213,34 @@ struct prev_cputime { * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; }; -/* Alternate field names when used to cache expirations. */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime +/* Alternate field names when used on cache expirations: */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime struct sched_info { #ifdef CONFIG_SCHED_INFO - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ + /* Cumulative counters: */ + + /* # of times we have run on this CPU: */ + unsigned long pcount; + + /* Time spent waiting on a runqueue: */ + unsigned long long run_delay; + + /* Timestamps: */ + + /* When did we last run on a CPU? */ + unsigned long long last_arrival; + + /* When were we last queued to run? 
*/ + unsigned long long last_queued; - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ #endif /* CONFIG_SCHED_INFO */ }; @@ -243,12 +251,12 @@ struct sched_info { * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) struct load_weight { - unsigned long weight; - u32 inv_weight; + unsigned long weight; + u32 inv_weight; }; /* @@ -304,69 +312,73 @@ struct load_weight { * issues. */ struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + u64 last_update_time; + u64 load_sum; + u32 util_sum; + u32 period_contrib; + unsigned long load_avg; + unsigned long util_avg; }; struct sched_statistics { #ifdef CONFIG_SCHEDSTATS - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; + + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif }; struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - u64 nr_migrations; + u64 nr_migrations; - struct sched_statistics statistics; + struct sched_statistics statistics; #ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; + int depth; + struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; + struct cfs_rq *my_q; #endif #ifdef CONFIG_SMP @@ -376,49 +388,49 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. 
*/ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; + + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; + struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; + struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct rt_rq *my_q; #endif }; struct sched_dl_entity { - struct rb_node rb_node; + struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_runtime; /* Maximum runtime for each instance */ + u64 dl_deadline; /* Relative deadline of each instance */ + u64 dl_period; /* Separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ + s64 runtime; /* Remaining runtime for this instance */ + u64 deadline; /* Absolute deadline for this instance */ + unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: @@ -431,24 +443,28 @@ struct sched_dl_entity { * outside bandwidth enforcement mechanism (but only until we * exit the critical section); * - * @dl_yielded tells if task gave up the cpu before consuming + * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled; + int dl_boosted; + int dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ - struct hrtimer dl_timer; + struct hrtimer dl_timer; }; union rcu_special { struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + + /* Otherwise the compiler can store garbage here: */ + u8 pad; } b; /* Bits. */ u32 s; /* Set of bits. */ }; @@ -470,361 +486,417 @@ struct task_struct { * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. 
*/ - struct thread_info thread_info; + struct thread_info thread_info; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + void *stack; + atomic_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; + unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; + struct llist_node wake_entry; + int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ + /* Current CPU: */ + unsigned int cpu; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; - int wake_cpu; + int wake_cpu; #endif - int on_rq; + int on_rq; + + int prio; + int static_prio; + int normal_prio; + unsigned int rt_priority; - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; + struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; + struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; + /* List of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; + unsigned int btrace_seq; #endif - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ - struct sched_info sched_info; + struct sched_info sched_info; - struct list_head tasks; + struct list_head tasks; #ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; #endif - struct mm_struct *mm, *active_mm; + struct mm_struct *mm; + struct mm_struct *active_mm; /* Per-thread vma caching: */ - struct vmacache vmacache; - -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned 
sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; + struct vmacache vmacache; + +#ifdef SPLIT_RSS_COUNTING + struct task_rss_stat rss_stat; +#endif + int exit_state; + int exit_code; + int exit_signal; + /* The signal sent when the parent dies: */ + int pdeath_signal; + /* JOBCTL_*, siglock protected: */ + unsigned long jobctl; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; + + /* Scheduler bits, serialized by scheduler locks: */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + /* Force alignment to the next boundary: */ + unsigned :0; + + /* Unserialized, strictly 'current' */ + + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; + unsigned in_iowait:1; +#ifndef TIF_RESTORE_SIGMASK + unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; + unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; + unsigned memcg_kmem_skip_account:1; #endif #endif #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; + unsigned brk_randomized:1; #endif - unsigned long atomic_flags; /* Flags needing atomic access. */ + unsigned long atomic_flags; /* Flags requiring atomic access. */ - struct restart_block restart_block; + struct restart_block restart_block; - pid_t pid; - pid_t tgid; + pid_t pid; + pid_t tgid; #ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; + /* Canary value for the -fstack-protector GCC feature: */ + unsigned long stack_canary; #endif /* - * pointers to (original) parent process, youngest child, younger sibling, + * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + + /* Real parent process: */ + struct task_struct __rcu *real_parent; + + /* Recipient of SIGCHLD, wait4() reports: */ + struct task_struct __rcu *parent; + /* - * children/sibling forms the list of my natural children + * Children/sibling form the list of natural children: */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; /* - * ptraced is the list of tasks this task is using ptrace on. + * 'ptraced' is the list of tasks this task is using ptrace() on. + * * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. + * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ - struct list_head ptraced; - struct list_head ptrace_entry; + struct list_head ptraced; + struct list_head ptrace_entry; /* PID/PID hash table linkage. 
*/ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; + + struct completion *vfork_done; - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + /* CLONE_CHILD_SETTID: */ + int __user *set_child_tid; - u64 utime, stime; + /* CLONE_CHILD_CLEARTID: */ + int __user *clear_child_tid; + + u64 utime; + u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; + u64 utimescaled; + u64 stimescaled; #endif - u64 gtime; - struct prev_cputime prev_cputime; + u64 gtime; + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ + /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ + /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ + /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, } vtime_snap_whence; #endif #ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; + atomic_t tick_dep_mask; #endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; + /* Context switch counts: */ + unsigned long nvcsw; + unsigned long nivcsw; + + /* Monotonic time in nsecs: */ + u64 start_time; + + /* Boot based time in nsecs: */ + u64 real_start_time; + + /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ + unsigned long min_flt; + unsigned long maj_flt; #ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif + + /* Process credentials: */ + + /* Tracer's credentials at attach: */ + const struct cred __rcu *ptracer_cred; + + /* Objective and real subjective task credentials (COW): */ + const struct cred __rcu *real_cred; + + /* Effective (overridable) subjective task credentials (COW): */ + const struct cred __rcu *cred; + + /* + * executable name, excluding path. 
+ * + * - normally initialized setup_new_exec() + * - access it with [gs]et_task_comm() + * - lock it with task_lock() + */ + char comm[TASK_COMM_LEN]; + + struct nameidata *nameidata; + #ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; + unsigned long last_switch_count; +#endif + /* Filesystem information: */ + struct fs_struct *fs; + + /* Open file information: */ + struct files_struct *files; + + /* Namespaces: */ + struct nsproxy *nsproxy; + + /* Signal handlers: */ + struct signal_struct *signal; + struct sighand_struct *sighand; + sigset_t blocked; + sigset_t real_blocked; + /* Restored if set_restore_sigmask() was used: */ + sigset_t saved_sigmask; + struct sigpending pending; + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned int sas_ss_flags; + + struct callback_head *task_works; + + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; + kuid_t loginuid; + unsigned int sessionid; #endif - struct seccomp seccomp; + struct seccomp seccomp; -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; + /* Thread group tracking: */ + u32 parent_exec_id; + u32 self_exec_id; + + /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ + spinlock_t alloc_lock; /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; + raw_spinlock_t pi_lock; - struct wake_q_node wake_q; + struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; + /* PI waiters blocked on a rt_mutex held by this task: */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling: */ + struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; + /* Mutex deadlock detection: */ + struct mutex_waiter *blocked_on; #endif + #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int 
hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; #endif + #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; #endif + #ifdef CONFIG_UBSAN - unsigned int in_ubsan; + unsigned int in_ubsan; #endif -/* journalling filesystem info */ - void *journal_info; + /* Journalling filesystem info: */ + void *journal_info; -/* stacked block device info */ - struct bio_list *bio_list; + /* Stacked block device info: */ + struct bio_list *bio_list; #ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; + /* Stack plugging: */ + struct blk_plug *plug; #endif -/* VM state */ - struct reclaim_state *reclaim_state; + /* VM state: */ + struct reclaim_state *reclaim_state; + + struct backing_dev_info *backing_dev_info; - struct backing_dev_info *backing_dev_info; + struct io_context *io_context; - struct io_context *io_context; + /* Ptrace state: */ + unsigned long ptrace_message; + siginfo_t *last_siginfo; - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ + struct task_io_accounting ioac; +#ifdef CONFIG_TASK_XACCT + /* Accumulated RSS usage: */ + u64 acct_rss_mem1; + /* Accumulated virtual memory usage: */ + u64 acct_vm_mem1; + /* stime + utime since last update: */ + u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; + /* Protected by ->alloc_lock: */ + nodemask_t mems_allowed; + /* Seqence number to catch updates: */ + seqcount_t mems_allowed_seq; + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; + /* Control Group info protected by css_set_lock: */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock: */ + struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT_A - int closid; + int closid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head 
perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; + unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; + /* Protected by alloc_lock: */ + struct mempolicy *mempolicy; + short il_next; + short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + /* Migration stamp: */ + u64 node_stamp; + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; /* * numa_faults is an array split into four regions: @@ -840,8 +912,8 @@ struct task_struct { * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ - unsigned long *numa_faults; - unsigned long total_numa_faults; + unsigned long *numa_faults; + unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last @@ -849,119 +921,132 @@ struct task_struct { * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[3]; + unsigned long numa_faults_locality[3]; - unsigned long numa_pages_migrated; + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ - struct tlbflush_unmap_batch tlb_ubc; + struct tlbflush_unmap_batch tlb_ubc; - struct rcu_head rcu; + struct rcu_head rcu; - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; + /* Cache last used pipe for splice(): */ + struct pipe_inode_info *splice_pipe; - struct page_frag task_frag; + struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION - int make_it_fail; + int make_it_fail; #endif /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause + * When (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for a dirty throttling pause: */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ + int nr_dirtied; + int nr_dirtied_pause; + /* Start of a write-and-pause period: */ + unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif /* - * time slack values; these are used to round up poll() and + * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. 
*/ - u64 timer_slack_ns; - u64 default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN - unsigned int kasan_depth; + unsigned int kasan_depth; #endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; + + /* Timestamp for last schedule: */ + unsigned long long ftrace_timestamp; + /* * Number of functions that haven't been traced - * because of depth overrun. + * because of depth overrun: */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; + atomic_t trace_overrun; + + /* Pause tracing: */ + atomic_t tracing_graph_pause; #endif + #ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; + /* State flags for use by tracers: */ + unsigned long trace; + + /* Bitmask and counter of trace recursion: */ + unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + #ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; + /* Coverage collection mode enabled for this task (0 if disabled): */ + enum kcov_mode kcov_mode; + + /* Size of the kcov_area: */ + unsigned int kcov_size; + + /* Buffer for coverage collection: */ + void *kcov_area; + + /* KCOV descriptor wired with this task or NULL: */ + struct kcov *kcov; #endif + #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif + #ifdef CONFIG_UPROBES - struct uprobe_task *utask; + struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; + unsigned long task_state_change; #endif - int pagefault_disabled; + int pagefault_disabled; #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; + struct task_struct *oom_reaper_list; #endif #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; + struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; + /* A live task holds one reference: */ + atomic_t stack_refcount; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! 
- */ + /* CPU-specific state of this task: */ + struct thread_struct thread; + + /* + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. + * + * Do not put anything below here! + */ }; static inline struct pid *task_pid(struct task_struct *task) @@ -975,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task) } /* - * Without tasklist or rcu lock it is not safe to dereference + * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ @@ -1002,16 +1087,14 @@ static inline struct pid *task_session(struct task_struct *task) * * see also pid_nr() etc in include/linux/pid.h */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } @@ -1027,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return pid_vnr(task_tgid(tsk)); } +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->pids[PIDTYPE_PID].pid != NULL; +} -static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; @@ -1053,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk) return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } @@ -1065,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } @@ -1076,27 +1170,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -/* obsolete, do not use */ +/* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. 
- * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. @@ -1116,34 +1195,34 @@ extern struct pid *cad_pid; /* * Per process flags */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* Dumped core */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO 
involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1156,33 +1235,38 @@ extern struct pid *cad_pid; * child is not running and in turn not changing child->flags * at the same time the parent does it. */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) + #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) + +#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) + #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) + /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) /* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
*/ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } @@ -1201,30 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) +static inline void +tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { task->flags &= ~flags; task->flags |= orig_flags & flags; } -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); #ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); +extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) +static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; @@ -1239,6 +1316,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); + /** * task_nice - return the nice value of a given task. * @p: the task in question. @@ -1249,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } + extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); +extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); +extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); + /** * is_idle_task - is the specified task an idle task? * @p: the task in question. 
@@ -1269,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } + extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -1302,23 +1380,25 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) */ extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); + #ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); +extern void kick_process(struct task_struct *tsk); #else - static inline void kick_process(struct task_struct *tsk) { } +static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); + static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } + extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP @@ -1326,15 +1406,15 @@ void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { return 1; } #endif -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available +/* + * Set thread flags in other task's structures. + * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { From d3aa9c9f212a729e46653d4c1eb6a9ab190efe3a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Dec 2016 15:20:34 +0100 Subject: [PATCH 0696/1911] ixgbe: update the rss key on h/w, when ethtool asks for it Currently ixgbe_set_rxfh() updates the rss_key copy in the driver memory, but does not push the new value into the h/w. This commit adds a new helper for the latter operation and calls it in ixgbe_set_rxfh(), so that the h/w rss key value can really be updated via ethtool.
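For reference, the path fixed here is typically exercised from user space through ethtool's RSS configuration interface. The commands below are an illustrative sketch rather than part of this patch; hkey support depends on the ethtool version, and the interface name and key bytes are placeholders (the key programmed through IXGBE_RSSRK is 10 32-bit registers, i.e. 40 bytes):

  # read back the current indirection table and hash key
  ethtool -x eth0
  # write a new 40-byte hash key (colon-separated hex bytes)
  ethtool -X eth0 hkey <40 colon-separated hex bytes>

Before this change the second command only updated the driver's adapter->rss_key copy, so the hardware kept hashing with the old key; with ixgbe_store_key() the RSSRK registers are written as well and the new key actually takes effect on received traffic.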
Signed-off-by: Paolo Abeni Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 +++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index a2cc43d2888801..7a951b11682152 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -929,6 +929,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_adapter *adapter, struct ixgbe_ring *tx_ring); u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter); +void ixgbe_store_key(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index a7574c7b12af06..90fa5bf23d1b5f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2998,8 +2998,10 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Fill out the rss hash key */ - if (key) + if (key) { memcpy(adapter->rss_key, key, ixgbe_get_rxfh_key_size(netdev)); + ixgbe_store_key(adapter); + } ixgbe_store_reta(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 060cdce8058f9b..67ab13fd163c16 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3473,6 +3473,21 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter) return 512; } +/** + * ixgbe_store_key - Write the RSS key to HW + * @adapter: device handle + * + * Write the RSS key stored in adapter.rss_key to HW. + */ +void ixgbe_store_key(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < 10; i++) + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]); +} + /** * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle @@ -3538,7 +3553,6 @@ static void ixgbe_store_vfreta(struct ixgbe_adapter *adapter) static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; u32 i, j; u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; @@ -3551,8 +3565,7 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) rss_i = 4; /* Fill out hash function seeds */ - for (i = 0; i < 10; i++) - IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]); + ixgbe_store_key(adapter); /* Fill out redirection table */ memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl)); From a528d35e8bfcc521d7cb70aaf03e1bd296c8493f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 16:46:22 +0000 Subject: [PATCH 0697/1911] statx: Add a system call to make enhanced file info available Add a system call to make extended file information available, including file creation and some attribute flags where available through the underlying filesystem. The getattr inode operation is altered to take two additional arguments: a u32 request_mask and an unsigned int flags that indicate the synchronisation mode. This change is propagated to the vfs_getattr*() function. 
Functions like vfs_stat() are now inline wrappers around new functions vfs_statx() and vfs_statx_fd() to reduce stack usage. ======== OVERVIEW ======== The idea was initially proposed as a set of xattrs that could be retrieved with getxattr(), but the general preference proved to be for a new syscall with an extended stat structure. A number of requests were gathered for features to be included. The following have been included: (1) Make the fields a consistent size on all arches and make them large. (2) Spare space, request flags and information flags are provided for future expansion. (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an __s64). (4) Creation time: The SMB protocol carries the creation time, which could be exported by Samba, which will in turn help CIFS make use of FS-Cache as that can be used for coherency data (stx_btime). This is also specified in NFSv4 as a recommended attribute and could be exported by NFSD [Steve French]. (5) Lightweight stat: Ask for just those details of interest, and allow a netfs (such as NFS) to approximate anything not of interest, possibly without going to the server [Trond Myklebust, Ulrich Drepper, Andreas Dilger] (AT_STATX_DONT_SYNC). (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks its cached attributes are up to date [Trond Myklebust] (AT_STATX_FORCE_SYNC). And the following have been left out for future extension: (7) Data version number: Could be used by userspace NFS servers [Aneesh Kumar]. Can also be used to modify fill_post_wcc() in NFSD which retrieves i_version directly, but has just called vfs_getattr(). It could get it from the kstat struct if it used vfs_xgetattr() instead. (There's disagreement on the exact semantics of a single field, since not all filesystems do this the same way). (8) BSD stat compatibility: Including more fields from the BSD stat such as creation time (st_btime) and inode generation number (st_gen) [Jeremy Allison, Bernd Schubert]. (9) Inode generation number: Useful for FUSE and userspace NFS servers [Bernd Schubert]. (This was asked for but later deemed unnecessary with the open-by-handle capability available and caused disagreement as to whether it's a security hole or not). (10) Extra coherency data may be useful in making backups [Andreas Dilger]. (No particular data were offered, but things like last backup timestamp, the data version number and the DOS archive bit would come into this category). (11) Allow the filesystem to indicate what it can/cannot provide: A filesystem can now say it doesn't support a standard stat feature if that isn't available, so if, for instance, inode numbers or UIDs don't exist or are fabricated locally... (This requires a separate system call - I have an fsinfo() call idea for this). (12) Store a 16-byte volume ID in the superblock that can be returned in struct xstat [Steve French]. (Deferred to fsinfo). (13) Include granularity fields in the time data to indicate the granularity of each of the times (NFSv4 time_delta) [Steve French]. (Deferred to fsinfo). (14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags. Note that the Linux IOC flags are a mess and filesystems such as Ext4 define flags that aren't in linux/fs.h, so translation in the kernel may be a necessity (or, possibly, we provide the filesystem type too). (Some attributes are made available in stx_attributes, but the general feeling was that the IOC flags were to ext[234]-specific and shouldn't be exposed through statx this way). 
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer, Michael Kerrisk]. (Deferred, probably to fsinfo. Finding out if there's an ACL or seclabal might require extra filesystem operations). (16) Femtosecond-resolution timestamps [Dave Chinner]. (A __reserved field has been left in the statx_timestamp struct for this - if there proves to be a need). (17) A set multiple attributes syscall to go with this. =============== NEW SYSTEM CALL =============== The new system call is: int ret = statx(int dfd, const char *filename, unsigned int flags, unsigned int mask, struct statx *buffer); The dfd, filename and flags parameters indicate the file to query, in a similar way to fstatat(). There is no equivalent of lstat() as that can be emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is also no equivalent of fstat() as that can be emulated by passing a NULL filename to statx() with the fd of interest in dfd. Whether or not statx() synchronises the attributes with the backing store can be controlled by OR'ing a value into the flags argument (this typically only affects network filesystems): (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this respect. (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise its attributes with the server - which might require data writeback to occur to get the timestamps correct. (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a network filesystem. The resulting values should be considered approximate. mask is a bitmask indicating the fields in struct statx that are of interest to the caller. The user should set this to STATX_BASIC_STATS to get the basic set returned by stat(). It should be noted that asking for more information may entail extra I/O operations. buffer points to the destination for the data. This must be 256 bytes in size. ====================== MAIN ATTRIBUTES RECORD ====================== The following structures are defined in which to return the main attribute set: struct statx_timestamp { __s64 tv_sec; __s32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 __spare1[1]; struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 __spare2[14]; }; The defined bits in request_mask and stx_mask are: STATX_TYPE Want/got stx_mode & S_IFMT STATX_MODE Want/got stx_mode & ~S_IFMT STATX_NLINK Want/got stx_nlink STATX_UID Want/got stx_uid STATX_GID Want/got stx_gid STATX_ATIME Want/got stx_atime{,_ns} STATX_MTIME Want/got stx_mtime{,_ns} STATX_CTIME Want/got stx_ctime{,_ns} STATX_INO Want/got stx_ino STATX_SIZE Want/got stx_size STATX_BLOCKS Want/got stx_blocks STATX_BASIC_STATS [The stuff in the normal stat struct] STATX_BTIME Want/got stx_btime{,_ns} STATX_ALL [All currently available stuff] stx_btime is the file creation time, stx_mask is a bitmask indicating the data provided and __spares*[] are where as-yet undefined fields can be placed. Time fields are structures with separate seconds and nanoseconds fields plus a reserved field in case we want to add even finer resolution. Note that times will be negative if before 1970; in such a case, the nanosecond fields will also be negative if not zero. 
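As an illustration only (not part of this patch), a minimal user-space caller might look like the sketch below. It assumes the uapi headers added here have been installed (<linux/stat.h> provides struct statx and the STATX_* constants) and that __NR_statx is available from the syscall tables above; there is no C library wrapper yet, so the raw syscall() interface is used, and a flags value of 0 corresponds to AT_STATX_SYNC_AS_STAT:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/stat.h>		/* struct statx, STATX_* */

	int main(int argc, char *argv[])
	{
		struct statx stx;
		long ret;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <path>\n", argv[0]);
			return 1;
		}

		/* Basic stat fields plus the birth time, stat()-like sync. */
		ret = syscall(__NR_statx, AT_FDCWD, argv[1], 0,
			      STATX_BASIC_STATS | STATX_BTIME, &stx);
		if (ret == -1) {
			perror("statx");
			return 1;
		}

		printf("mask=%x size=%llu btime=%lld.%09d\n",
		       stx.stx_mask,
		       (unsigned long long)stx.stx_size,
		       (long long)stx.stx_btime.tv_sec,
		       stx.stx_btime.tv_nsec);
		return 0;
	}

Being only a sketch, it prints fields without checking whether they were actually returned; a real caller should test the relevant STATX_* bits in stx_mask before trusting each field, for the reasons described below.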
The bits defined in the stx_attributes field convey information about a file, how it is accessed, where it is and what it does. The following attributes map to FS_*_FL flags and are the same numerical value: STATX_ATTR_COMPRESSED File is compressed by the fs STATX_ATTR_IMMUTABLE File is marked immutable STATX_ATTR_APPEND File is append-only STATX_ATTR_NODUMP File is not to be dumped STATX_ATTR_ENCRYPTED File requires key to decrypt in fs Within the kernel, the supported flags are listed by: KSTAT_ATTR_FS_IOC_FLAGS [Are any other IOC flags of sufficient general interest to be exposed through this interface?] New flags include: STATX_ATTR_AUTOMOUNT Object is an automount trigger These are for the use of GUI tools that might want to mark files specially, depending on what they are. Fields in struct statx come in a number of classes: (0) stx_dev_*, stx_blksize. These are local system information and are always available. (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino, stx_size, stx_blocks. These will be returned whether the caller asks for them or not. The corresponding bits in stx_mask will be set to indicate whether they actually have valid values. If the caller didn't ask for them, then they may be approximated. For example, NFS won't waste any time updating them from the server, unless as a byproduct of updating something requested. If the values don't actually exist for the underlying object (such as UID or GID on a DOS file), then the bit won't be set in the stx_mask, even if the caller asked for the value. In such a case, the returned value will be a fabrication. Note that there are instances where the type might not be valid, for instance Windows reparse points. (2) stx_rdev_*. This will be set only if stx_mode indicates we're looking at a blockdev or a chardev, otherwise will be 0. (3) stx_btime. Similar to (1), except this will be set to 0 if it doesn't exist. ======= TESTING ======= The following test program can be used to test the statx system call: samples/statx/test-statx.c Just compile and run, passing it paths to the files you want to examine. The file is built automatically if CONFIG_SAMPLES is enabled. Here's some example output. Firstly, an NFS directory that crosses to another FSID. Note that the AUTOMOUNT attribute is set because transiting this directory will cause d_automount to be invoked by the VFS. [root@andromeda ~]# /tmp/test-statx -A /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:26 Inode: 1703937 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------) Secondly, the result of automounting on that directory. 
[root@andromeda ~]# /tmp/test-statx /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:27 Inode: 2 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +- Documentation/filesystems/vfs.txt | 3 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + drivers/base/devtmpfs.c | 3 +- drivers/block/loop.c | 3 +- drivers/mtd/ubi/build.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- drivers/staging/lustre/lustre/llite/file.c | 9 +- .../lustre/lustre/llite/llite_internal.h | 3 +- fs/9p/vfs_inode.c | 10 +- fs/9p/vfs_inode_dotl.c | 5 +- fs/afs/inode.c | 8 +- fs/afs/internal.h | 2 +- fs/bad_inode.c | 4 +- fs/btrfs/inode.c | 6 +- fs/ceph/inode.c | 6 +- fs/ceph/super.h | 4 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/inode.c | 5 +- fs/coda/coda_linux.h | 2 +- fs/coda/inode.c | 7 +- fs/ecryptfs/inode.c | 13 +- fs/exportfs/expfs.c | 3 +- fs/ext4/ext4.h | 3 +- fs/ext4/inode.c | 6 +- fs/f2fs/f2fs.h | 4 +- fs/f2fs/file.c | 6 +- fs/fat/fat.h | 4 +- fs/fat/file.c | 5 +- fs/fuse/dir.c | 6 +- fs/gfs2/inode.c | 11 +- fs/kernfs/inode.c | 8 +- fs/kernfs/kernfs-internal.h | 4 +- fs/libfs.c | 12 +- fs/minix/inode.c | 11 +- fs/minix/minix.h | 2 +- fs/nfs/inode.c | 13 +- fs/nfs/namespace.c | 9 +- fs/nfsd/nfs4xdr.c | 4 +- fs/nfsd/vfs.h | 3 +- fs/ocfs2/file.c | 11 +- fs/ocfs2/file.h | 4 +- fs/orangefs/inode.c | 13 +- fs/orangefs/orangefs-kernel.h | 5 +- fs/overlayfs/copy_up.c | 6 +- fs/overlayfs/dir.c | 10 +- fs/overlayfs/inode.c | 7 +- fs/proc/base.c | 12 +- fs/proc/generic.c | 6 +- fs/proc/internal.h | 2 +- fs/proc/proc_net.c | 6 +- fs/proc/proc_sysctl.c | 5 +- fs/proc/root.c | 6 +- fs/stat.c | 214 ++++++++++++--- fs/sysv/itree.c | 7 +- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 6 +- fs/ubifs/ubifs.h | 4 +- fs/udf/symlink.c | 5 +- fs/xfs/xfs_iops.c | 9 +- include/linux/fs.h | 35 ++- include/linux/nfs_fs.h | 2 +- include/linux/stat.h | 24 +- include/linux/syscalls.h | 3 + include/uapi/linux/fcntl.h | 5 + include/uapi/linux/stat.h | 131 +++++++++ mm/shmem.c | 6 +- samples/Kconfig | 6 + samples/Makefile | 2 +- samples/statx/Makefile | 10 + samples/statx/test-statx.c | 254 ++++++++++++++++++ 72 files changed, 822 insertions(+), 214 deletions(-) create mode 100644 samples/statx/Makefile create mode 100644 samples/statx/test-statx.c diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ace63cd7af8c0d..fdcfdd79682a00 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -58,7 +58,8 @@ prototypes: int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b968084eeac14b..569211703721fe 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -382,7 +382,8 @@ struct inode_operations { int (*permission) (struct 
inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2b361854254414..9ba050fe47f30e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -389,3 +389,4 @@ 380 i386 pkey_mprotect sys_pkey_mprotect 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free +383 i386 statx sys_statx diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8e1..5aef183e2f85c5 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 44a74cf1372c6e..d2fb9c8ed2057b 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -309,7 +309,8 @@ static int handle_remove(const char *nodename, struct device *dev) if (d_really_is_positive(dentry)) { struct kstat stat; struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat); + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, + AT_STATX_SYNC_AS_STAT); if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { struct iattr newattrs; /* diff --git a/drivers/block/loop.c b/drivers/block/loop.c index eeb1db73f44e15..8f4051999741c5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1175,7 +1175,8 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) if (lo->lo_state != Lo_bound) return -ENXIO; - error = vfs_getattr(&file->f_path, &stat); + error = vfs_getattr(&file->f_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; memset(info, 0, sizeof(*info)); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 85d54f37e28ff2..77513195f50e39 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1159,7 +1159,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) if (err) return ERR_PTR(err); - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (err) return ERR_PTR(err); diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 88b1897aeb40f7..d4b2e874449869 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - error = vfs_getattr(&path, &stat); + error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (error) return ERR_PTR(error); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 10adfcdd70354a..481c0d01d4c626 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -2952,15 +2952,16 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 
ibits) return rc; } -int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(de); + struct inode *inode = d_inode(path->dentry); struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); int res; - res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE | - MDS_INODELOCK_LOOKUP); + res = ll_inode_revalidate(path->dentry, + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP); ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); if (res) diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index ecdfd0c29b7ff9..55f68acd85d1fc 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -750,7 +750,8 @@ int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_release_openhandle(struct inode *, struct lookup_intent *); int ll_md_real_close(struct inode *inode, fmode_t fmode); -int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_migrate(struct inode *parent, struct file *file, int mdtidx, const char *name, int namelen); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4f4450119e42f..f1d96233670c7b 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1047,16 +1047,18 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt: mount information - * @dentry: file to get attributes on + * @path: Object to query * @stat: metadata structure to populate + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * */ static int -v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5999bd050678cd..570e63ee5b713d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -468,9 +468,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, } static int -v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_stat_dotl *st; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 86cc7264c21cda..1e4897a048d2ee 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -375,12 +375,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) /* * read the attributes of an inode */ -int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int afs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode; - - inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); diff --git a/fs/afs/internal.h 
b/fs/afs/internal.h index 8acf3670e75649..5dfa56903a2d4b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -533,7 +533,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_callback *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int afs_setattr(struct dentry *, struct iattr *); extern void afs_evict_inode(struct inode *); extern int afs_drop_inode(struct inode *); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f685c81929818..bb53728c7a31b0 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -89,8 +89,8 @@ static int bad_inode_permission(struct inode *inode, int mask) return -EIO; } -static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int bad_inode_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { return -EIO; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ee6978d804913b..c40060cc481f60 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9413,11 +9413,11 @@ int btrfs_init_cachep(void) return -ENOMEM; } -static int btrfs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +static int btrfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { u64 delalloc_bytes; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); u32 blocksize = inode->i_sb->s_blocksize; generic_fillattr(inode, stat); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fd8f771f99b7d7..d449e1c03cbd79 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2187,10 +2187,10 @@ int ceph_permission(struct inode *inode, int mask) * Get all attributes. Hopefully somedata we'll have a statlite() * and can limit the fields we require to be accurate. 
*/ -int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ceph_inode_info *ci = ceph_inode(inode); int err; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e9410bcf41135b..fe6b9cfc4013e6 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -784,8 +784,8 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) extern int ceph_permission(struct inode *inode, int mask); extern int __ceph_setattr(struct inode *inode, struct iattr *attr); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); -extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); /* xattr.c */ int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c9c00a862036f6..da717fee30260b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -83,7 +83,7 @@ extern int cifs_revalidate_dentry(struct dentry *); extern int cifs_invalidate_mapping(struct inode *inode); extern int cifs_revalidate_mapping(struct inode *inode); extern int cifs_zap_mapping(struct inode *inode); -extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int cifs_setattr(struct dentry *, struct iattr *); extern const struct inode_operations cifs_file_inode_ops; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa84..1363fff460b937 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1990,9 +1990,10 @@ int cifs_revalidate_dentry(struct dentry *dentry) return cifs_revalidate_mapping(inode); } -int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int cifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct inode *inode = d_inode(dentry); diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 5104d84c4f6425..d3c361883c2844 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -47,7 +47,7 @@ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct inode *); -int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int coda_getattr(const struct path *, struct kstat *, u32, unsigned int); int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 71dbe7e287cef9..2dea594da19968 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -255,11 +255,12 @@ static void coda_evict_inode(struct inode *inode) coda_cache_clear_inode(inode); } -int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int coda_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - int err = coda_revalidate_inode(d_inode(dentry)); + int err = coda_revalidate_inode(d_inode(path->dentry)); if (!err) - generic_fillattr(d_inode(dentry), stat); + 
generic_fillattr(d_inode(path->dentry), stat); return err; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e7413f82d27bf3..efc2db42d17513 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -959,9 +959,10 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) return rc; } -static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; int rc = 0; @@ -983,13 +984,15 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, return rc; } -static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat); + rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, + request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168d5..f2d24bb8d745e3 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -299,7 +299,8 @@ static int get_name(const struct path *path, char *name, struct dentry *child) * filesystem supports 64-bit inode numbers. So we need to * actually call ->getattr, not just read i_ino: */ - error = vfs_getattr_nosec(&child_path, &stat); + error = vfs_getattr_nosec(&child_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; buffer.ino = stat.ino; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e498416..025d2e85f454d1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2462,8 +2462,7 @@ extern struct inode *ext4_iget(struct super_block *, unsigned long); extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); -extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 971f6634208032..7385e6a6b6cb54 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5387,13 +5387,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) return error; } -int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ext4_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode; unsigned long long delalloc_blocks; - inode = d_inode(dentry); + inode = d_inode(path->dentry); generic_fillattr(inode, stat); /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1483136fed68e..e849f83d611407 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2040,8 +2040,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void truncate_data_blocks(struct dnode_of_data *dn); int truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode 
*inode); -int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); int truncate_data_blocks_range(struct dnode_of_data *dn, int count); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78e65288f2b280..5f7317875a6726 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -633,10 +633,10 @@ int f2fs_truncate(struct inode *inode) return 0; } -int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks <<= 3; return 0; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e6b764a17a9c84..051dac1ce3be17 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -364,8 +364,8 @@ extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/fat/file.c b/fs/fat/file.c index 3d04b124bce099..4724cc9ad65021 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -365,9 +365,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) fat_flush_inodes(inode->i_sb, inode, NULL); } -int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 811fd8929a18c1..beb3d64f16e291 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1777,10 +1777,10 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return ret; } -static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, - struct kstat *stat) +static int fuse_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(entry); + struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); if (!fuse_allow_current_process(fc)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a04..288c15f385bde5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1959,9 +1959,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) /** * gfs2_getattr - Read out an inode's attributes - * @mnt: The vfsmount the inode is being accessed from - * @dentry: The dentry to stat + * @path: Object to query * @stat: The inode's stats + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * * This may be called from the VFS directly, or from within GFS2 with the * inode locked, so we look to see if the glock is already locked and only @@ -1972,10 +1973,10 @@ 
static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) * Returns: errno */ -static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int gfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index ac9e108ce1eacb..fb4b4a79a0d6b4 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -200,11 +200,11 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) set_nlink(inode, kn->dir.subdirs + 2); } -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct kernfs_node *kn = dentry->d_fsdata; - struct inode *inode = d_inode(dentry); + struct kernfs_node *kn = path->dentry->d_fsdata; + struct inode *inode = d_inode(path->dentry); mutex_lock(&kernfs_mutex); kernfs_refresh_inode(kn, inode); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 3100987cf8baf7..2d5144ab425159 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -80,8 +80,8 @@ extern const struct xattr_handler *kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); int kernfs_iop_permission(struct inode *inode, int mask); int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr); -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); /* diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed628..1dfaf8f606c097 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,10 +20,10 @@ #include "internal.h" -int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int simple_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; @@ -1143,10 +1143,10 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } -static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int empty_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e7d9bf86d97595..6ac76b0434e937 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -622,11 +622,14 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } -int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int minix_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *sb = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); - if (INODE_VERSION(d_inode(dentry)) == MINIX_V1) + struct super_block *sb = 
path->dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(inode, stat); + if (INODE_VERSION(inode) == MINIX_V1) stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); else stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb); diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 01ad81dcacc5a4..663d66138d06df 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb); extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct super_block *sb); -extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); extern void V1_minix_truncate(struct inode *); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b194218..b5425315adccab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -703,9 +703,10 @@ static bool nfs_need_revalidate_inode(struct inode *inode) return false; } -int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int nfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err = 0; @@ -726,17 +727,17 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is * no point in checking those. */ - if ((mnt->mnt_flags & MNT_NOATIME) || - ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + if ((path->mnt->mnt_flags & MNT_NOATIME) || + ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - nfs_readdirplus_parent_cache_miss(dentry); + nfs_readdirplus_parent_cache_miss(path->dentry); err = __nfs_revalidate_inode(server, inode); } else - nfs_readdirplus_parent_cache_hit(dentry); + nfs_readdirplus_parent_cache_hit(path->dentry); if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e49d831c4e8531..786f175805827d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -178,11 +178,12 @@ struct vfsmount *nfs_d_automount(struct path *path) } static int -nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +nfs_namespace_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_getattr(mnt, dentry, stat); - generic_fillattr(d_inode(dentry), stat); + if (NFS_FH(d_inode(path->dentry))->size != 0) + return nfs_getattr(path, stat, request_mask, query_flags); + generic_fillattr(d_inode(path->dentry), stat); return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 382c1fd05b4c8d..33017d652b1da2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2301,7 +2301,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat); + err = vfs_getattr(&path, stat, 
STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); path_put(&path); return err; } @@ -2385,7 +2385,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index db98c48c735aaa..1bbdccecbf3df8 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -135,7 +135,8 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; - return nfserrno(vfs_getattr(&p, stat)); + return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT)); } static inline int nfsd_create_is_exclusive(int createmode) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8836305eb37865..bfeb647459d95e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1306,16 +1306,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) return status; } -int ocfs2_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); - struct super_block *sb = dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + struct super_block *sb = path->dentry->d_sb; struct ocfs2_super *osb = sb->s_fs_info; int err; - err = ocfs2_inode_revalidate(dentry); + err = ocfs2_inode_revalidate(path->dentry); if (err) { if (err != -ENOENT) mlog_errno(err); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 897fd9a2e51dbe..1fdc9839cd931d 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -68,8 +68,8 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); -int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5cd617980fbfa2..a304bf34b2127d 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -245,25 +245,24 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) /* * Obtain attributes of an object given a dentry */ -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat) +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { int ret = -ENOENT; - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_getattr: called on %pd\n", - dentry); + path->dentry); ret = orangefs_inode_getattr(inode, 0, 0); if (ret == 0) { - generic_fillattr(inode, kstat); + generic_fillattr(inode, stat); /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); - kstat->blksize = orangefs_inode->blksize; + stat->blksize = orangefs_inode->blksize; } return ret; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a25969b..0c4f03c22ce02c 100644 --- a/fs/orangefs/orangefs-kernel.h 
+++ b/fs/orangefs/orangefs-kernel.h @@ -439,9 +439,8 @@ struct inode *orangefs_new_inode(struct super_block *sb, int orangefs_setattr(struct dentry *dentry, struct iattr *iattr); -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat); +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int orangefs_permission(struct inode *inode, int mask); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace6287..a6f9ca621e0b5a 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -346,7 +346,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; - err = vfs_getattr(&parentpath, &pstat); + err = vfs_getattr(&parentpath, &pstat, + STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; @@ -409,7 +410,8 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) } ovl_path_lower(next, &lowerpath); - err = vfs_getattr(&lowerpath, &stat); + err = vfs_getattr(&lowerpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) stat.size = 0; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 16e06dd8945759..6515796460dfe1 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,9 +138,10 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return err; } -static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; int err; enum ovl_path_type type; struct path realpath; @@ -148,7 +149,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); if (err) return err; @@ -264,7 +265,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, goto out; ovl_path_upper(dentry, &upperpath); - err = vfs_getattr(&upperpath, &stat); + err = vfs_getattr(&upperpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_unlock; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a0278..d4bb54f7b6b42a 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -56,16 +56,17 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct path realpath; const struct cred *old_cred; int err; ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); return err; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1e1e182d571b4a..3b5e6aa2a326f6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1724,11 +1724,12 @@ struct inode *proc_pid_make_inode(struct super_block * sb, return NULL; } -int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(const struct path *path, struct kstat *stat, + u32 
request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *task; - struct pid_namespace *pid = dentry->d_sb->s_fs_info; + struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -3511,9 +3512,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_task_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 06c73904d497ad..ee27feb34cf4d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -118,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) return 0; } -static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); if (de && de->nlink) set_nlink(inode, de->nlink); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5d6960f5f1c03c..e93cdc6ddb3174 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -149,7 +149,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, * base.c */ extern const struct dentry_operations pid_dentry_operations; -extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern int pid_revalidate(struct dentry *, unsigned int); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e0446..9db1df2537fc9c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -140,10 +140,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, return de; } -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct net *net; net = get_proc_task_net(inode); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc854..3d8726445ad1eb 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -801,9 +801,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_sys_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a3a..fb1955c8227464 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,10 +149,10 @@ void 
__init proc_root_init(void) proc_sys_init(); } -static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat -) +static int proc_root_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef086805..a3804feadadedf 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -18,6 +18,15 @@ #include #include +/** + * generic_fillattr - Fill in the basic attributes from the inode struct + * @inode: Inode to use as the source + * @stat: Where to fill in the attributes + * + * Fill in the basic attributes in the kstat structure from data that's to be + * found on the VFS inode structure. This is the default if no getattr inode + * operation is supplied. + */ void generic_fillattr(struct inode *inode, struct kstat *stat) { stat->dev = inode->i_sb->s_dev; @@ -33,81 +42,147 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; -} + if (IS_NOATIME(inode)) + stat->result_mask &= ~STATX_ATIME; + if (IS_AUTOMOUNT(inode)) + stat->attributes |= STATX_ATTR_AUTOMOUNT; +} EXPORT_SYMBOL(generic_fillattr); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the - * filehandle lookup code, which uses only the inode number and returns - * no attributes to any user. Any other code probably wants - * vfs_getattr. + * filehandle lookup code, which uses only the inode number and returns no + * attributes to any user. Any other code probably wants vfs_getattr. */ -int vfs_getattr_nosec(struct path *path, struct kstat *stat) +int vfs_getattr_nosec(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); + memset(stat, 0, sizeof(*stat)); + stat->result_mask |= STATX_BASIC_STATS; + request_mask &= STATX_ALL; + query_flags &= KSTAT_QUERY_FLAGS; if (inode->i_op->getattr) - return inode->i_op->getattr(path->mnt, path->dentry, stat); + return inode->i_op->getattr(path, stat, request_mask, + query_flags); generic_fillattr(inode, stat); return 0; } - EXPORT_SYMBOL(vfs_getattr_nosec); -int vfs_getattr(struct path *path, struct kstat *stat) +/* + * vfs_getattr - Get the enhanced basic attributes of a file + * @path: The file of interest + * @stat: Where to return the statistics + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * Ask the filesystem for a file's attributes. The caller must indicate in + * request_mask and query_flags to indicate what they want. + * + * If the file is remote, the filesystem can be forced to update the attributes + * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can + * suppress the update by passing AT_STATX_DONT_SYNC. + * + * Bits must have been set in request_mask to indicate which attributes the + * caller wants retrieving. 
Any such attribute not requested may be returned + * anyway, but the value may be approximate, and, if remote, may not have been + * synchronised with the server. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (retval) return retval; - return vfs_getattr_nosec(path, stat); + return vfs_getattr_nosec(path, stat, request_mask, query_flags); } - EXPORT_SYMBOL(vfs_getattr); -int vfs_fstat(unsigned int fd, struct kstat *stat) +/** + * vfs_statx_fd - Get the enhanced basic attributes by file descriptor + * @fd: The file descriptor referring to the file of interest + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a file descriptor to determine the file location. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_statx_fd(unsigned int fd, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { - error = vfs_getattr(&f.file->f_path, stat); + error = vfs_getattr(&f.file->f_path, stat, + request_mask, query_flags); fdput(f); } return error; } -EXPORT_SYMBOL(vfs_fstat); +EXPORT_SYMBOL(vfs_statx_fd); -int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, - int flag) +/** + * vfs_statx - Get basic and extra attributes by filename + * @dfd: A file descriptor representing the base dir for a relative filename + * @filename: The name of the file of interest + * @flags: Flags to control the query + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a filename and base directory to determine the file location. + * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink + * at the given name from being referenced. + * + * The caller must have preset stat->request_mask as for vfs_getattr(). The + * flags are also used to load up stat->query_flags. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. 
+ */ +int vfs_statx(int dfd, const char __user *filename, int flags, + struct kstat *stat, u32 request_mask) { struct path path; int error = -EINVAL; - unsigned int lookup_flags = 0; + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | - AT_EMPTY_PATH)) != 0) - goto out; + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | + AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0) + return -EINVAL; - if (!(flag & AT_SYMLINK_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - if (flag & AT_EMPTY_PATH) + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; + retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; - error = vfs_getattr(&path, stat); + error = vfs_getattr(&path, stat, request_mask, flags); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -116,19 +191,7 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, out: return error; } -EXPORT_SYMBOL(vfs_fstatat); - -int vfs_stat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, 0); -} -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); -} -EXPORT_SYMBOL(vfs_lstat); +EXPORT_SYMBOL(vfs_statx); #ifdef __ARCH_WANT_OLD_STAT @@ -141,7 +204,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta { static int warncount = 5; struct __old_kernel_stat tmp; - + if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", @@ -166,7 +229,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; -#endif +#endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -445,6 +508,81 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ +static inline int __put_timestamp(struct timespec *kts, + struct statx_timestamp __user *uts) +{ + return (__put_user(kts->tv_sec, &uts->tv_sec ) || + __put_user(kts->tv_nsec, &uts->tv_nsec ) || + __put_user(0, &uts->__reserved )); +} + +/* + * Set the statx results. 
+ */ +static long statx_set_result(struct kstat *stat, struct statx __user *buffer) +{ + uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); + gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); + + if (__put_user(stat->result_mask, &buffer->stx_mask ) || + __put_user(stat->mode, &buffer->stx_mode ) || + __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || + __put_user(stat->nlink, &buffer->stx_nlink ) || + __put_user(uid, &buffer->stx_uid ) || + __put_user(gid, &buffer->stx_gid ) || + __put_user(stat->attributes, &buffer->stx_attributes ) || + __put_user(stat->blksize, &buffer->stx_blksize ) || + __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || + __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || + __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || + __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || + __put_timestamp(&stat->atime, &buffer->stx_atime ) || + __put_timestamp(&stat->btime, &buffer->stx_btime ) || + __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || + __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || + __put_user(stat->ino, &buffer->stx_ino ) || + __put_user(stat->size, &buffer->stx_size ) || + __put_user(stat->blocks, &buffer->stx_blocks ) || + __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || + __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) + return -EFAULT; + + return 0; +} + +/** + * sys_statx - System call to get enhanced stats + * @dfd: Base directory to pathwalk from *or* fd to stat. + * @filename: File to stat *or* NULL. + * @flags: AT_* flags to control pathwalk. + * @mask: Parts of statx struct actually required. + * @buffer: Result buffer. + * + * Note that if filename is NULL, then it does the equivalent of fstat() using + * dfd to indicate the file of interest. + */ +SYSCALL_DEFINE5(statx, + int, dfd, const char __user *, filename, unsigned, flags, + unsigned int, mask, + struct statx __user *, buffer) +{ + struct kstat stat; + int error; + + if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) + return -EFAULT; + + if (filename) + error = vfs_statx(dfd, filename, flags, &stat, mask); + else + error = vfs_statx_fd(dfd, &stat, mask, flags); + if (error) + return error; + return statx_set_result(&stat, buffer); +} + /* Caller is here responsible for sufficient locking (ie. 
inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 08d3e630b49c86..83809f5b5eca25 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -440,10 +440,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) return blocks; } -int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int sysv_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *s = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); + struct super_block *s = path->dentry->d_sb; + generic_fillattr(d_inode(path->dentry), stat); stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); stat->blksize = s->s_blocksize; return 0; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 6c212288adcb09..1e7e27c729affb 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); -extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int sysv_init_icache(void); extern void sysv_destroy_icache(void); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 528369f3e47208..30825d882aa94a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1622,11 +1622,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { loff_t size; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f0c86f076535a6..4d57e488038e34 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1749,8 +1749,8 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode); -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index f7dfef53f73968..6023c97c6da2f2 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -152,9 +152,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) return err; } -static int udf_symlink_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int udf_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct inode *inode = d_backing_inode(dentry); struct page *page; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 22c16155f1b42a..229cc6a6d8ef03 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -489,11 +489,12 @@ xfs_vn_get_link_inline( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + const struct path *path, + 
struct kstat *stat, + u32 request_mask, + unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; diff --git a/include/linux/fs.h b/include/linux/fs.h index 52350947c670c9..aad3fd0ff5f831 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -2902,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2916,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2949,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/nfs_fs.h 
b/include/linux/nfs_fs.h index f1da8c8dd47386..287f341610864f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2ade..c76e524fb34b6a 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include #include +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b88423..980c3c9b06f881 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd35938..813afd6eee713e 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -63,5 +63,10 @@ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7fec7e36d9217d..51a6b86e370043 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -1,6 +1,7 @@ #ifndef _UAPI_LINUX_STAT_H #define _UAPI_LINUX_STAT_H +#include #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) @@ -41,5 +42,135 @@ #endif +/* + * Timestamp structure 
for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). 
+ */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + #endif /* _UAPI_LINUX_STAT_H */ diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633fbf..e07728f716b29b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -958,10 +958,10 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) } EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int shmem_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); if (info->alloced - info->swapped != inode->i_mapping->nrpages) { diff --git a/samples/Kconfig b/samples/Kconfig index b124f62ed6cb30..9cb63188d3ef2f 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -112,4 +112,10 @@ config SAMPLE_VFIO_MDEV_MTTY Build a virtual tty sample driver for use as a VFIO mediated device +config SAMPLE_STATX + bool "Build example extended-stat using code" + depends on BROKEN + help + Build example userspace program to use the new extended-stat syscall. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 86a137e451d978..db54e766ddb1a3 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ configfs/ connector/ v4l/ trace_printk/ blackfin/ \ - vfio-mdev/ + vfio-mdev/ statx/ diff --git a/samples/statx/Makefile b/samples/statx/Makefile new file mode 100644 index 00000000000000..1f80a3d8cf45ca --- /dev/null +++ b/samples/statx/Makefile @@ -0,0 +1,10 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. 
+obj- := dummy.o + +# List of programs to build +hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c new file mode 100644 index 00000000000000..8571d766331dd1 --- /dev/null +++ b/samples/statx/test-statx.c @@ -0,0 +1,254 @@ +/* Test the statx() system call. + * + * Note that the output of this program is intended to look like the output of + * /bin/stat where possible. + * + * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AT_STATX_SYNC_TYPE 0x6000 +#define AT_STATX_SYNC_AS_STAT 0x0000 +#define AT_STATX_FORCE_SYNC 0x2000 +#define AT_STATX_DONT_SYNC 0x4000 + +static __attribute__((unused)) +ssize_t statx(int dfd, const char *filename, unsigned flags, + unsigned int mask, struct statx *buffer) +{ + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +} + +static void print_time(const char *field, struct statx_timestamp *ts) +{ + struct tm tm; + time_t tim; + char buffer[100]; + int len; + + tim = ts->tv_sec; + if (!localtime_r(&tim, &tm)) { + perror("localtime_r"); + exit(1); + } + len = strftime(buffer, 100, "%F %T", &tm); + if (len == 0) { + perror("strftime"); + exit(1); + } + printf("%s", field); + fwrite(buffer, 1, len, stdout); + printf(".%09u", ts->tv_nsec); + len = strftime(buffer, 100, "%z", &tm); + if (len == 0) { + perror("strftime2"); + exit(1); + } + fwrite(buffer, 1, len, stdout); + printf("\n"); +} + +static void dump_statx(struct statx *stx) +{ + char buffer[256], ft = '?'; + + printf("results=%x\n", stx->stx_mask); + + printf(" "); + if (stx->stx_mask & STATX_SIZE) + printf(" Size: %-15llu", (unsigned long long)stx->stx_size); + if (stx->stx_mask & STATX_BLOCKS) + printf(" Blocks: %-10llu", (unsigned long long)stx->stx_blocks); + printf(" IO Block: %-6llu", (unsigned long long)stx->stx_blksize); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFIFO: printf(" FIFO\n"); ft = 'p'; break; + case S_IFCHR: printf(" character special file\n"); ft = 'c'; break; + case S_IFDIR: printf(" directory\n"); ft = 'd'; break; + case S_IFBLK: printf(" block special file\n"); ft = 'b'; break; + case S_IFREG: printf(" regular file\n"); ft = '-'; break; + case S_IFLNK: printf(" symbolic link\n"); ft = 'l'; break; + case S_IFSOCK: printf(" socket\n"); ft = 's'; break; + default: + printf(" unknown type (%o)\n", stx->stx_mode & S_IFMT); + break; + } + } else { + printf(" no type\n"); + } + + sprintf(buffer, "%02x:%02x", stx->stx_dev_major, stx->stx_dev_minor); + printf("Device: %-15s", buffer); + if (stx->stx_mask & STATX_INO) + printf(" Inode: %-11llu", (unsigned long long) stx->stx_ino); + if (stx->stx_mask & STATX_NLINK) + printf(" Links: %-5u", stx->stx_nlink); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + printf(" Device type: %u,%u", + stx->stx_rdev_major, stx->stx_rdev_minor); + break; + } + } + printf("\n"); 
+ + if (stx->stx_mask & STATX_MODE) + printf("Access: (%04o/%c%c%c%c%c%c%c%c%c%c) ", + stx->stx_mode & 07777, + ft, + stx->stx_mode & S_IRUSR ? 'r' : '-', + stx->stx_mode & S_IWUSR ? 'w' : '-', + stx->stx_mode & S_IXUSR ? 'x' : '-', + stx->stx_mode & S_IRGRP ? 'r' : '-', + stx->stx_mode & S_IWGRP ? 'w' : '-', + stx->stx_mode & S_IXGRP ? 'x' : '-', + stx->stx_mode & S_IROTH ? 'r' : '-', + stx->stx_mode & S_IWOTH ? 'w' : '-', + stx->stx_mode & S_IXOTH ? 'x' : '-'); + if (stx->stx_mask & STATX_UID) + printf("Uid: %5d ", stx->stx_uid); + if (stx->stx_mask & STATX_GID) + printf("Gid: %5d\n", stx->stx_gid); + + if (stx->stx_mask & STATX_ATIME) + print_time("Access: ", &stx->stx_atime); + if (stx->stx_mask & STATX_MTIME) + print_time("Modify: ", &stx->stx_mtime); + if (stx->stx_mask & STATX_CTIME) + print_time("Change: ", &stx->stx_ctime); + if (stx->stx_mask & STATX_BTIME) + print_time(" Birth: ", &stx->stx_btime); + + if (stx->stx_attributes) { + unsigned char bits; + int loop, byte; + + static char attr_representation[64 + 1] = + /* STATX_ATTR_ flags: */ + "????????" /* 63-56 */ + "????????" /* 55-48 */ + "????????" /* 47-40 */ + "????????" /* 39-32 */ + "????????" /* 31-24 0x00000000-ff000000 */ + "????????" /* 23-16 0x00000000-00ff0000 */ + "???me???" /* 15- 8 0x00000000-0000ff00 */ + "?dai?c??" /* 7- 0 0x00000000-000000ff */ + ; + + printf("Attributes: %016llx (", stx->stx_attributes); + for (byte = 64 - 8; byte >= 0; byte -= 8) { + bits = stx->stx_attributes >> byte; + for (loop = 7; loop >= 0; loop--) { + int bit = byte + loop; + + if (bits & 0x80) + putchar(attr_representation[63 - bit]); + else + putchar('-'); + bits <<= 1; + } + if (byte) + putchar(' '); + } + printf(")\n"); + } +} + +static void dump_hex(unsigned long long *data, int from, int to) +{ + unsigned offset, print_offset = 1, col = 0; + + from /= 8; + to = (to + 7) / 8; + + for (offset = from; offset < to; offset++) { + if (print_offset) { + printf("%04x: ", offset * 8); + print_offset = 0; + } + printf("%016llx", data[offset]); + col++; + if ((col & 3) == 0) { + printf("\n"); + print_offset = 1; + } else { + printf(" "); + } + } + + if (!print_offset) + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct statx stx; + int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; + + unsigned int mask = STATX_ALL; + + for (argv++; *argv; argv++) { + if (strcmp(*argv, "-F") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_FORCE_SYNC; + continue; + } + if (strcmp(*argv, "-D") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_DONT_SYNC; + continue; + } + if (strcmp(*argv, "-L") == 0) { + atflag &= ~AT_SYMLINK_NOFOLLOW; + continue; + } + if (strcmp(*argv, "-O") == 0) { + mask &= ~STATX_BASIC_STATS; + continue; + } + if (strcmp(*argv, "-A") == 0) { + atflag |= AT_NO_AUTOMOUNT; + continue; + } + if (strcmp(*argv, "-R") == 0) { + raw = 1; + continue; + } + + memset(&stx, 0xbf, sizeof(stx)); + ret = statx(AT_FDCWD, *argv, atflag, mask, &stx); + printf("statx(%s) = %d\n", *argv, ret); + if (ret < 0) { + perror(*argv); + exit(1); + } + + if (raw) + dump_hex((unsigned long long *)&stx, 0, sizeof(stx)); + + dump_statx(&stx); + } + return 0; +} From c74042f3b3ca982652af99cad85252a2655c6064 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 3 Feb 2017 09:19:40 -0800 Subject: [PATCH 0698/1911] ixgbe: Limit use of 2K buffers on architectures with 256B or larger cache lines On architectures that have a cache line size larger than 64 Bytes we start running into issues where the amount of headroom for the frame 
starts shrinking. The size of skb_shared_info on a system with a 64B L1 cache line size is 320. This increases to 384 with a 128B cache line, and 512 with a 256B cache line. In addition the NET_SKB_PAD value increases as well consistent with the cache line size. As a result when we get to a 256B cache line as seen on the s390 we end up 768 bytes used by padding and shared info leaving us with only 1280 bytes to use for data storage. On architectures such as this we should default to using 3K Rx buffers out of a 8K page instead of trying to do 1.5K buffers out of a 4K page. To take all of this into account I have added one small check so that we compare the max_frame to the amount of actual data we can store. This was already occurring for igb, but I had overlooked it for ixgbe as it doesn't have strict limits for 82599 once we enable jumbo frames. By adding this check we will automatically enable 3K Rx buffers as soon as the maximum frame size we can handle drops below the standard Ethernet MTU. I also went through and fixed one small typo that I found where I had left an IGB in a variable name due to a copy/paste error. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7a951b11682152..b1ecc2627a5aee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -96,7 +96,7 @@ #define IXGBE_MAX_FRAME_BUILD_SKB \ (SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K) - IXGBE_SKB_PAD) #else -#define IGB_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K +#define IXGBE_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K #endif /* diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 67ab13fd163c16..a7a430a7be2cd9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3972,7 +3972,8 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) + if ((max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) || + (max_frame > IXGBE_MAX_FRAME_BUILD_SKB)) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); #endif } From 284316dd42a2027afe37df34c5199eb4eabed8fd Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 2 Mar 2017 15:42:48 -0600 Subject: [PATCH 0699/1911] CIFS: Fix sparse warnings Fix two minor sparse compile check warnings Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/cifs_unicode.h | 6 +++--- fs/cifs/cifssmb.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 479bc0a941f35f..3d7298cc0aeb35 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -130,10 +130,10 @@ wchar_t cifs_toupper(wchar_t in); * Returns: * Address of the first string */ -static inline wchar_t * -UniStrcat(wchar_t *ucs1, const wchar_t *ucs2) +static inline __le16 * +UniStrcat(__le16 *ucs1, const __le16 *ucs2) { - wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */ + __le16 *anchor = ucs1; /* save a pointer to start of ucs1 */ while (*ucs1++) ; /* To end of first string */ ucs1--; /* Return to the null */ diff --git a/fs/cifs/cifssmb.c 
b/fs/cifs/cifssmb.c index 5005c7995b6a1a..06695067192968 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4886,7 +4886,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, le16_to_cpu(pSMBr->t2.DataCount), num_of_nodes, target_nodes, nls_codepage, remap, search_name, - pSMBr->hdr.Flags2 & SMBFLG2_UNICODE); + (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) != 0); GetDFSRefExit: cifs_buf_release(pSMB); From ef65aaede23f75977af56a8c330bb9be8c6e125c Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 18 Jan 2017 15:35:57 +0530 Subject: [PATCH 0700/1911] smb2: Enforce sec= mount option If the security type specified using a mount option is not supported, the SMB2 session setup code changes the security type to RawNTLMSSP. We should instead fail the mount and return an error. The patch changes the code for SMB2 to make it similar to the code used for SMB1. Like in SMB1, we now use the global security flags to select the security method to be used when no security method is specified and to return an error when the requested auth method is not available. For SMB2, we also use ntlmv2 as a synonym for nltmssp. Signed-off-by: Sachin Prabhu Acked-by: Pavel Shilovsky Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/connect.c | 3 ++- fs/cifs/sess.c | 4 ++-- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 4 ++++ fs/cifs/smb2pdu.c | 37 +++++++++++++++++++++++++++++++++---- fs/cifs/smb2proto.h | 2 ++ 8 files changed, 49 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index af224cda86971b..d42dd328864780 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -443,6 +443,9 @@ struct smb_version_operations { int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, struct mid_q_entry **); + enum securityEnum (*select_sectype)(struct TCP_Server_Info *, + enum securityEnum); + }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 9ee46c1c3ebda3..97e5d236d26559 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -533,4 +533,6 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature, struct shash_desc *shash); +enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, + enum securityEnum); #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c84..de4c56e8fb3756 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2073,7 +2073,8 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol) * that was specified, or "Unspecified" if that sectype was not * compatible with the given NEGOTIATE request. 
*/ - if (select_sectype(server, vol->sectype) == Unspecified) + if (server->ops->select_sectype(server, vol->sectype) + == Unspecified) return false; /* diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index dcbcc927399a0f..8b0502cd39afb6 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -498,7 +498,7 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, } enum securityEnum -select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) +cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) { switch (server->negflavor) { case CIFS_NEGFLAVOR_EXTENDED: @@ -1391,7 +1391,7 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) { int type; - type = select_sectype(ses->server, ses->sectype); + type = cifs_select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { cifs_dbg(VFS, diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 67a987e4d026e0..cc93ba4da9b592 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1087,6 +1087,7 @@ struct smb_version_operations smb1_operations = { .is_read_op = cifs_is_read_op, .wp_retry_size = cifs_wp_retry_size, .dir_needs_close = cifs_dir_needs_close, + .select_sectype = cifs_select_sectype, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b360c381b00ec9..0231108d9387a4 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2381,6 +2381,7 @@ struct smb_version_operations smb20_operations = { .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; struct smb_version_operations smb21_operations = { @@ -2463,6 +2464,7 @@ struct smb_version_operations smb21_operations = { .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; struct smb_version_operations smb30_operations = { @@ -2555,6 +2557,7 @@ struct smb_version_operations smb30_operations = { .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; #ifdef CONFIG_CIFS_SMB311 @@ -2648,6 +2651,7 @@ struct smb_version_operations smb311_operations = { .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; #endif /* CIFS_SMB311 */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2069431b32e368..7446496850a3bd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -657,6 +657,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) return -EIO; } +enum securityEnum +smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) +{ + switch (requested) { + case Kerberos: + case RawNTLMSSP: + return requested; + case NTLMv2: + return RawNTLMSSP; + case Unspecified: + if (server->sec_ntlmssp && + (global_secflags & CIFSSEC_MAY_NTLMSSP)) + return RawNTLMSSP; + if ((server->sec_kerberos || server->sec_mskerberos) && + (global_secflags & CIFSSEC_MAY_KRB5)) + return Kerberos; + /* Fallthrough */ + default: + return Unspecified; + } +} + struct SMB2_sess_data { unsigned int xid; struct cifs_ses *ses; @@ -1009,10 +1031,17 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) static int SMB2_select_sec(struct 
cifs_ses *ses, struct SMB2_sess_data *sess_data) { - if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP) - ses->sectype = RawNTLMSSP; + int type; + + type = smb2_select_sectype(ses->server, ses->sectype); + cifs_dbg(FYI, "sess setup type %d\n", type); + if (type == Unspecified) { + cifs_dbg(VFS, + "Unable to select appropriate authentication method!"); + return -EINVAL; + } - switch (ses->sectype) { + switch (type) { case Kerberos: sess_data->func = SMB2_auth_kerberos; break; @@ -1020,7 +1049,7 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data) sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate; break; default: - cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype); + cifs_dbg(VFS, "secType %d not supported!\n", type); return -EOPNOTSUPP; } diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 11d9f3013db85b..69e35873b1de73 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -181,4 +181,6 @@ extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state); extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); +extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, + enum securityEnum); #endif /* _SMB2PROTO_H */ From 0265634eb9e04a16ae99941c320718c38eb865e0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 25 Feb 2017 08:28:16 -0300 Subject: [PATCH 0701/1911] [media] serial_ir: ensure we're ready to receive interrupts When the interrupt requested with devm_request_irq(), serial_ir.rcdev is still null so will cause null deference if the irq handler is called early on. Also ensure that timeout_timer is setup. Link: http://lkml.kernel.org/r/CA+55aFxsh2uF8gi5sN_guY3Z+tiLv7LpJYKBw+y8vqLzp+TsnQ@mail.gmail.com [mchehab@s-opensource.com: moved serial_ir_probe() back to its original place] Cc: # 4.10 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/serial_ir.c | 123 ++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 923fb2299553cb..41b54e40176c23 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg) ir_raw_event_handle(serial_ir.rcdev); } +/* Needed by serial_ir_probe() */ +static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf, + unsigned int count); +static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle); +static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier); +static int serial_ir_open(struct rc_dev *rcdev); +static void serial_ir_close(struct rc_dev *rcdev); + static int serial_ir_probe(struct platform_device *dev) { + struct rc_dev *rcdev; int i, nlow, nhigh, result; + rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW); + if (!rcdev) + return -ENOMEM; + + if (hardware[type].send_pulse && hardware[type].send_space) + rcdev->tx_ir = serial_ir_tx; + if (hardware[type].set_send_carrier) + rcdev->s_tx_carrier = serial_ir_tx_carrier; + if (hardware[type].set_duty_cycle) + rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; + + switch (type) { + case IR_HOMEBREW: + rcdev->input_name = "Serial IR type home-brew"; + break; + case IR_IRDEO: + rcdev->input_name = "Serial IR type IRdeo"; + break; + case IR_IRDEO_REMOTE: + rcdev->input_name = "Serial IR type IRdeo remote"; + break; + case IR_ANIMAX: + rcdev->input_name = "Serial IR type AnimaX"; + break; + case IR_IGOR: + 
rcdev->input_name = "Serial IR type IgorPlug"; + break; + } + + rcdev->input_phys = KBUILD_MODNAME "/input0"; + rcdev->input_id.bustype = BUS_HOST; + rcdev->input_id.vendor = 0x0001; + rcdev->input_id.product = 0x0001; + rcdev->input_id.version = 0x0100; + rcdev->open = serial_ir_open; + rcdev->close = serial_ir_close; + rcdev->dev.parent = &serial_ir.pdev->dev; + rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; + rcdev->driver_name = KBUILD_MODNAME; + rcdev->map_name = RC_MAP_RC6_MCE; + rcdev->min_timeout = 1; + rcdev->timeout = IR_DEFAULT_TIMEOUT; + rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; + rcdev->rx_resolution = 250000; + + serial_ir.rcdev = rcdev; + + setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, + (unsigned long)&serial_ir); + result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler, share_irq ? IRQF_SHARED : 0, KBUILD_MODNAME, &hardware); @@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev) return -EBUSY; } - setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, - (unsigned long)&serial_ir); - result = hardware_init_port(); if (result < 0) return result; @@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev) sense ? "low" : "high"); dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io); - return 0; + + return devm_rc_register_device(&dev->dev, rcdev); } static int serial_ir_open(struct rc_dev *rcdev) @@ -723,7 +780,6 @@ static void serial_ir_exit(void) static int __init serial_ir_init_module(void) { - struct rc_dev *rcdev; int result; switch (type) { @@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void) sense = !!sense; result = serial_ir_init(); - if (result) - return result; - - rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW); - if (!rcdev) { - result = -ENOMEM; - goto serial_cleanup; - } - - if (hardware[type].send_pulse && hardware[type].send_space) - rcdev->tx_ir = serial_ir_tx; - if (hardware[type].set_send_carrier) - rcdev->s_tx_carrier = serial_ir_tx_carrier; - if (hardware[type].set_duty_cycle) - rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; - - switch (type) { - case IR_HOMEBREW: - rcdev->input_name = "Serial IR type home-brew"; - break; - case IR_IRDEO: - rcdev->input_name = "Serial IR type IRdeo"; - break; - case IR_IRDEO_REMOTE: - rcdev->input_name = "Serial IR type IRdeo remote"; - break; - case IR_ANIMAX: - rcdev->input_name = "Serial IR type AnimaX"; - break; - case IR_IGOR: - rcdev->input_name = "Serial IR type IgorPlug"; - break; - } - - rcdev->input_phys = KBUILD_MODNAME "/input0"; - rcdev->input_id.bustype = BUS_HOST; - rcdev->input_id.vendor = 0x0001; - rcdev->input_id.product = 0x0001; - rcdev->input_id.version = 0x0100; - rcdev->open = serial_ir_open; - rcdev->close = serial_ir_close; - rcdev->dev.parent = &serial_ir.pdev->dev; - rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; - rcdev->driver_name = KBUILD_MODNAME; - rcdev->map_name = RC_MAP_RC6_MCE; - rcdev->min_timeout = 1; - rcdev->timeout = IR_DEFAULT_TIMEOUT; - rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; - rcdev->rx_resolution = 250000; - - serial_ir.rcdev = rcdev; - - result = rc_register_device(rcdev); - if (!result) return 0; -serial_cleanup: + serial_ir_exit(); return result; } @@ -818,7 +820,6 @@ static int __init serial_ir_init_module(void) static void __exit serial_ir_exit_module(void) { del_timer_sync(&serial_ir.timeout_timer); - rc_unregister_device(serial_ir.rcdev); serial_ir_exit(); } From 2e1e4949f9dfb053122785cd73540bb1e61f768b Mon Sep 17 00:00:00 2001 From: Qi Hou 
Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: [PATCH 0702/1911] i2c: add missing of_node_put in i2c_mux_del_adapters Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). Cc: stable@vger.kernel.org Signed-off-by: Qi Hou Reviewed-by: Zhang Xiao Signed-off-by: Peter Rosin --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919cb..2178266bca7948 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } From db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 13 Feb 2017 10:35:44 -0200 Subject: [PATCH 0703/1911] [media] lirc: fix dead lock between open and wakeup_filter The locking in lirc needs improvement, but for now just fix this potential deadlock. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc1+ #1 Not tainted ------------------------------------------------------- bash/2502 is trying to acquire lock: (ir_raw_handler_lock){+.+.+.}, at: [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] but task is already holding lock: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (&dev->lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] rc_open+0x2a/0x80 [rc_core] [] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev] [] chrdev_open+0xb0/0x210 [] do_dentry_open+0x20a/0x2f0 [] vfs_open+0x4c/0x80 [] path_openat+0x5bc/0xc00 [] do_filp_open+0x91/0x100 [] do_sys_open+0x130/0x220 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (lirc_dev_lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] lirc_register_driver+0x67/0x59b [lirc_dev] [] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec] [] ir_raw_handler_register+0x7c/0xb0 [rc_core] [] 0xffffffffc0398010 [] do_one_initcall+0x52/0x1b0 [] do_init_module+0x5f/0x1fa [] load_module+0x2675/0x2b00 [] SYSC_finit_module+0xdf/0x110 [] SyS_finit_module+0xe/0x10 [] do_syscall_64+0x6c/0x1f0 [] return_from_SYSCALL_64+0x0/0x7a -> #0 (ir_raw_handler_lock){+.+.+.}: [] __lock_acquire+0x10f7/0x1290 [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] [] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] [] store_filter+0x1aa/0x240 [rc_core] [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x45/0x60 [] kernfs_fop_write+0x155/0x1e0 [] __vfs_write+0x37/0x160 [] vfs_write+0xc8/0x1e0 [] SyS_write+0x58/0xc0 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Chain exists of: ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->lock); lock(lirc_dev_lock); lock(&dev->lock); lock(ir_raw_handler_lock); *** DEADLOCK *** 4 locks held by bash/2502: #0: (sb_writers#4){.+.+.+}, at: [] vfs_write+0x195/0x1e0 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x11f/0x1e0 #2: (s_active#215){.+.+.+}, at: [] kernfs_fop_write+0x128/0x1e0 #3: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] stack backtrace: CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1 Hardware name: /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1be/0x210 __lock_acquire+0x10f7/0x1290 lock_acquire+0xfd/0x200 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] mutex_lock_nested+0x77/0x6d0 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback] store_filter+0x1aa/0x240 [rc_core] dev_attr_store+0x18/0x30 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x155/0x1e0 __vfs_write+0x37/0x160 ? rcu_read_lock_sched_held+0x4a/0x80 ? rcu_sync_lockdep_assert+0x2f/0x60 ? __sb_start_write+0x10c/0x220 ? vfs_write+0x195/0x1e0 ? 
security_file_permission+0x3b/0xc0 vfs_write+0xc8/0x1e0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef85..4630f3e6753889 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; From c1305a40722f3e02f3d971acc22f5991aff239a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 12 Feb 2017 15:01:22 -0200 Subject: [PATCH 0704/1911] [media] rc: nuvoton: fix deadlock in nvt_write_wakeup_codes nvt_write_wakeup_codes acquires the same lock as the ISR but doesn't disable interrupts on the local CPU. This caused the following deadlock. Fix this by using spin_lock_irqsave. [ 432.362008] ================================ [ 432.362074] WARNING: inconsistent lock state [ 432.362144] 4.10.0-rc7-next-20170210 #1 Not tainted [ 432.362219] -------------------------------- [ 432.362286] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 432.362379] swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 432.362457] (&(&nvt->lock)->rlock){?.+...}, at: [] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.362611] {HARDIRQ-ON-W} state was registered at: [ 432.362686] [ 432.362698] [] __lock_acquire+0x5dc/0x1260 [ 432.362812] [ 432.362817] [] lock_acquire+0xe9/0x1d0 [ 432.362927] [ 432.362934] [] _raw_spin_lock+0x33/0x50 [ 432.363045] [ 432.363051] [] nvt_write_wakeup_codes.isra.12+0x22/0xe0 [nuvoton_cir] [ 432.363193] [ 432.363199] [] wakeup_data_store+0xdf/0xf0 [nuvoton_cir] [ 432.363327] [ 432.363333] [] dev_attr_store+0x13/0x20 [ 432.363441] [ 432.363449] [] sysfs_kf_write+0x40/0x50 [ 432.363558] [ 432.363564] [] kernfs_fop_write+0x150/0x1e0 [ 432.363676] [ 432.363685] [] __vfs_write+0x23/0x120 [ 432.363791] [ 432.363798] [] vfs_write+0xc3/0x1e0 [ 432.363902] [ 432.363909] [] SyS_write+0x44/0xa0 [ 432.364012] [ 432.364021] [] do_syscall_64+0x57/0x140 [ 432.364129] [ 432.364135] [] return_from_SYSCALL_64+0x0/0x7a [ 432.364252] irq event stamp: 415118 [ 432.364313] hardirqs last enabled at (415115): [] cpuidle_enter_state+0x11b/0x370 [ 432.364445] hardirqs last disabled at (415116): [] common_interrupt+0x8b/0x90 [ 432.364573] softirqs last enabled at (415118): [] _local_bh_enable+0x1c/0x50 [ 432.364699] softirqs last disabled at (415117): [] irq_enter+0x43/0x60 [ 432.364814] other info that might help us debug this: [ 432.364909] Possible unsafe locking scenario: [ 432.367821] CPU0 [ 432.370645] ---- [ 432.373432] lock(&(&nvt->lock)->rlock); [ 432.376228] [ 432.378982] lock(&(&nvt->lock)->rlock); [ 432.381757] *** DEADLOCK *** [ 432.389888] no locks held by swapper/0/0. [ 432.392574] stack backtrace: [ 432.397774] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc7-next-20170210 #1 [ 432.400375] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015 [ 432.403023] Call Trace: [ 432.405636] [ 432.408208] dump_stack+0x68/0x93 [ 432.410775] print_usage_bug+0x1dd/0x1f0 [ 432.413334] mark_lock+0x559/0x5c0 [ 432.415871] ? 
print_shortest_lock_dependencies+0x1a0/0x1a0 [ 432.418431] __lock_acquire+0x6b1/0x1260 [ 432.420941] lock_acquire+0xe9/0x1d0 [ 432.423396] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.425844] _raw_spin_lock+0x33/0x50 [ 432.428252] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.430670] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.433085] __handle_irq_event_percpu+0x43/0x330 [ 432.435493] handle_irq_event_percpu+0x1e/0x50 [ 432.437884] handle_irq_event+0x34/0x60 [ 432.440236] handle_edge_irq+0x6a/0x150 [ 432.442561] handle_irq+0x15/0x20 [ 432.444854] do_IRQ+0x57/0x110 [ 432.447115] common_interrupt+0x90/0x90 [ 432.449380] RIP: 0010:cpuidle_enter_state+0x120/0x370 [ 432.451653] RSP: 0018:ffffffff81c03dd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffffcc [ 432.453994] RAX: ffffffff81c14500 RBX: 0000000000000008 RCX: 00000064aac6f2d2 [ 432.456349] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff81c14500 [ 432.458704] RBP: ffffffff81c03e18 R08: cccccccccccccccd R09: 0000000000000018 [ 432.461072] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880100a21260 [ 432.463450] R13: ffffffff81c7e6f8 R14: 0000000000000008 R15: ffffffff81c7e6e0 [ 432.465819] [ 432.468104] ? cpuidle_enter_state+0x11b/0x370 [ 432.470413] cpuidle_enter+0x12/0x20 [ 432.472698] call_cpuidle+0x1e/0x40 [ 432.474967] do_idle+0xe3/0x1c0 [ 432.477172] cpu_startup_entry+0x18/0x20 [ 432.479376] rest_init+0x130/0x140 [ 432.481565] start_kernel+0x3cc/0x3d9 [ 432.483750] x86_64_start_reservations+0x2a/0x2c [ 432.485980] x86_64_start_kernel+0x178/0x18b [ 432.488222] start_cpu+0x14/0x14 [ 432.490453] ? start_cpu+0x14/0x14 Fixes: 97c129747af5 "[media] rc: nuvoton-cir: Add support wakeup via sysfs filter callback" Signed-off-by: Heiner Kallweit Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/nuvoton-cir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c index b109f8246b968d..ec4b25bd2ec299 100644 --- a/drivers/media/rc/nuvoton-cir.c +++ b/drivers/media/rc/nuvoton-cir.c @@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, { u8 tolerance, config; struct nvt_dev *nvt = dev->priv; + unsigned long flags; int i; /* hardcode the tolerance to 10% */ tolerance = DIV_ROUND_UP(count, 10); - spin_lock(&nvt->lock); + spin_lock_irqsave(&nvt->lock, flags); nvt_clear_cir_wake_fifo(nvt); nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP); @@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON); - spin_unlock(&nvt->lock); + spin_unlock_irqrestore(&nvt->lock, flags); } static ssize_t wakeup_data_show(struct device *dev, From 413808685dd7c9b54bbc5af79da2eaddd0fc3cb2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 22 Feb 2017 18:48:01 -0300 Subject: [PATCH 0705/1911] [media] rc: raw decoder for keymap protocol is not loaded on register When the protocol is set via the sysfs protocols attribute, the decoder is loaded. However, when it is not when a device is first plugged in or registered. 
Fixes: acc1c3c ("[media] media: rc: load decoder modules on-demand") Signed-off-by: Sean Young Cc: # v4.5+ Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2424946740e64f..a158b321b40a93 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev) { int rc; struct rc_map *rc_map; + u64 rc_type; if (!dev->map_name) return -EINVAL; @@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev) if (rc) return rc; - if (dev->change_protocol) { - u64 rc_type = (1ll << rc_map->rc_type); + rc_type = BIT_ULL(rc_map->rc_type); + if (dev->change_protocol) { rc = dev->change_protocol(dev, &rc_type); if (rc < 0) goto out_table; dev->enabled_protocols = rc_type; } + if (dev->driver_type == RC_DRIVER_IR_RAW) + ir_raw_load_modules(&rc_type); + set_bit(EV_KEY, dev->input_dev->evbit); set_bit(EV_REP, dev->input_dev->evbit); set_bit(EV_MSC, dev->input_dev->evbit); From 5df62771c556e4ec9d7ecea1f070ff6da4bce151 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 23 Feb 2017 06:11:21 -0300 Subject: [PATCH 0706/1911] [media] rc: protocol is not set on register for raw IR devices ir_raw_event_register() sets up change_protocol(), and without that set, rc_setup_rx_device() does not set the protocol for the device on register. The standard udev rules run ir-keytable, which writes to the protocols file again, which hides this problem. Fixes: 7ff2c2b ("[media] rc-main: split setup and unregister functions") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index a158b321b40a93..d84533699668d2 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1781,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev) dev->input_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - if (dev->driver_type == RC_DRIVER_IR_RAW || dev->driver_type == RC_DRIVER_IR_RAW_TX) { if (!raw_init) { @@ -1795,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev) } rc = ir_raw_event_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_raw; } /* Allow the RC sysfs nodes to be accessible */ @@ -1807,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev) return 0; -out_rx: - rc_free_rx_device(dev); +out_raw: + ir_raw_event_unregister(dev); out_dev: device_del(&dev->dev); out_unlock: From c919a3069c775c1c876bec55e00b2305d5125caa Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:27:36 -0500 Subject: [PATCH 0707/1911] can: gs_usb: Don't use stack memory for USB transfers Fixes: 05ca5270005c can: gs_usb: add ethtool set_phys_id callback to locate physical device The gs_usb driver is performing USB transfers using buffers allocated on the stack. This causes the driver to not function with vmapped stacks. Instead, allocate memory for the transfer buffers. 
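(For illustration only: a minimal sketch of the pattern this patch applies, not the driver's actual code. With vmapped stacks the USB core may need to DMA-map the transfer buffer, and vmalloc'ed stack memory cannot be DMA-mapped, so the buffer has to come from the heap. The request number, value and payload below are hypothetical placeholders, and "udev" stands for the interface's struct usb_device; the real driver uses its own GS_USB_BREQ_* requests and structures, as the diff below shows.)

	/* Sketch: heap-allocate the control-transfer buffer instead of
	 * passing the address of a stack variable.
	 */
	u32 *buf;
	int rc;

	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	*buf = 0x0000beef;	/* hypothetical payload */
	rc = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
			     0x01,	/* hypothetical bRequest */
			     USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
			     1, 0, buf, sizeof(*buf), 1000);
	kfree(buf);
	if (rc < 0)
		return rc;
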
Signed-off-by: Ethan Zonca Cc: linux-stable # >= v4.8 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 40 ++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 77e3cc06a30c8c..a0dabd4038ba35 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -908,10 +908,14 @@ static int gs_usb_probe(struct usb_interface *intf, struct gs_usb *dev; int rc = -ENOMEM; unsigned int icount, i; - struct gs_host_config hconf = { - .byte_order = 0x0000beef, - }; - struct gs_device_config dconf; + struct gs_host_config *hconf; + struct gs_device_config *dconf; + + hconf = kmalloc(sizeof(*hconf), GFP_KERNEL); + if (!hconf) + return -ENOMEM; + + hconf->byte_order = 0x0000beef; /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -920,16 +924,22 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &hconf, - sizeof(hconf), + hconf, + sizeof(*hconf), 1000); + kfree(hconf); + if (rc < 0) { dev_err(&intf->dev, "Couldn't send data format (err=%d)\n", rc); return rc; } + dconf = kmalloc(sizeof(*dconf), GFP_KERNEL); + if (!dconf) + return -ENOMEM; + /* read device config */ rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), @@ -937,28 +947,33 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &dconf, - sizeof(dconf), + dconf, + sizeof(*dconf), 1000); if (rc < 0) { dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n", rc); + kfree(dconf); return rc; } - icount = dconf.icount + 1; + icount = dconf->icount + 1; dev_info(&intf->dev, "Configuring for %d interfaces\n", icount); if (icount > GS_MAX_INTF) { dev_err(&intf->dev, "Driver cannot handle more that %d CAN interfaces\n", GS_MAX_INTF); + kfree(dconf); return -EINVAL; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) + if (!dev) { + kfree(dconf); return -ENOMEM; + } + init_usb_anchor(&dev->rx_submitted); atomic_set(&dev->active_channels, 0); @@ -967,7 +982,7 @@ static int gs_usb_probe(struct usb_interface *intf, dev->udev = interface_to_usbdev(intf); for (i = 0; i < icount; i++) { - dev->canch[i] = gs_make_candev(i, intf, &dconf); + dev->canch[i] = gs_make_candev(i, intf, dconf); if (IS_ERR_OR_NULL(dev->canch[i])) { /* save error code to return later */ rc = PTR_ERR(dev->canch[i]); @@ -978,12 +993,15 @@ static int gs_usb_probe(struct usb_interface *intf, gs_destroy_candev(dev->canch[i]); usb_kill_anchored_urbs(&dev->rx_submitted); + kfree(dconf); kfree(dev); return rc; } dev->canch[i]->parent = dev; } + kfree(dconf); + return 0; } From 540a27aef355e3fd8c598600d4a3c8f92127ee05 Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:00:34 -0500 Subject: [PATCH 0708/1911] can: gs_usb: fix coding style This patch fixes five minor style issues, spaces are between bitwise OR operators. 
Signed-off-by: Ethan Zonca Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index a0dabd4038ba35..300349fe8dc049 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -258,7 +258,7 @@ static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev) rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_MODE, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, gsdev->channel, 0, dm, @@ -432,7 +432,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_BITTIMING, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, dev->channel, 0, dbt, @@ -546,7 +546,6 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf, urb->transfer_dma); - if (rc == -ENODEV) { netif_device_detach(netdev); } else { @@ -804,7 +803,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_BT_CONST, - USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, channel, 0, bt_const, @@ -921,7 +920,7 @@ static int gs_usb_probe(struct usb_interface *intf, rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_HOST_FORMAT, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, hconf, @@ -944,7 +943,7 @@ static int gs_usb_probe(struct usb_interface *intf, rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_DEVICE_CONFIG, - USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, dconf, From 7c42631376306fb3f34d51fda546b50a9b6dd6ec Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 12:03:40 +0100 Subject: [PATCH 0709/1911] can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer The priv->cmd_msg_buffer is allocated in the probe function, but never kfree()ed. This patch converts the kzalloc() to resource-managed kzalloc. 
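(Illustrative sketch of the resource-managed allocation pattern, with hypothetical names; it is not the full probe function. devm_kzalloc() ties the allocation's lifetime to the device, so the memory is released automatically when the device is unbound and no explicit kfree() is needed, which is what removes the leak fixed here.)

	/* Sketch: allocate per-device memory with a device-managed helper. */
	static int foo_probe(struct usb_interface *intf,	/* hypothetical driver */
			     const struct usb_device_id *id)
	{
		struct foo_priv *priv;	/* hypothetical private data */

		priv = devm_kzalloc(&intf->dev, sizeof(*priv), GFP_KERNEL);
		if (!priv)
			return -ENOMEM;

		/* ... normal setup; no matching kfree() in the error paths
		 * or in the disconnect callback.
		 */
		return 0;
	}
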
Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 108a30e1509756..d000cb62d6ae8c 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -951,8 +951,8 @@ static int usb_8dev_probe(struct usb_interface *intf, for (i = 0; i < MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->cmd_msg_buffer = kzalloc(sizeof(struct usb_8dev_cmd_msg), - GFP_KERNEL); + priv->cmd_msg_buffer = devm_kzalloc(&intf->dev, sizeof(struct usb_8dev_cmd_msg), + GFP_KERNEL); if (!priv->cmd_msg_buffer) goto cleanup_candev; @@ -966,7 +966,7 @@ static int usb_8dev_probe(struct usb_interface *intf, if (err) { netdev_err(netdev, "couldn't register CAN device: %d\n", err); - goto cleanup_cmd_msg_buffer; + goto cleanup_candev; } err = usb_8dev_cmd_version(priv, &version); @@ -987,9 +987,6 @@ static int usb_8dev_probe(struct usb_interface *intf, cleanup_unregister_candev: unregister_netdev(priv->netdev); -cleanup_cmd_msg_buffer: - kfree(priv->cmd_msg_buffer); - cleanup_candev: free_candev(netdev); From 66ddb82129df66a94219844c509074adb4330a28 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 15:42:49 +0100 Subject: [PATCH 0710/1911] can: flexcan: fix typo in comment This patch fixes the typo "Disble" -> "Disable". Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ea57fed375c634..13f0f219d8aa83 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -196,7 +196,7 @@ #define FLEXCAN_QUIRK_BROKEN_ERR_STATE BIT(1) /* [TR]WRN_INT not connected */ #define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ -#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disble Memory error detection */ +#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ /* Structure of the message buffer */ From da2f27e9e615d1c799c9582b15262458da61fddc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Mar 2017 15:33:26 +0100 Subject: [PATCH 0711/1911] netfilter: nf_conntrack_sip: fix wrong memory initialisation In commit 82de0be6862cd ("netfilter: Add helper array register/unregister functions"), struct nf_conntrack_helper sip[MAX_PORTS][4] was changed to sip[MAX_PORTS * 4], so the memory init should have been changed to memset(&sip[4 * i], 0, 4 * sizeof(sip[i])); But as the sip[] table is allocated in the BSS, it is already set to 0 Fixes: 82de0be6862cd ("netfilter: Add helper array register/unregister functions") Signed-off-by: Christophe Leroy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 24174c5202398f..0d17894798b5ca 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1628,8 +1628,6 @@ static int __init nf_conntrack_sip_init(void) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { - memset(&sip[i], 0, sizeof(sip[i])); - nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, SIP_EXPECT_MAX, From 
f9121355eb6f9babadb97bf5b34ab0cce7764406 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Mar 2017 18:15:11 +0100 Subject: [PATCH 0712/1911] netfilter: nft_set_rbtree: incorrect assumption on lower interval lookups In case of adjacent ranges, we may indeed see either the high part of the range in first place or the low part of it. Remove this incorrect assumption, let's make sure we annotate the low part of the interval in case of we have adjacent interva intervals so we hit a matching in lookups. Reported-by: Simon Hanisch Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 71e8fb886a73b7..78dfbf9588b368 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -60,11 +60,10 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, d = memcmp(this, key, set->klen); if (d < 0) { parent = parent->rb_left; - /* In case of adjacent ranges, we always see the high - * part of the range in first place, before the low one. - * So don't update interval if the keys are equal. - */ - if (interval && nft_rbtree_equal(set, this, interval)) + if (interval && + nft_rbtree_equal(set, this, interval) && + nft_rbtree_interval_end(this) && + !nft_rbtree_interval_end(interval)) continue; interval = rbe; } else if (d > 0) From 25e94a997b324b5f167f56d56d7106d38b78c9de Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Mar 2017 12:52:31 +0100 Subject: [PATCH 0713/1911] netfilter: nf_tables: don't call nfnetlink_set_err() if nfnetlink_send() fails The underlying nlmsg_multicast() already sets sk->sk_err for us to notify socket overruns, so we should not do anything with this return value. So we just call nfnetlink_set_err() if: 1) We fail to allocate the netlink message. or 2) We don't have enough space in the netlink message to place attributes, which means that we likely need to allocate a larger message. Before this patch, the internal ESRCH netlink error code was propagated to userspace, which is quite misleading. Netlink semantics mandate that listeners just hit ENOBUFS if the socket buffer overruns. 
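(A simplified sketch of the notifier shape this patch converges on, shown only for illustration; nf_tables_fill_foo_info() is a hypothetical stand-in for the per-object fill helper, whose real name and arguments vary per notifier in the diff below. The helpers return void, the nfnetlink_send() return value is ignored because nlmsg_multicast() already records overruns in sk->sk_err, and nfnetlink_set_err() is called only for the two local failures: failing to allocate the message, or running out of room while filling it.)

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

	if (nf_tables_fill_foo_info(skb, ctx) < 0) {	/* message too small */
		kfree_skb(skb);
		goto err;
	}

	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
err:
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
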
Reported-by: Alexander Alemayhu Tested-by: Alexander Alemayhu Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +- net/netfilter/nf_tables_api.c | 133 ++++++++++++------------------ 2 files changed, 58 insertions(+), 81 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ac84686aaafb0b..2aa8a9d80fbe82 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -988,9 +988,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ff7304ae58ac4f..5e0ccfd5bb37d1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -461,16 +461,15 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -482,14 +481,11 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_tables(struct sk_buff *skb, @@ -1050,16 +1046,15 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1072,14 +1067,11 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_chains(struct sk_buff *skb, @@ -1934,18 +1926,16 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_rule_notify(const struct nft_ctx *ctx, - const struct nft_rule *rule, - int event) +static void nf_tables_rule_notify(const struct nft_ctx *ctx, + const struct nft_rule *rule, int event) { struct sk_buff *skb; int 
err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1958,14 +1948,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } struct nft_rule_dump_ctx { @@ -2696,9 +2683,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, return -1; } -static int nf_tables_set_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - int event, gfp_t gfp_flags) +static void nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, int event, + gfp_t gfp_flags) { struct sk_buff *skb; u32 portid = ctx->portid; @@ -2706,9 +2693,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2719,12 +2705,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, - ctx->report, gfp_flags); + nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, + gfp_flags); + return; err: - if (err < 0) - nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) @@ -3504,10 +3489,10 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, return -1; } -static int nf_tables_setelem_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - const struct nft_set_elem *elem, - int event, u16 flags) +static void nf_tables_setelem_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_elem *elem, + int event, u16 flags) { struct net *net = ctx->net; u32 portid = ctx->portid; @@ -3515,9 +3500,8 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -3529,12 +3513,11 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, + GFP_KERNEL); + return; err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, @@ -4476,18 +4459,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t 
gfp) { struct sk_buff *skb; int err; if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp); if (skb == NULL) goto err; @@ -4499,21 +4481,18 @@ int nft_obj_notify(struct net *net, struct nft_table *table, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + return; err: - if (err < 0) { - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - } - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } EXPORT_SYMBOL_GPL(nft_obj_notify); -static int nf_tables_obj_notify(const struct nft_ctx *ctx, - struct nft_object *obj, int event) +static void nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) { - return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, - ctx->seq, event, ctx->afi->family, ctx->report, - GFP_KERNEL); + nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, + ctx->afi->family, ctx->report, GFP_KERNEL); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, @@ -4543,7 +4522,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, return -EMSGSIZE; } -static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) +static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, + int event) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; @@ -4551,9 +4531,8 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) if (nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) goto err; @@ -4565,14 +4544,12 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) goto err; } - err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, - NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); + nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + nlmsg_report(nlh), GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + -ENOBUFS); } static int nf_tables_getgen(struct net *net, struct sock *nlsk, From 92ad18ec26611e8a47639e600bd9dc42fbe2edcf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 12:53:26 -0500 Subject: [PATCH 0714/1911] ftrace/graph: Do not modify the EMPTY_HASH for the function_graph filter On boot up, if the kernel command line sets a graph function with the kernel command line options "ftrace_graph_filter" or "ftrace_graph_notrace" then it updates the corresponding function graph hash, ftrace_graph_hash or ftrace_graph_notrace_hash respectively. Unfortunately, at boot up, these variables are pointers to the "EMPTY_HASH" which is a constant used as a placeholder when a hash has no entities. The problem was that the command line version to set the hashes updated the actual EMPTY_HASH instead of creating a new hash for the function graph. This broke the EMPTY_HASH because not only did it modify a constant (not sure how that was allowed to happen, except maybe because it was done at early boot, when const variables were still mutable), but it made the filters have functions listed in them when they were actually empty.
The kernel command line function needs to allocate a new hash for the function graph filters and assign the necessary variables to that new hash instead. Link: http://lkml.kernel.org/r/1488420091.7212.17.camel@linux.intel.com Cc: Namhyung Kim Fixes: b9b0c831bed2 ("ftrace: Convert graph filter to use hash tables") Reported-by: Todd Brandt Tested-by: Todd Brandt Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fd84f2e30b6dcb..44122e7a6418bc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4421,10 +4421,9 @@ static void __init set_ftrace_early_graph(char *buf, int enable) char *func; struct ftrace_hash *hash; - if (enable) - hash = ftrace_graph_hash; - else - hash = ftrace_graph_notrace_hash; + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + if (WARN_ON(!hash)) + return; while (buf) { func = strsep(&buf, ","); @@ -4434,6 +4433,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); } + + if (enable) + ftrace_graph_hash = hash; + else + ftrace_graph_notrace_hash = hash; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From ab42632156becd35d3884ee5c14da2bedbf3149a Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 1 Mar 2017 14:04:53 -0800 Subject: [PATCH 0715/1911] module: set __jump_table alignment to 8 For powerpc the __jump_table section in modules is not aligned, this causes a WARN_ON() splat when loading a module containing a __jump_table. Strict alignment became necessary with commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key"), currently in linux-next, which uses the two least significant bits of pointers to __jump_table elements. Fix by forcing __jump_table to 8, which is the same alignment used for this section in the kernel proper. Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com Reviewed-by: Jason Baron Acked-by: Jessica Yu Acked-by: Michael Ellerman (powerpc) Tested-by: Sachin Sant Signed-off-by: David Daney Signed-off-by: Steven Rostedt (VMware) --- scripts/module-common.lds | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e551d..53234e85192a2c 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -19,4 +19,6 @@ SECTIONS { . = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Feb 2017 11:32:22 -0500 Subject: [PATCH 0716/1911] jump_label: Fix anonymous union initialization Pre-4.6 gcc do not allow direct static initialization of members of anonymous structs/unions. After commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE} definitions cannot be compiled with those older compilers. Placing initializers inside curved brackets works around this problem. 
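A minimal standalone illustration of the compiler limitation, using a hypothetical struct rather than the real static_key (the initializer style mirrors STATIC_KEY_INIT_TRUE before and after the fix):

struct sample {
        int counter;
        union {
                void *entries;
                unsigned long type;
        };                              /* anonymous union */
};

/* Rejected by gcc < 4.6: the designated initializer names a member of
 * the anonymous union directly.
 */
static struct sample broken = { .counter = 1, .entries = (void *)1UL };

/* Accepted by old and new compilers alike: the union initializer gets
 * its own set of braces.
 */
static struct sample fixed  = { .counter = 1, { .entries = (void *)1UL } };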
Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key") Reviewed-by: Jason Baron Compiled-by: Chris Mason Signed-off-by: Boris Ostrovsky Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 680c98b2f41c06..a7f90117cf7d4f 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 17:28:45 -0500 Subject: [PATCH 0717/1911] jump_label: Add comment about initialization order for anonymous unions Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") broke old compilers that could not handle static initialization of anonymous unions. Boris fixed it with a patch that added brackets around the static initializer. But this creates a dependency between those initializers and the structure's order of its fields. Document this dependency in case new fields are added to struct static_key in the future. Noted-by: Boris Ostrovsky Suggested-by: Chris Mason Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a7f90117cf7d4f..28e04a33535abe 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod From bf7165cfa23695c51998231c4efa080fe1d3548d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 28 Sep 2016 22:55:54 -0400 Subject: [PATCH 0718/1911] tracing: Add #undef to fix compile error There are several trace include files that define TRACE_INCLUDE_FILE. Include several of them in the same .c file (as I currently have in some code I am working on), and the compile will blow up with a "warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" Every other include file in include/trace/events/ avoids that issue by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h should have one, too. 
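Reduced to a pair of hypothetical trace headers (evta.h and evtb.h are illustrative names, not files in the tree), the problem and the fix look like this:

/* evta.h */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM evta
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE evta

/* evtb.h -- without its own #undef, including evta.h and then evtb.h
 * from the same .c file triggers:
 *     warning: "TRACE_INCLUDE_FILE" redefined
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM evtb
#undef TRACE_INCLUDE_FILE       /* the line syscalls.h was missing */
#define TRACE_INCLUDE_FILE evtb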
Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com Cc: stable@vger.kernel.org Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c7981359c..b35533b9427719 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) From 65a50c656276b0846bea09dd011c0a3d35b77f3e Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Thu, 2 Mar 2017 16:12:15 -0800 Subject: [PATCH 0719/1911] ftrace/graph: Add ftrace_graph_max_depth kernel parameter Early trace callgraphs can be extremely large on systems with several seconds of boot time. The max_depth parameter limits how deep the graph trace goes and reduces the output size. This parameter is the same as the max_graph_depth file in tracefs. Link: http://lkml.kernel.org/r/1488499935-23216-1-git-send-email-todd.e.brandt@linux.intel.com Signed-off-by: Todd Brandt [ changed comments about debugfs to tracefs ] Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ kernel/trace/ftrace.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 21e2d88637050b..1c9016b27ee90c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1173,6 +1173,12 @@ functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. + ftrace_graph_max_depth= + [FTRACE] Used with the function graph tracer. This is + the max depth it will trace into a function. This value + can be changed at run time by the max_graph_depth file + in the tracefs tracing directory. default: 0 (no limit) + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 44122e7a6418bc..d129ae51329a2d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4415,6 +4415,15 @@ static int __init set_graph_notrace_function(char *str) } __setup("ftrace_graph_notrace=", set_graph_notrace_function); +static int __init set_graph_max_depth_function(char *str) +{ + if (!str) + return 0; + fgraph_max_depth = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); + static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; From d67ce7da3b1eda838db7bdca86d7ec28ef37068e Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 2 Mar 2017 12:54:25 +0000 Subject: [PATCH 0720/1911] xen-netback: keep a local pointer for vif in backend_disconnect() This patch replaces use of 'be->vif' with 'vif' and hence generally makes the function look tidier. No semantic change. Signed-off-by: Paul Durrant Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index bb854f92f5a5cd..d82ddc913b4c93 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -492,24 +492,28 @@ static int backend_create_xenvif(struct backend_info *be) static void backend_disconnect(struct backend_info *be) { - if (be->vif) { + struct xenvif *vif = be->vif; + + if (vif) { unsigned int queue_index; - xen_unregister_watchers(be->vif); + xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS - xenvif_debugfs_delif(be->vif); + xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ - xenvif_disconnect_data(be->vif); - for (queue_index = 0; queue_index < be->vif->num_queues; ++queue_index) - xenvif_deinit_queue(&be->vif->queues[queue_index]); - - spin_lock(&be->vif->lock); - vfree(be->vif->queues); - be->vif->num_queues = 0; - be->vif->queues = NULL; - spin_unlock(&be->vif->lock); - - xenvif_disconnect_ctrl(be->vif); + xenvif_disconnect_data(vif); + for (queue_index = 0; + queue_index < vif->num_queues; + ++queue_index) + xenvif_deinit_queue(&vif->queues[queue_index]); + + spin_lock(&vif->lock); + vfree(vif->queues); + vif->num_queues = 0; + vif->queues = NULL; + spin_unlock(&vif->lock); + + xenvif_disconnect_ctrl(vif); } } From a254d8f9a8a928772ef4608342125ccb35b79d5d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 2 Mar 2017 12:54:26 +0000 Subject: [PATCH 0721/1911] xen-netback: don't vfree() queues under spinlock This leads to a BUG of the following form: [ 174.512861] switch: port 2(vif3.0) entered disabled state [ 174.522735] BUG: sleeping function called from invalid context at /home/build/linux-linus/mm/vmalloc.c:1441 [ 174.523451] in_atomic(): 1, irqs_disabled(): 0, pid: 28, name: xenwatch [ 174.524131] CPU: 1 PID: 28 Comm: xenwatch Tainted: G W 4.10.0upstream-11073-g4977ab6-dirty #1 [ 174.524819] Hardware name: MSI MS-7680/H61M-P23 (MS-7680), BIOS V17.0 03/14/2011 [ 174.525517] Call Trace: [ 174.526217] show_stack+0x23/0x60 [ 174.526899] dump_stack+0x5b/0x88 [ 174.527562] ___might_sleep+0xde/0x130 [ 174.528208] __might_sleep+0x35/0xa0 [ 174.528840] ? _raw_spin_unlock_irqrestore+0x13/0x20 [ 174.529463] ? __wake_up+0x40/0x50 [ 174.530089] remove_vm_area+0x20/0x90 [ 174.530724] __vunmap+0x1d/0xc0 [ 174.531346] ? delete_object_full+0x13/0x20 [ 174.531973] vfree+0x40/0x80 [ 174.532594] set_backend_state+0x18a/0xa90 [ 174.533221] ? dwc_scan_descriptors+0x24d/0x430 [ 174.533850] ? kfree+0x5b/0xc0 [ 174.534476] ? xenbus_read+0x3d/0x50 [ 174.535101] ? xenbus_read+0x3d/0x50 [ 174.535718] ? xenbus_gather+0x31/0x90 [ 174.536332] ? ___might_sleep+0xf6/0x130 [ 174.536945] frontend_changed+0x6b/0xd0 [ 174.537565] xenbus_otherend_changed+0x7d/0x80 [ 174.538185] frontend_changed+0x12/0x20 [ 174.538803] xenwatch_thread+0x74/0x110 [ 174.539417] ? woken_wake_function+0x20/0x20 [ 174.540049] kthread+0xe5/0x120 [ 174.540663] ? xenbus_printf+0x50/0x50 [ 174.541278] ? __kthread_init_worker+0x40/0x40 [ 174.541898] ret_from_fork+0x21/0x2c [ 174.548635] switch: port 2(vif3.0) entered disabled state This patch defers the vfree() until after the spinlock is released. Reported-by: Juergen Gross Signed-off-by: Paul Durrant Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d82ddc913b4c93..d2d7cd9145b1c2 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -496,6 +496,7 @@ static void backend_disconnect(struct backend_info *be) if (vif) { unsigned int queue_index; + struct xenvif_queue *queues; xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS @@ -508,11 +509,13 @@ static void backend_disconnect(struct backend_info *be) xenvif_deinit_queue(&vif->queues[queue_index]); spin_lock(&vif->lock); - vfree(vif->queues); + queues = vif->queues; vif->num_queues = 0; vif->queues = NULL; spin_unlock(&vif->lock); + vfree(queues); + xenvif_disconnect_ctrl(vif); } } From 16206524f6ea57d6dcd56fe46f9f4a06d4a1b113 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 2 Mar 2017 17:59:56 -0500 Subject: [PATCH 0722/1911] net: ethernet: bgmac: init sequence bug Fix a bug in the 'bgmac' driver init sequence that blind writes for init sequence where it should preserve most bits other than the ones it is deliberately manipulating. The code now checks to see if the adapter needs to be brought out of reset (where as before it was doing an IDM write to bring it out of reset regardless of whether it was in reset or not). Also, removed unnecessary usleeps (as there is already a read present to flush the IDM writes). Signed-off-by: Zac Schroff Signed-off-by: Jon Mason Fixes: f6a95a24957 ("net: ethernet: bgmac: Add platform device support") Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bgmac-platform.c | 27 ++++++++++++------- drivers/net/ethernet/broadcom/bgmac.h | 16 +++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index 7b1af950f312f3..da1b8b225eb9d3 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -51,8 +51,7 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value) static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) { - if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & - (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK) + if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN) return false; if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) return false; @@ -61,15 +60,25 @@ static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags) { - bgmac_idm_write(bgmac, BCMA_IOCTL, - (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags)); - bgmac_idm_read(bgmac, BCMA_IOCTL); + u32 val; - bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0); - bgmac_idm_read(bgmac, BCMA_RESET_CTL); - udelay(1); + /* The Reset Control register only contains a single bit to show if the + * controller is currently in reset. Do a sanity check here, just in + * case the bootloader happened to leave the device in reset. 
+ */ + val = bgmac_idm_read(bgmac, BCMA_RESET_CTL); + if (val) { + bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0); + bgmac_idm_read(bgmac, BCMA_RESET_CTL); + udelay(1); + } - bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags)); + val = bgmac_idm_read(bgmac, BCMA_IOCTL); + /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */ + val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER | + BGMAC_ARUSER); + val |= BGMAC_CLK_EN; + bgmac_idm_write(bgmac, BCMA_IOCTL, val); bgmac_idm_read(bgmac, BCMA_IOCTL); udelay(1); } diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 248727dc62f22c..6d1c6ff1ed963e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -213,6 +213,22 @@ /* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */ #define BGMAC_BCMA_IOCTL_SW_CLKEN 0x00000004 /* PHY Clock Enable */ #define BGMAC_BCMA_IOCTL_SW_RESET 0x00000008 /* PHY Reset */ +/* The IOCTL values appear to be different in NS, NSP, and NS2, and do not match + * the values directly above + */ +#define BGMAC_CLK_EN BIT(0) +#define BGMAC_RESERVED_0 BIT(1) +#define BGMAC_SOURCE_SYNC_MODE_EN BIT(2) +#define BGMAC_DEST_SYNC_MODE_EN BIT(3) +#define BGMAC_TX_CLK_OUT_INVERT_EN BIT(4) +#define BGMAC_DIRECT_GMII_MODE BIT(5) +#define BGMAC_CLK_250_SEL BIT(6) +#define BGMAC_AWCACHE (0xf << 7) +#define BGMAC_RESERVED_1 (0x1f << 11) +#define BGMAC_ARCACHE (0xf << 16) +#define BGMAC_AWUSER (0x3f << 20) +#define BGMAC_ARUSER (0x3f << 26) +#define BGMAC_RESERVED BIT(31) /* BCMA GMAC core specific IO status (BCMA_IOST) flags */ #define BGMAC_BCMA_IOST_ATTACHED 0x00000800 From fa42245dff4a5f2f8f208da542acbd80c22f7c65 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Thu, 2 Mar 2017 17:59:57 -0500 Subject: [PATCH 0723/1911] net: ethernet: bgmac: mac address change bug ndo_set_mac_address() passes struct sockaddr * as 2nd parameter to bgmac_set_mac_address() but code assumed u8 *. This caused two bytes chopping and the wrong mac address was configured. Signed-off-by: Hari Vyas Signed-off-by: Jon Mason Fixes: 4e209001b86 ("bgmac: write mac address to hardware in ndo_set_mac_address") Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 415046750bb449..fd66fca00e0177 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1223,12 +1223,16 @@ static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb, static int bgmac_set_mac_address(struct net_device *net_dev, void *addr) { struct bgmac *bgmac = netdev_priv(net_dev); + struct sockaddr *sa = addr; int ret; ret = eth_prepare_mac_addr_change(net_dev, addr); if (ret < 0) return ret; - bgmac_write_mac_address(bgmac, (u8 *)addr); + + ether_addr_copy(net_dev->dev_addr, sa->sa_data); + bgmac_write_mac_address(bgmac, net_dev->dev_addr); + eth_commit_mac_addr_change(net_dev, addr); return 0; } From 9383b33771e566fa547daa2d09c6e0f1aaa298c3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Mar 2017 15:26:20 -0800 Subject: [PATCH 0724/1911] nfp: don't tell FW about the reserved buffer space Since commit c0f031bc8866 ("nfp_net: use alloc_frag() and build_skb()") we are allocating buffers which have to hold both the data and skb to be created in place by build_skb(). 
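Such a buffer is laid out as headroom, then packet data, then the struct skb_shared_info that build_skb() needs at the tail. A rough sketch of that accounting, with an illustrative helper that is not part of the driver:

#include <linux/skbuff.h>

/* Illustrative only: fl_bufsz is the full per-buffer allocation,
 * headroom is the space kept in front of the packet data, and
 * build_skb() needs a struct skb_shared_info at the tail.  What
 * remains in the middle is the only space the device may DMA into.
 */
static unsigned int rx_dma_space(unsigned int fl_bufsz, unsigned int headroom)
{
        return fl_bufsz - headroom -
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
}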
FW should only be told about the buffer space it can DMA to, that is without the build_skb() headroom and tailroom. Note: firmware applications should validate the buffers against both MTU and free list buffer size so oversized packets would not pass through the NIC anyway. Fixes: c0f031bc8866 ("nfp: use alloc_frag() and build_skb()") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 074259cc8e066d..00a83218857a97 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2198,7 +2198,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nfp_net_write_mac_addr(nn); nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu); - nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, + nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA); /* Enable device */ new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; From d58cebb79b62ff84b537a35423b8d6b7f0746985 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Mar 2017 15:26:21 -0800 Subject: [PATCH 0725/1911] nfp: correct DMA direction in XDP DMA sync dma_sync_single_for_*() takes the direction in which the buffer was mapped, not the direction of the sync. We should sync XDP buffers bidirectionally. Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 00a83218857a97..9179a99563afa8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1498,7 +1498,7 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, txbuf->real_len = pkt_len; dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, - pkt_len, DMA_TO_DEVICE); + pkt_len, DMA_BIDIRECTIONAL); /* Build TX descriptor */ txd = &tx_ring->txds[wr_idx]; @@ -1611,7 +1611,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) dma_sync_single_for_cpu(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, - pkt_len, DMA_FROM_DEVICE); + pkt_len, DMA_BIDIRECTIONAL); act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off, pkt_len); switch (act) { From 37411cad633f5e41f8a13007654909d21b19363a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Mar 2017 23:48:52 +0000 Subject: [PATCH 0726/1911] rxrpc: Fix potential NULL-pointer exception Fix a potential NULL-pointer exception in rxrpc_do_sendmsg(). The call state check that I added should have gone into the else-body of the if-statement where we actually have a call to check. Found by CoverityScan CID#1414316 ("Dereference after null check"). Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/sendmsg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 31c1538c1a8de6..27685d8cba1aef 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -517,13 +517,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -EBADSLT; if (cmd != RXRPC_CMD_SEND_DATA) goto error_release_sock; - ret = -EBUSY; - if (call->state == RXRPC_CALL_UNINITIALISED || - call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || - call->state == RXRPC_CALL_SERVER_PREALLOC || - call->state == RXRPC_CALL_SERVER_SECURING || - call->state == RXRPC_CALL_SERVER_ACCEPTING) - goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, exclusive); /* The socket is now unlocked... */ @@ -531,6 +524,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ } else { + ret = -EBUSY; + if (call->state == RXRPC_CALL_UNINITIALISED || + call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || + call->state == RXRPC_CALL_SERVER_PREALLOC || + call->state == RXRPC_CALL_SERVER_SECURING || + call->state == RXRPC_CALL_SERVER_ACCEPTING) + goto error_release_sock; + ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); if (ret < 0) { From a8d63a53b3eac8626f62336dcc327c18f1bbca78 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 3 Mar 2017 00:44:26 -0500 Subject: [PATCH 0727/1911] rds: remove unnecessary returned value check The function rds_trans_register always returns 0. As such, it is not necessary to check the returned value. Cc: Joe Jin Cc: Junxiao Bi Signed-off-by: Zhu Yanjun Reviewed-by: Yuval Shaia Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/ib.c | 6 +----- net/rds/rds.h | 2 +- net/rds/tcp.c | 6 +----- net/rds/transport.c | 4 +--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 0f557b24331121..7a64c8db81abdc 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -438,16 +438,12 @@ int rds_ib_init(void) if (ret) goto out_sysctl; - ret = rds_trans_register(&rds_ib_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); goto out; -out_recv: - rds_ib_recv_exit(); out_sysctl: rds_ib_sysctl_exit(); out_ibreg: diff --git a/net/rds/rds.h b/net/rds/rds.h index 07fff73dd4f3f9..6f523ddfe1fb35 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -910,7 +910,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ -int rds_trans_register(struct rds_transport *trans); +void rds_trans_register(struct rds_transport *trans); void rds_trans_unregister(struct rds_transport *trans); struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr); void rds_trans_put(struct rds_transport *trans); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5438f6725092b7..a973d3b4dff0b2 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -652,16 +652,12 @@ static int rds_tcp_init(void) if (ret) goto out_pernet; - ret = rds_trans_register(&rds_tcp_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; -out_recv: - rds_tcp_recv_exit(); out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); out_notifier: diff --git a/net/rds/transport.c b/net/rds/transport.c index 2ffd3e30c6434e..0b188dd0a344cb 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -40,7 +40,7 @@ static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); -int rds_trans_register(struct rds_transport *trans) +void rds_trans_register(struct rds_transport *trans) { BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); @@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans) } up_write(&rds_trans_sem); - - return 0; } EXPORT_SYMBOL_GPL(rds_trans_register); From d0346b033899b9affa4da8c32bfb574dfb89859e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 3 Mar 2017 15:22:09 +0000 Subject: [PATCH 0728/1911] sfc: avoid max() in array size It confuses sparse, which thinks the size isn't constant. Let's achieve the same thing with a BUILD_BUG_ON, since we know which one should be bigger and don't expect them ever to change. Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef10.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 92e1c6d8b2937e..4d88e8532d3ee4 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -828,9 +828,7 @@ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) static int efx_ef10_link_piobufs(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - _MCDI_DECLARE_BUF(inbuf, - max(MC_CMD_LINK_PIOBUF_IN_LEN, - MC_CMD_UNLINK_PIOBUF_IN_LEN)); + MCDI_DECLARE_BUF(inbuf, MC_CMD_LINK_PIOBUF_IN_LEN); struct efx_channel *channel; struct efx_tx_queue *tx_queue; unsigned int offset, index; @@ -839,8 +837,6 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0); BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0); - memset(inbuf, 0, sizeof(inbuf)); - /* Link a buffer to each VI in the write-combining mapping */ for (index = 0; index < nic_data->n_piobufs; ++index) { MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE, @@ -920,6 +916,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) return 0; fail: + /* inbuf was defined for MC_CMD_LINK_PIOBUF. We can use the same + * buffer for MC_CMD_UNLINK_PIOBUF because it's shorter. + */ + BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_IN_LEN < MC_CMD_UNLINK_PIOBUF_IN_LEN); while (index--) { MCDI_SET_DWORD(inbuf, UNLINK_PIOBUF_IN_TXQ_INSTANCE, nic_data->pio_write_vi_base + index); From 6d43131c158f42e2138d83495d19c70ed5cc0ffe Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 3 Mar 2017 15:22:27 +0000 Subject: [PATCH 0729/1911] sfc: fix IPID endianness in TSOv2 The value we read from the header is in network byte order, whereas EFX_POPULATE_QWORD_* takes values in host byte order (which it then converts to little-endian, as MCDI is little-endian). Fixes: e9117e5099ea ("sfc: Firmware-Assisted TSO version 2") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4d88e8532d3ee4..c60c2d4c646a89 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2183,7 +2183,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, /* Modify IPv4 header if needed. */ ip->tot_len = 0; ip->check = 0; - ipv4_id = ip->id; + ipv4_id = ntohs(ip->id); } else { /* Modify IPv6 header if needed. */ struct ipv6hdr *ipv6 = ipv6_hdr(skb); From d3c1a297b6fe60fe7be637cb067b27fca46be504 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Feb 2017 10:42:14 +0200 Subject: [PATCH 0730/1911] Documentation: Update path to sysrq.txt Commit 9d85025b0418 ("docs-rst: create an user's manual book") moved the sysrq.txt leaving old paths in the kernel docs. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- Documentation/media/v4l-drivers/bttv.rst | 2 +- Documentation/s390/Debugging390.txt | 2 +- Documentation/sysctl/kernel.txt | 2 +- Documentation/virtual/uml/UserModeLinux-HOWTO.txt | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/media/v4l-drivers/bttv.rst b/Documentation/media/v4l-drivers/bttv.rst index bc63b12efafd0c..195ccaac281615 100644 --- a/Documentation/media/v4l-drivers/bttv.rst +++ b/Documentation/media/v4l-drivers/bttv.rst @@ -312,7 +312,7 @@ information out of a register+stack dump printed by the kernel on protection faults (so-called "kernel oops"). If you run into some kind of deadlock, you can try to dump a call trace -for each process using sysrq-t (see Documentation/sysrq.txt). +for each process using sysrq-t (see Documentation/admin-guide/sysrq.rst). This way it is possible to figure where *exactly* some process in "D" state is stuck. diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 3df8babcdc41ed..5ae7f868a007bd 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -2116,7 +2116,7 @@ The sysrq key reading is very picky ( I have to type the keys in an This is particularly useful for syncing disks unmounting & rebooting if the machine gets partially hung. -Read Documentation/sysrq.txt for more info +Read Documentation/admin-guide/sysrq.rst for more info References: =========== diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a32b4b74864498..bac23c19836050 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -85,7 +85,7 @@ show up in /proc/sys/kernel: - softlockup_all_cpu_backtrace - soft_watchdog - stop-a [ SPARC only ] -- sysrq ==> Documentation/sysrq.txt +- sysrq ==> Documentation/admin-guide/sysrq.rst - sysctl_writes_strict - tainted - threads-max diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt index f4099ca6b48354..87b80f589e1c01 100644 --- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt +++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt @@ -2401,9 +2401,9 @@ This takes one argument, which is a single letter. It calls the generic kernel's SysRq driver, which does whatever is called for by - that argument. See the SysRq documentation in Documentation/sysrq.txt - in your favorite kernel tree to see what letters are valid and what - they do. + that argument. See the SysRq documentation in + Documentation/admin-guide/sysrq.rst in your favorite kernel tree to + see what letters are valid and what they do. From 2eb6a4b26d13c51bdb8d5aefdfc846c0624b8ce4 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 1 Mar 2017 17:05:28 +0800 Subject: [PATCH 0731/1911] pcieaer doc: update the link The original link is empty, replace it. 
Signed-off-by: Cao jin Signed-off-by: Jonathan Corbet --- Documentation/PCI/pcieaer-howto.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index ea8cafba255c8b..acd0dddd6bb8ba 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -256,7 +256,7 @@ After reboot with new kernel or insert the module, a device file named Then, you need a user space tool named aer-inject, which can be gotten from: - http://www.kernel.org/pub/linux/utils/pci/aer-inject/ + https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ More information about aer-inject can be found in the document comes with its source code. From 9857b1ad4740ec2429e8ec0a39946d45a8d2cff9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Mar 2017 15:44:02 +0900 Subject: [PATCH 0732/1911] doc/ko_KR/memory-barriers: Update control-dependencies section This commit applies upstream change, commit c8241f8553e8 ("doc: Update control-dependencies section of memory-barriers.txt"), to Korean translation. Signed-off-by: SeongJae Park Signed-off-by: Jonathan Corbet --- .../translations/ko_KR/memory-barriers.txt | 68 ++++++++++--------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index a3228a676cc18b..ce0b48d69eaae9 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -662,6 +662,10 @@ include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를 컨트롤 의존성 ------------- +현재의 컴파일러들은 컨트롤 의존성을 이해하고 있지 않기 때문에 컨트롤 의존성은 +약간 다루기 어려울 수 있습니다. 이 섹션의 목적은 여러분이 컴파일러의 무시로 +인해 여러분의 코드가 망가지는 걸 막을 수 있도록 돕는겁니다. + 로드-로드 컨트롤 의존성은 데이터 의존성 배리어만으로는 정확히 동작할 수가 없어서 읽기 메모리 배리어를 필요로 합니다. 아래의 코드를 봅시다: @@ -689,20 +693,21 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); } 컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다. 그렇다곤 -하나, READ_ONCE() 는 반드시 사용해야 함을 부디 명심하세요! READ_ONCE() 가 -없다면, 컴파일러가 'a' 로부터의 로드를 'a' 로부터의 또다른 로드와, 'b' 로의 -스토어를 'b' 로의 또다른 스토어와 조합해 버려 매우 비직관적인 결과를 초래할 수 -있습니다. +하나, READ_ONCE() 도 WRITE_ONCE() 도 선택사항이 아니라 필수사항임을 부디 +명심하세요! READ_ONCE() 가 없다면, 컴파일러는 'a' 로부터의 로드를 'a' 로부터의 +또다른 로드와 조합할 수 있습니다. WRITE_ONCE() 가 없다면, 컴파일러는 'b' 로의 +스토어를 'b' 로의 또라느 스토어들과 조합할 수 있습니다. 두 경우 모두 순서에 +있어 상당히 비직관적인 결과를 초래할 수 있습니다. 이걸로 끝이 아닌게, 컴파일러가 변수 'a' 의 값이 항상 0이 아니라고 증명할 수 있다면, 앞의 예에서 "if" 문을 없애서 다음과 같이 최적화 할 수도 있습니다: q = a; - b = p; /* BUG: Compiler and CPU can both reorder!!! */ + b = 1; /* BUG: Compiler and CPU can both reorder!!! */ 그러니 READ_ONCE() 를 반드시 사용하세요. @@ -712,11 +717,11 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { barrier(); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { barrier(); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something_else(); } @@ -725,12 +730,12 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); barrier(); - WRITE_ONCE(b, p); /* BUG: No ordering vs. load from a!!! */ + WRITE_ONCE(b, 1); /* BUG: No ordering vs. load from a!!! */ if (q) { - /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! */ do_something(); } else { - /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! 
*/ do_something_else(); } @@ -742,10 +747,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - smp_store_release(&b, p); + smp_store_release(&b, 1); do_something(); } else { - smp_store_release(&b, p); + smp_store_release(&b, 1); do_something_else(); } @@ -754,10 +759,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -770,10 +775,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q % MAX) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -781,7 +786,7 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 위의 코드를 아래와 같이 바꿔버릴 수 있습니다: q = READ_ONCE(a); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something_else(); 이렇게 되면, CPU 는 변수 'a' 로부터의 로드와 변수 'b' 로의 스토어 사이의 순서를 @@ -793,10 +798,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ if (q % MAX) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -828,35 +833,33 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); } - WRITE_ONCE(c, 1); /* BUG: No ordering against the read from "a". */ + WRITE_ONCE(c, 1); /* BUG: No ordering against the read from 'a'. */ -컴파일러는 volatile 타입에 대한 액세스를 재배치 할 수 없고 이 조건 하의 "b" +컴파일러는 volatile 타입에 대한 액세스를 재배치 할 수 없고 이 조건 하의 'b' 로의 쓰기를 재배치 할 수 없기 때문에 여기에 순서 규칙이 존재한다고 주장하고 싶을 겁니다. 불행히도 이 경우에, 컴파일러는 다음의 가상의 pseudo-assembly 언어 -코드처럼 "b" 로의 두개의 쓰기 오퍼레이션을 conditional-move 인스트럭션으로 +코드처럼 'b' 로의 두개의 쓰기 오퍼레이션을 conditional-move 인스트럭션으로 번역할 수 있습니다: ld r1,a - ld r2,p - ld r3,r cmp r1,$0 - cmov,ne r4,r2 - cmov,eq r4,r3 + cmov,ne r4,$1 + cmov,eq r4,$2 st r4,b st $1,c -완화된 순서 규칙의 CPU 는 "a" 로부터의 로드와 "c" 로의 스토어 사이에 어떤 +완화된 순서 규칙의 CPU 는 'a' 로부터의 로드와 'c' 로의 스토어 사이에 어떤 종류의 의존성도 갖지 않을 겁니다. 이 컨트롤 의존성은 두개의 cmov 인스트럭션과 거기에 의존하는 스토어 에게만 적용될 겁니다. 짧게 말하자면, 컨트롤 의존성은 주어진 if 문의 then 절과 else 절에게만 (그리고 이 두 절 내에서 호출되는 함수들에게까지) 적용되지, 이 if 문을 뒤따르는 코드에는 적용되지 않습니다. 마지막으로, 컨트롤 의존성은 이행성 (transitivity) 을 제공하지 -않습니다-. 이건 -x 와 y 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로 +'x' 와 'y' 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로 보이겠습니다: CPU 0 CPU 1 @@ -924,6 +927,9 @@ http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf 와 (*) 컨트롤 의존성은 이행성을 제공하지 -않습니다-. 이행성이 필요하다면, smp_mb() 를 사용하세요. + (*) 컴파일러는 컨트롤 의존성을 이해하고 있지 않습니다. 따라서 컴파일러가 + 여러분의 코드를 망가뜨리지 않도록 하는건 여러분이 해야 하는 일입니다. + SMP 배리어 짝맞추기 -------------------- From f3fc83e55533b9fddac1d4eda79956768df569ea Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Fri, 3 Mar 2017 22:43:30 +0000 Subject: [PATCH 0733/1911] docs: Fix htmldocs build failure Build of HTML docs failing due to conversion of deviceiobook.tmpl in 8a8a602f and regulator.tmpl in 028f2533 to RST without removing from DOCBOOKS in Makefile, resulting (in the case of deviceiobook) the following error: make[1]: *** No rule to make target 'Documentation/DocBook/deviceiobook.xml', needed by 'Documentation/DocBook/deviceiobook.aux.xml'. Stop. Makefile:1452: recipe for target 'htmldocs' failed make: *** [htmldocs] Error 2 Update DOCBOOKS to reflect available books. 
Signed-off-by: Martyn Welch Signed-off-by: Jonathan Corbet --- Documentation/DocBook/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 7b8831fb4e37d8..c339cf59ad6930 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -7,12 +7,12 @@ # list of DOCBOOKS. DOCBOOKS := z8530book.xml \ - kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ + kernel-hacking.xml kernel-locking.xml \ writing_usb_driver.xml networking.xml \ kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - sh.xml regulator.xml w1.xml \ + sh.xml w1.xml \ writing_musb_glue_layer.xml ifeq ($(DOCBOOKS),) From fd5d666932d51b2552ecc0280047d6b35d9b6cd1 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 3 Mar 2017 12:24:05 +0000 Subject: [PATCH 0734/1911] Documentation/sphinx: fix primary_domain configuration With Sphinx 1.5.3 I get the warning: WARNING: primary_domain 'C' not found, ignored. It seems that domain names in Sphinx are case-sensitive and for the C domain the name must be lower case. Signed-off-by: John Keeping Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index f6823cf01275aa..7fadb3b8329343 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -135,7 +135,7 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False -primary_domain = 'C' +primary_domain = 'c' highlight_language = 'none' # -- Options for HTML output ---------------------------------------------- From 2bc756e7dde20972300bb8e2e46dd430d6d2d5db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:22:29 +0100 Subject: [PATCH 0735/1911] cpufreq: intel_pstate: Do not use performance_limits in passive mode Using performance_limits in the passive mode doesn't make sense, because in that mode the global limits are applied to the frequency selected by the scaling governor. The maximum and minimum P-state limits in performance_limits are both set to 100 percent which will put all CPUs into the turbo range regardless of what governor is used and what frequencies are selected by it (that is particularly undesirable on CPUs with the generic powersave governor attached). For this reason, make intel_pstate_register_driver() always point limits to powersave_limits in the passive mode. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ee13f195c377e..7fc1f8a0ef4441 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2442,8 +2442,11 @@ static int intel_pstate_register_driver(void) intel_pstate_init_limits(&powersave_limits); intel_pstate_set_performance_limits(&performance_limits); - limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? 
- &performance_limits : &powersave_limits; + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && + intel_pstate_driver == &intel_pstate) + limits = &performance_limits; + else + limits = &powersave_limits; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { From 7f17326fc0b69afbda7aed6c4ce8e2328b74203b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:24:36 +0100 Subject: [PATCH 0736/1911] cpufreq: intel_pstate: Fix intel_cpufreq_verify_policy() The intel_pstate_update_perf_limits() called from intel_cpufreq_verify_policy() may cause global P-state limits to change which is generally confusing and unnecessary. In the passive mode the global limits are only applied to the frequency selected by the scaling governor (they are not taken into account by governors when making decisions anyway), so making them follow the per-policy limits serves no purpose and may go against user expectations (as it generally causes the global attributes in sysfs to change even though they have not been written to in some cases). Fix that by dropping the intel_pstate_update_perf_limits() invocation from intel_cpufreq_verify_policy() (which also reduces the code size by a few lines). This change does not affect the per-CPU limits case, because those limits allow any P-state to be set by default in the passive mode and it removes the only piece of code updating them in that mode, so the per-policy settings will be the only ones taken into account in that case as expected. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7fc1f8a0ef4441..04b2aa16227637 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2306,7 +2306,6 @@ static struct cpufreq_driver intel_pstate = { static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits = limits; update_turbo_state(); policy->cpuinfo.max_freq = limits->turbo_disabled ? @@ -2314,15 +2313,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_cpu_limits(policy); - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - mutex_lock(&intel_pstate_limits_lock); - - intel_pstate_update_perf_limits(policy, perf_limits); - - mutex_unlock(&intel_pstate_limits_lock); - return 0; } From 6407829901f074cf6beef566d6af9a0b0e238f0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Mar 2017 23:51:31 +0100 Subject: [PATCH 0737/1911] cpufreq: intel_pstate: Avoid triggering cpu_frequency tracepoint unnecessarily In the passive mode the cpu_frequency trace event is already triggered by the cpufreq core or by scaling governors, so intel_pstate should not trigger it once again for the same P-state updates. In addition to that, the frequency returned by intel_cpufreq_fast_switch() and passed via freqs.new from intel_cpufreq_target() to cpufreq_freq_transition_end() should reflect the P-state actually set, so make that happen. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 04b2aa16227637..5e066b33259874 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1879,13 +1879,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); return pstate; } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) { - pstate = intel_pstate_prepare_request(cpu, pstate); if (pstate == cpu->pstate.current_pstate) return; @@ -1905,6 +1903,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) update_turbo_state(); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; @@ -2365,6 +2365,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } + freqs.new = target_pstate * cpu->pstate.scaling; cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -2378,8 +2379,9 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); - return target_freq; + return target_pstate * cpu->pstate.scaling; } static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) From f78ef7cd9a0686b979679d0de061c6dbfd8d649e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Mar 2017 12:21:14 -0800 Subject: [PATCH 0738/1911] strparser: destroy workqueue on module exit Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/strparser/strparser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 41adf362936d7d..b5c279b2268017 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -504,6 +504,7 @@ static int __init strp_mod_init(void) static void __exit strp_mod_exit(void) { + destroy_workqueue(strp_wq); } module_init(strp_mod_init); module_exit(strp_mod_exit); From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 2 Mar 2017 17:41:24 +1100 Subject: [PATCH 0739/1911] powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n The recent commit to allow calling OPAL calls in real mode, commit ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls"), introduced a bug when CONFIG_JUMP_LABEL=n. The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled. This leads to us using the tracepoint refcount as the MSR value, typically zero, and saving that into PACASAVEDMSR. When we return from OPAL we use that value as the MSR value for rfid, meaning we switch to 32-bit BE real mode - hilarity ensues. Fix it by using r11 in OPAL_BRANCH, which is not live at the time the macro is used in OPAL_CALL. 
Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls") Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d162..da8a0f7a035c10 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: [PATCH 0740/1911] powerpc/64: Fix checksum folding in csum_add() Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3becf..842124b199b585 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" From 10e5778f54765c96fe0c8f104b7a030e5b35bc72 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:10 -0800 Subject: [PATCH 0741/1911] ARM: OMAP2+: Fix device node reference counts After commit 0549bde0fcb1 ("of: fix of_node leak caused in of_find_node_opts_by_path"), the following error may be reported when running omap images. OF: ERROR: Bad of_node_put() on /ocp@68000000 CPU: 0 PID: 0 Comm: swapper Not tainted 4.10.0-rc7-next-20170210 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xac) [] (dump_stack) from [] (kobject_release+0x48/0x7c) [] (kobject_release) from [] (of_find_node_by_name+0x74/0x94) [] (of_find_node_by_name) from [] (omap3xxx_hwmod_is_hs_ip_block_usable+0x24/0x2c) [] (omap3xxx_hwmod_is_hs_ip_block_usable) from [] (omap3xxx_hwmod_init+0x180/0x274) [] (omap3xxx_hwmod_init) from [] (omap3_init_early+0xa0/0x11c) [] (omap3_init_early) from [] (omap3430_init_early+0x8/0x30) [] (omap3430_init_early) from [] (setup_arch+0xc04/0xc34) [] (setup_arch) from [] (start_kernel+0x68/0x38c) [] (start_kernel) from [<8020807c>] (0x8020807c) of_find_node_by_name() drops the reference to the passed device node. The commit referenced above exposes this problem. To fix the problem, use of_get_child_by_name() instead of of_find_node_by_name(); of_get_child_by_name() does not drop the reference count of passed device nodes. While semantically different, we only look for immediate children of the passed device node, so of_get_child_by_name() is a more appropriate function to use anyway. 
Release the reference to the device node obtained with of_get_child_by_name() after it is no longer needed to avoid another device node leak. While at it, clean up the code and change the return type of omap3xxx_hwmod_is_hs_ip_block_usable() to bool to match its use and the return type of of_device_is_available(). Cc: Qi Hou Cc: Peter Rosin Cc: Rob Herring Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 507ff0795a8e25..0fa08c1e4701ca 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3131,16 +3131,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = { * Return: 0 if device named @dev_name is not likely to be accessible, * or 1 if it is likely to be accessible. */ -static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, - const char *dev_name) +static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, + const char *dev_name) { + struct device_node *node; + bool available; + if (!bus) - return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0; + return omap_type() == OMAP2_DEVICE_TYPE_GP; - if (of_device_is_available(of_find_node_by_name(bus, dev_name))) - return 1; + node = of_get_child_by_name(bus, dev_name); + available = of_device_is_available(node); + of_node_put(node); - return 0; + return available; } int __init omap3xxx_hwmod_init(void) From b92675d998a9fa37fe9e0e35053a95b4a23c158b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:11 -0800 Subject: [PATCH 0742/1911] ARM: OMAP2+: Release device node after it is no longer needed. The device node returned by of_find_node_by_name() needs to be released after it is no longer needed to avoid a device node leak. Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 0fa08c1e4701ca..1435fee39a89ba 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3213,15 +3213,20 @@ int __init omap3xxx_hwmod_init(void) if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) { r = omap_hwmod_register_links(h_sham); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) { r = omap_hwmod_register_links(h_aes); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } + of_node_put(bus); /* * Register hwmod links specific to certain ES levels of a From 1c2bcc766be44467809f1798cd4ceacafe20a852 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 22 Feb 2017 17:25:42 +0100 Subject: [PATCH 0743/1911] batman-adv: Keep fragments equally sized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batman-adv fragmentation packets have the design problem that they cannot be refragmented and cannot handle padding by the underlying link. The latter often leads to problems when networks are incorrectly configured and don't use a common MTU. 
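As a rough sketch, the equal-split arithmetic introduced by this patch (see the hunk and the worked example below) first rounds the number of fragments up and then divides the payload evenly across that count:

	/* ceil(len / max_chunk): number of fragments actually needed */
	num_fragments = (skb_len - 1) / max_fragment_size + 1;
	/* ceil(len / num_fragments): spread the payload evenly over them */
	max_fragment_size = (skb_len - 1) / num_fragments + 1;

With the 1280 byte link MTU discussed below this turns a 1260/21 byte split of the payload into a 641/640 byte split, which is where the 675 and 674 byte frames in the example come from (each chunk plus the 20 byte fragment header and the 14 byte ethernet header).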
The sender could for example fragment a 1271 byte frame (plus external ethernet header (14) and batadv unicast header (10)) to fit in a 1280 bytes large MTU of the underlying link (max. 1294 byte frames). This would create a 1294 bytes large frame (fragment 2) and a 55 bytes large frame (fragment 1). The extra 54 bytes are the fragment header (20) added to each fragment and the external ethernet header (14) for the second fragment. Let us assume that the next hop is then not able to transport 1294 bytes to its next hop. The 1294 byte large frame will be dropped but the 55 bytes large fragment will still be forwarded to its destination. Or let us assume that the underlying hardware requires that each frame has a minimum size (e.g. 60 bytes). Then it will pad the 55 bytes frame to 60 bytes. The receiver of the 60 bytes frame will no longer be able to correctly assemble the two frames together because it is not aware that 5 bytes of the 60 bytes frame are padding and don't belong to the reassembled frame. This can partly be avoided by splitting frames more equally. In this example, the 675 and 674 bytes large fragment frames could both potentially reach its destination without being too large or too small. Reported-by: Martin Weinelt Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 11149e5be4e0ef..106bda56ec98ba 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -404,7 +404,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, * batadv_frag_create - create a fragment from skb * @skb: skb to create fragment from * @frag_head: header to use in new fragment - * @mtu: size of new fragment + * @fragment_size: size of new fragment * * Split the passed skb into two fragments: A new one with size matching the * passed mtu and the old one with the rest. 
The new skb contains data from the @@ -414,11 +414,11 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, */ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, struct batadv_frag_packet *frag_head, - unsigned int mtu) + unsigned int fragment_size) { struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); - unsigned int fragment_size = mtu - header_size; + unsigned int mtu = fragment_size + header_size; skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); if (!skb_fragment) @@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct sk_buff *skb_fragment; unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); - unsigned int max_fragment_size, max_packet_size; + unsigned int max_fragment_size, num_fragments; int ret; /* To avoid merge and refragmentation at next-hops we never send @@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE); max_fragment_size = mtu - header_size; - max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; + + if (skb->len == 0 || max_fragment_size == 0) + return -EINVAL; + + num_fragments = (skb->len - 1) / max_fragment_size + 1; + max_fragment_size = (skb->len - 1) / num_fragments + 1; /* Don't even try to fragment, if we need more than 16 fragments */ - if (skb->len > max_packet_size) { + if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) { ret = -EAGAIN; goto free_skb; } @@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, mtu); + skb_fragment = batadv_frag_create(skb, &frag_header, + max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; goto put_primary_if; From 1a9070ec91b37234fe915849b767c61584c64a44 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 4 Mar 2017 15:48:50 +0100 Subject: [PATCH 0744/1911] batman-adv: Initialize gw sel_class via batadv_algo The gateway selection class variable is shared between different algorithm versions. But the interpretation of the content is algorithm specific. The initialization is therefore also algorithm specific. But this was implemented incorrectly and the initialization for BATMAN_V always overwrote the value previously written for BATMAN_IV. This could only be avoided when BATMAN_V was disabled during compile time. Using a special batadv_algo hook for this initialization avoids this problem. Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. 
V - implement GW selection logic") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 11 +++++++++++ net/batman-adv/bat_v.c | 14 +++++++++++--- net/batman-adv/gateway_common.c | 5 +++++ net/batman-adv/soft-interface.c | 1 - net/batman-adv/types.h | 2 ++ 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f00f666e2ccd47..7bfd0d7ef49df8 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) batadv_iv_ogm_schedule(hard_iface); } +/** + * batadv_iv_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default TQ difference threshold to 20 */ + atomic_set(&bat_priv->gw.sel_class, 20); +} + static struct batadv_gw_node * batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { @@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .del_if = batadv_iv_ogm_orig_del_if, }, .gw = { + .init_sel_class = batadv_iv_init_sel_class, .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, #ifdef CONFIG_BATMAN_ADV_DEBUGFS diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 2ac612d7bab4d0..2e2471ca84e392 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -668,6 +668,16 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, return ret; } +/** + * batadv_v_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); +} + static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, char *buff, size_t count) { @@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .dump = batadv_v_orig_dump, }, .gw = { + .init_sel_class = batadv_v_init_sel_class, .store_sel_class = batadv_v_store_sel_class, .show_sel_class = batadv_v_show_sel_class, .get_best_gw_node = batadv_v_gw_get_best_gw_node, @@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - /* set default throughput difference threshold to 5Mbps */ - atomic_set(&bat_priv->gw.sel_class, 50); - return 0; } diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 21184810d89f69..3e3f91ab694fb8 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, */ void batadv_gw_init(struct batadv_priv *bat_priv) { + if (bat_priv->algo_ops->gw.init_sel_class) + bat_priv->algo_ops->gw.init_sel_class(bat_priv); + else + atomic_set(&bat_priv->gw.sel_class, 1); + batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1, NULL, BATADV_TVLV_GW, 1, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7b3494ae6ad93f..2e0b3463ab4abf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -820,7 +820,6 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); #endif 
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); - atomic_set(&bat_priv->gw.sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index e913aee28c98bf..5137d859694c28 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops { /** * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @init_sel_class: initialize GW selection class (optional) * @store_sel_class: parse and stores a new GW selection class (optional) * @show_sel_class: prints the current GW selection class (optional) * @get_best_gw_node: select the best GW from the list of available nodes @@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops { * @dump: dump gateways to a netlink socket (optional) */ struct batadv_algo_gw_ops { + void (*init_sel_class)(struct batadv_priv *bat_priv); ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); From 3bec247474469f769af41e8c80d3a100dd97dd76 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Wed, 22 Feb 2017 17:17:38 +0800 Subject: [PATCH 0745/1911] iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3 In function _hid_sensor_power_state(), when hid_sensor_read_poll_value() is called, sensor's all properties will be updated by the value from sensor hardware/firmware. In some implementation, sensor hardware/firmware will do a power cycle during S3. In this case, after resume, once hid_sensor_read_poll_value() is called, sensor's all properties which are kept by driver during S3 will be changed to default value. But instead, if a set feature function is called first, sensor hardware/firmware will be recovered to the last status. So change the sensor_hub_set_feature() calling order to behind of set feature function to avoid sensor properties lose. 
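A condensed view of the resulting order in _hid_sensor_power_state() (heavily simplified; error handling and the actual report values are omitted):

	/* 1) write the cached power/report state back first, so firmware
	 *    that power-cycled across S3 is restored to its last settings
	 */
	sensor_hub_set_feature(st->hsdev, st->power_state.report_id,
			       st->power_state.index, sizeof(state_val),
			       &state_val);
	sensor_hub_set_feature(st->hsdev, st->report_state.report_id,
			       st->report_state.index, sizeof(report_val),
			       &report_val);

	/* 2) only then read the poll value back from the device */
	if (state)
		poll_value = hid_sensor_read_poll_value(st);
	if (poll_value > 0)
		msleep_interruptible(poll_value * 2);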
Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index a3cce3a3830079..ecf592d69043ae 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -51,8 +51,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) st->report_state.report_id, st->report_state.index, HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM); - - poll_value = hid_sensor_read_poll_value(st); } else { int val; @@ -89,7 +87,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); - if (state && poll_value) + if (state) + poll_value = hid_sensor_read_poll_value(st); + if (poll_value > 0) msleep_interruptible(poll_value * 2); return 0; From 3ff861f59f6c1f5bf2bc03d2cd36ac3f992cbc06 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Mar 2017 15:37:57 -0800 Subject: [PATCH 0746/1911] iio: magnetometer: ak8974: remove incorrect __exit markups Even if bus is not hot-pluggable, devices can be unbound from the driver via sysfs, so we should not be using __exit annotations on remove() methods. The only exception is drivers registered with platform_driver_probe() which specifically disables sysfs bind/unbind attributes. Signed-off-by: Dmitry Torokhov Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8974.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 6dd8cbd7ce9531..e13370dc9b1cb4 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -763,7 +763,7 @@ static int ak8974_probe(struct i2c_client *i2c, return ret; } -static int __exit ak8974_remove(struct i2c_client *i2c) +static int ak8974_remove(struct i2c_client *i2c) { struct iio_dev *indio_dev = i2c_get_clientdata(i2c); struct ak8974 *ak8974 = iio_priv(indio_dev); @@ -845,7 +845,7 @@ static struct i2c_driver ak8974_driver = { .of_match_table = of_match_ptr(ak8974_of_match), }, .probe = ak8974_probe, - .remove = __exit_p(ak8974_remove), + .remove = ak8974_remove, .id_table = ak8974_id, }; module_i2c_driver(ak8974_driver); From c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Mar 2017 12:59:56 -0800 Subject: [PATCH 0747/1911] Linux 4.11-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4cb6b0a1152b5f..165cf9783a5dbb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 -PATCHLEVEL = 10 +PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Fearless Coyote # *DOCUMENTATION* From 9e10889a3177340dcda7d29c6d8fbd97247b007b Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Fri, 17 Feb 2017 16:12:50 +0100 Subject: [PATCH 0748/1911] Revert "ARM: at91/dt: sama5d2: Use new compatible for ohci node" This reverts commit cab43282682e ("ARM: at91/dt: sama5d2: Use new compatible for ohci node") It depends from commit 7150bc9b4d43 ("usb: ohci-at91: Forcibly suspend ports while USB suspend") which was reverted and implemented differently. 
With the new implementation, the compatible string must remain the same. The compatible string introduced by this commit has been used in the default SAMA5D2 dtsi starting from Linux 4.8. As it has never been working correctly in an official release, removing it should not be breaking the stability rules. Fixes: cab43282682e ("ARM: at91/dt: sama5d2: Use new compatible for ohci node") Signed-off-by: Romain Izard cc: Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 22332be7214032..528b4e9c6d3d30 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -266,7 +266,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,sama5d2-ohci", "usb-ohci"; + compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; From 6c4f0fa643cb9e775dcc976e3db00d649468ff1d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:20 +0530 Subject: [PATCH 0749/1911] cpufreq: schedutil: move cached_raw_freq to struct sugov_policy cached_raw_freq applies to the entire cpufreq policy and not individual CPUs. Apart from wasting per-cpu memory, it is actually wrong to keep it in struct sugov_cpu as we may end up comparing next_freq with a stale cached_raw_freq of a random CPU. Move cached_raw_freq to struct sugov_policy. Fixes: 5cbea46984d6 (cpufreq: schedutil: map raw required frequency to driver frequency) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8f8de3d4d6b7a3..3ab2aeaec6ecac 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -36,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. 
*/ struct irq_work irq_work; @@ -52,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -146,9 +146,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -580,6 +580,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -590,7 +591,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, From 655cb1ebff4b7918fc560502c3297af2d3c7d114 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:21 +0530 Subject: [PATCH 0750/1911] cpufreq: schedutil: Pass sg_policy to get_next_freq() get_next_freq() uses sg_cpu only to get sg_policy, which the callers of get_next_freq() already have. Pass sg_policy instead of sg_cpu to get_next_freq(), to make it more efficient. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3ab2aeaec6ecac..cd7cd489f73981 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -136,10 +136,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? 
policy->cpuinfo.max_freq : policy->cur; @@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, From d82f26925599cae83c38d17d07ae982356e81318 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Feb 2017 16:44:16 -0500 Subject: [PATCH 0751/1911] cpufreq: Add the "cpufreq.off=1" cmdline option Add the "cpufreq.off=1" cmdline option. At boot-time, this allows a user to request CONFIG_CPU_FREQ=n behavior from a kernel built with CONFIG_CPU_FREQ=y. This is analogous to the existing "cpuidle.off=1" option and CONFIG_CPU_IDLE=y This capability is valuable when we need to debug end-user issues in the BIOS or in Linux. It is also convenient for enabling comparisons, which may otherwise require a new kernel, or help from BIOS SETUP, which may be buggy or unavailable. Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/cpufreq/cpufreq.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b120..3988d2311f9707 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -662,6 +662,9 @@ cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system + cpufreq.off=1 [CPU_FREQ] + disable the cpufreq sub-system + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 80a785ad17e81c..7790db2645d7ca 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,4 +2532,5 @@ static int __init cpufreq_core_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpufreq_core_init); From cd59b4bed9d11a2aefc4bb44eed9de0e6c1eea06 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Mar 2017 00:07:36 +0100 Subject: [PATCH 0752/1911] cpufreq: intel_pstate: Fix global settings in active mode Commit 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) changed intel_pstate to invoke cpufreq_update_policy() for every registered CPU on global sysfs attributes updates, but that led to undesirable effects in the active mode if the "performance" P-state selection algorithm is configufred for one CPU and the "powersave" one is chosen for all of the other CPUs. 
Namely, in that case, the following is possible: # cd /sys/devices/system/cpu/ # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 26 # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 26 The reason why this happens is because intel_pstate attempts to maintain two sets of global limits in the active mode, one for the "performance" P-state selection algorithm and one for the "powersave" P-state selection algorithm, but the P-state selection algorithms are set per policy, so the global limits cannot reflect all of them at the same time if they are different for different policies. In the particular situation above, the attempt to change min_perf_pct to 94 caused cpufreq_update_policy() to be run for a CPU with the "powersave" P-state selection algorithm and intel_pstate_set_policy() called by it silently switched the global limits to the "powersave" set which finally was reflected by the sysfs interface. To prevent that from happening, modify intel_pstate_update_policies() to always switch back to the set of limits that was used right before it has been invoked. Fixes: 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5e066b33259874..436a4c5ba70d96 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -973,11 +973,20 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) + __releases(&intel_pstate_limits_lock) + __acquires(&intel_pstate_limits_lock) { + struct perf_limits *saved_limits = limits; int cpu; + mutex_unlock(&intel_pstate_limits_lock); + for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); + + mutex_lock(&intel_pstate_limits_lock); + + limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1185,10 +1194,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1222,10 +1231,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1259,10 +1268,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; From d74b19929130c9b5395a497030dcf161353cd526 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 1 Mar 2017 00:11:05 +0100 Subject: [PATCH 0753/1911] cpufreq: intel_pstate: Fix intel_pstate_verify_policy() The code added to intel_pstate_verify_policy() by commit 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) should use perf_limits instead of limits, because otherwise setting global limits via sysfs may affect policies inconsistently. For example, in the sequence of shell commands below, the scaling_min_freq attribute for policy1 and policy2 should be affected in the same way, because scaling_governor is set in the same way for both of them: # cat cpufreq/policy1/scaling_governor powersave # cat cpufreq/policy2/scaling_governor powersave # echo performance > cpufreq/policy0/scaling_governor # echo 94 > intel_pstate/min_perf_pct # cat cpufreq/policy0/scaling_min_freq 2914000 # cat cpufreq/policy1/scaling_min_freq 2914000 # cat cpufreq/policy2/scaling_min_freq 800000 The are affected differently, because intel_pstate_verify_policy() is invoked with limits set to &performance_limits (left behind by policy0) for policy1 and with limits set to &powersave_limits (left behind by policy1) for policy2. Since perf_limits is set to the set of limits matching the policy being updated, using it instead of limits fixes the inconsistency. Fixes: 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 436a4c5ba70d96..3dc546601c8815 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2212,9 +2212,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - limits->max_sysfs_pct / 100; + perf_limits->max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - limits->min_sysfs_pct / 100; + perf_limits->min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } From a240c4aa5d0f9c8bb0db380ab9c1ec1f68b923ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:29:12 +0100 Subject: [PATCH 0754/1911] cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy If the current P-state selection algorithm is set to "performance" in intel_pstate_set_policy(), the limits may be initialized from scratch, but only if no_turbo is not set and the maximum frequency allowed for the given CPU (i.e. the policy object representing it) is at least equal to the max frequency supported by the CPU. In all of the other cases, the limits will not be updated. For example, the following can happen: # cat intel_pstate/status active # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 100 # cat cpufreq/policy0/scaling_max_freq 3100000 echo 3000000 > cpufreq/policy0/scaling_max_freq # cat intel_pstate/min_perf_pct 94 # echo 95 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 95 That is confusing for two reasons. First, the initial attempt to change min_perf_pct to 94 seems to have no effect, even though setting the global limits should always work. Second, after changing scaling_max_freq for policy0 the global min_perf_pct attribute shows 94, even though it should have not been affected by that operation in principle. 
Moreover, the final attempt to change min_perf_pct to 95 worked as expected, because scaling_max_freq for the only policy with scaling_governor equal to "performance" was different from the maximum at that time. To make all that confusion go away, modify intel_pstate_set_policy() so that it doesn't reinitialize the limits at all. At the same time, change intel_pstate_set_performance_limits() to set min_sysfs_pct to 100 in the "performance" limits set so that switching the P-state selection algorithm to "performance" causes intel_pstate/min_perf_pct in sysfs to go to 100 (or whatever value min_sysfs_pct in the "performance" limits is set to later). That requires per-CPU limits to be initialized explicitly rather than by copying the global limits to avoid setting min_sysfs_pct in the per-CPU limits to 100. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3dc546601c8815..f1f8fbe0b4c46a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -382,6 +382,7 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) intel_pstate_init_limits(limits); limits->min_perf_pct = 100; limits->min_perf = int_ext_tofp(1); + limits->min_sysfs_pct = 100; } static DEFINE_MUTEX(intel_pstate_driver_lock); @@ -2146,16 +2147,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("set performance\n"); if (!perf_limits) { limits = &performance_limits; perf_limits = limits; } - if (policy->max >= policy->cpuinfo.max_freq && - !limits->no_turbo) { - pr_debug("set performance\n"); - intel_pstate_set_performance_limits(perf_limits); - goto out; - } } else { pr_debug("set powersave\n"); if (!perf_limits) { @@ -2166,7 +2162,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) } intel_pstate_update_perf_limits(policy, perf_limits); - out: + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not @@ -2257,13 +2253,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - /* - * We need sane value in the cpu->perf_limits, so inherit from global - * perf_limits limits, which are seeded with values based on the - * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up. - */ if (per_cpu_limits) - memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits)); + intel_pstate_init_limits(cpu->perf_limits); policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; From e51b999e11b84a766d78347afe9287e2c5b91c97 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 8 Feb 2017 15:37:11 -0500 Subject: [PATCH 0755/1911] ARM: dts: BCM5301X: Fix UARTs on bcm953012k The UARTs are outputting garbage on the console. This is due to a speed issue. We can simply use the clock speed (which is now defined in the DTSI file) and everything works fine. 
Signed-off-by: Jon Mason Fixes: cdc36b22 ("ARM: dts: enable clock support for BCM5301X") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm953012k.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts index bfd923096a8c1f..da5aa8f5ffa7f9 100644 --- a/arch/arm/boot/dts/bcm953012k.dts +++ b/arch/arm/boot/dts/bcm953012k.dts @@ -53,10 +53,9 @@ }; &uart0 { - clock-frequency = <62499840>; + status = "okay"; }; &uart1 { - clock-frequency = <62499840>; status = "okay"; }; From 88d1fa70c21d7b431386cfe70cdc514d98b0c9c4 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 8 Feb 2017 15:37:12 -0500 Subject: [PATCH 0756/1911] ARM: dts: BCM5301X: Fix memory start address Memory starts at 0x80000000, not 0. 0 "works" due to mirrior of the first 128M of RAM to that address. Anything greater than 128M will quickly find nothing there. Correcting the starting address has everything working again. Signed-off-by: Jon Mason Fixes: 7eb05f6d ("ARM: dts: bcm5301x: Add BCM SVK DT files") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm953012k.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts index da5aa8f5ffa7f9..ae31a5826e918e 100644 --- a/arch/arm/boot/dts/bcm953012k.dts +++ b/arch/arm/boot/dts/bcm953012k.dts @@ -48,7 +48,7 @@ }; memory { - reg = <0x00000000 0x10000000>; + reg = <0x80000000 0x10000000>; }; }; From 0c2bf9f95983fe30aa2f6463cb761cd42c2d521a Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 2 Mar 2017 19:21:32 -0500 Subject: [PATCH 0757/1911] ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GIC_PPI flags were misconfigured for the timers, resulting in errors like: [ 0.000000] GIC: PPI11 is secure or misconfigured Changing them to being edge triggered corrects the issue Suggested-by: Rafał Miłecki Signed-off-by: Jon Mason Fixes: d27509f1 ("ARM: BCM5301X: add dts files for BCM4708 SoC") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm5301x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 4fbb089cf5ad3c..00de62dc0042f1 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -66,14 +66,14 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; local-timer@20600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; From f38837b08d23e66de17d46d030e0d9ac5172ad1f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 5 Mar 2017 09:41:08 -0800 Subject: [PATCH 0758/1911] bpf: add get_next_key callback to LPM map map_get_next_key callback is mandatory. Supply dummy handler. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Dmitry Vyukov Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- kernel/bpf/lpm_trie.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 8bfe0afaee1082..b37bd9ab7f5742 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -500,9 +500,15 @@ static void trie_free(struct bpf_map *map) raw_spin_unlock(&trie->lock); } +static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + static const struct bpf_map_ops trie_ops = { .map_alloc = trie_alloc, .map_free = trie_free, + .map_get_next_key = trie_get_next_key, .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: [PATCH 0759/1911] net: phy: Do not perform software reset for Generic PHY The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index daec6555f3b108..5198ccfa347f8b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/include/linux/phy.h b/include/linux/phy.h index 772476028a6507..43a774873aa96d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); From 2201ac6129fa162ac24da089a034bb0971648ebb Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Mon, 20 Feb 2017 20:01:16 +0100 Subject: [PATCH 0760/1911] dmaengine: bcm2835: Fix cyclic DMA period splitting The code responsible for splitting periods into chunks that can be handled by the DMA controller missed to update total_len, the number of bytes processed in the current period, when there are more chunks to follow. Therefore total_len was stuck at 0 and the code didn't work at all. This resulted in a wrong control block layout and audio issues because the cyclic DMA callback wasn't executing on period boundaries. Fix this by adding the missing total_len update. 
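In other words, the per-period bookkeeping now looks roughly like this (simplified; "cb" is the current control block and "*total_len" the bytes already queued for this period):

	if (*total_len + cb->length < period_len) {
		/* chunk fits inside the current period: account for it */
		*total_len += cb->length;
		return;
	}
	/* chunk would cross the period boundary: trim it so the period
	 * ends exactly here and the cyclic callback fires on time
	 */
	cb->length = period_len - *total_len;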
Signed-off-by: Matthias Reichl Signed-off-by: Martin Sperl Tested-by: Clive Messer Reviewed-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index e18dc596cf2447..6204cc32d09c50 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -251,8 +251,11 @@ static void bcm2835_dma_create_cb_set_length( */ /* have we filled in period_length yet? */ - if (*total_len + control_block->length < period_len) + if (*total_len + control_block->length < period_len) { + /* update number of bytes in this period so far */ + *total_len += control_block->length; return; + } /* calculate the length that remains to reach period_length */ control_block->length = period_len - *total_len; From 854c11e250730eaffec2ad56e9224c6be8a7979d Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:00 +0300 Subject: [PATCH 0761/1911] ARC: [dts] add input clocks for cpu nodes ARC CPU cores are driven by core_clk so we add corresponding "clocks" property to ARC cpu nodes. Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/skeleton.dtsi | 1 + arch/arc/boot/dts/skeleton_hs.dtsi | 1 + arch/arc/boot/dts/skeleton_hs_idu.dtsi | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index 65808fe0a290be..2891cb266cf0b5 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -26,6 +26,7 @@ device_type = "cpu"; compatible = "snps,arc770d"; reg = <0>; + clocks = <&core_clk>; }; }; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi index 2dfe8037dfbb34..5e944d3e5b74f6 100644 --- a/arch/arc/boot/dts/skeleton_hs.dtsi +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -21,6 +21,7 @@ device_type = "cpu"; compatible = "snps,archs38"; reg = <0>; + clocks = <&core_clk>; }; }; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index 4c11079f3565a3..662c5e020693e7 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -21,6 +21,7 @@ device_type = "cpu"; compatible = "snps,archs38xN"; reg = <0>; + clocks = <&core_clk>; }; }; From 4ed10958ae461168be310b4bbee13f745e4c1547 Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:01 +0300 Subject: [PATCH 0762/1911] ARC: [dts] add cpu nodes to ARCHS SMP device tree Trying to get clock for CPU cores on SMP systems I found that I was only able to get clock for core[0]. That was because only one cpu@0 node was represented in ARC HS device tree and it was impossible to get clock for "non-existing" cores. So as ARC HS may have up to 4 cores we update device tree to match maximum possible cores quantity. 
Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/skeleton_hs_idu.dtsi | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index 662c5e020693e7..54b277d7dea0e4 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -19,10 +19,28 @@ cpu@0 { device_type = "cpu"; - compatible = "snps,archs38xN"; + compatible = "snps,archs38"; reg = <0>; clocks = <&core_clk>; }; + cpu@1 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <1>; + clocks = <&core_clk>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <2>; + clocks = <&core_clk>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <3>; + clocks = <&core_clk>; + }; }; /* TIMER0 with interrupt for clockevent */ From 7f35144cea219104fe42e7c6cd0ee5103016da2e Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:02 +0300 Subject: [PATCH 0763/1911] ARC: get rate from clk driver instead of reading device tree We were reading clock rate directly from device tree "clock-frequency" property of corresponding clock node in show_cpuinfo function. Such approach is correct only in case cpu is always clocked by "fixed-clock". If we use clock driver that allows rate to be changed this won't work as rate may change during the time or even "clock-frequency" property may not be presented at all. So this commit replaces reading device tree with getting rate from clock driver. This approach is much more flexible and will work for both fixed and mutable clocks. Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 3093fa898a236a..fa62404ba58f77 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -488,8 +489,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) { char *str; int cpu_id = ptr_to_cpu(v); - struct device_node *core_clk = of_find_node_by_name(NULL, "core_clk"); - u32 freq = 0; + struct device *cpu_dev = get_cpu_device(cpu_id); + struct clk *cpu_clk; + unsigned long freq = 0; if (!cpu_online(cpu_id)) { seq_printf(m, "processor [%d]\t: Offline\n", cpu_id); @@ -502,9 +504,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); - of_property_read_u32(core_clk, "clock-frequency", &freq); + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + seq_printf(m, "CPU speed \t: Cannot get clock for processor [%d]\n", + cpu_id); + } else { + freq = clk_get_rate(cpu_clk); + } if (freq) - seq_printf(m, "CPU speed\t: %u.%02u Mhz\n", + seq_printf(m, "CPU speed\t: %lu.%02lu Mhz\n", freq / 1000000, (freq / 10000) % 100); seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n", From ac8616e4c81dded650dfade49a7da283565d37ce Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 14 Feb 2017 11:35:22 +0800 Subject: [PATCH 0764/1911] clk: sunxi-ng: mp: Adjust parent rate for pre-dividers The MP style clocks support an mux with pre-dividers. While the driver correctly accounted for them in the .determine_rate callback, it did not in the .recalc_rate and .set_rate callbacks. 
This means when calculating the factors in the .set_rate callback, they would be off by a factor of the active pre-divider. Same goes for reading back the clock rate after it is set. Cc: stable@vger.kernel.org Fixes: 2ab836db5097 ("clk: sunxi-ng: Add M-P factor clock support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_mp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c index 22c2ca7a2a221c..b583f186a804df 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.c +++ b/drivers/clk/sunxi-ng/ccu_mp.c @@ -85,6 +85,10 @@ static unsigned long ccu_mp_recalc_rate(struct clk_hw *hw, unsigned int m, p; u32 reg; + /* Adjust parent_rate according to pre-dividers */ + ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux, + -1, &parent_rate); + reg = readl(cmp->common.base + cmp->common.reg); m = reg >> cmp->m.shift; @@ -117,6 +121,10 @@ static int ccu_mp_set_rate(struct clk_hw *hw, unsigned long rate, unsigned int m, p; u32 reg; + /* Adjust parent_rate according to pre-dividers */ + ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux, + -1, &parent_rate); + max_m = cmp->m.max ?: 1 << cmp->m.width; max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1); From 69c9ae5041309553472ee798d7683be31bcdc434 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:29:45 +0100 Subject: [PATCH 0765/1911] clk: sunxi: ccu-sun5i needs nkmp A randconfig build ran into this rare link error: drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.1+0x4): undefined reference to `ccu_nkmp_ops' drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.7+0x4): undefined reference to `ccu_nkmp_ops' This adds the missing 'select'. Fixes: 5e73761786d6 ("clk: sunxi-ng: Add sun5i CCU driver") Signed-off-by: Arnd Bergmann Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig index 695bbf9ef428f9..72109d2cf41b29 100644 --- a/drivers/clk/sunxi-ng/Kconfig +++ b/drivers/clk/sunxi-ng/Kconfig @@ -80,6 +80,7 @@ config SUN6I_A31_CCU select SUNXI_CCU_DIV select SUNXI_CCU_NK select SUNXI_CCU_NKM + select SUNXI_CCU_NKMP select SUNXI_CCU_NM select SUNXI_CCU_MP select SUNXI_CCU_PHASE From 9ad0bb39fce319d7b92c17d306ed0a9f70a02e7d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 14 Feb 2017 10:23:32 +0800 Subject: [PATCH 0766/1911] clk: sunxi-ng: sun6i: Fix enable bit offset for hdmi-ddc module clock The enable bit offset for the hdmi-ddc module clock is wrong. It is pointing to the main hdmi module clock enable bit. 
Reported-by: Bob Ham Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Cc: stable@vger.kernel.org # 4.9.x- Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 4c9a920ff4ab7c..89e68d29bf456a 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, 0x150, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT); -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(31), 0); +static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); From 39319f504b5d91e853f5ec7753a56e43915fcaf4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 30 Nov 2016 12:05:03 +0100 Subject: [PATCH 0767/1911] ARM: sun8i: Fix the mali clock rate The Mali clock rate was improperly assumed to be 408MHz, while it was really 384Mhz, 408MHz being the "extreme" frequency, and definitely not stable. Switch for the stable, correct frequency for the GPU. Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a23-a33.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index a952cc0703cc17..8a3ed21cb7bcfc 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -495,7 +495,7 @@ resets = <&ccu RST_BUS_GPU>; assigned-clocks = <&ccu CLK_GPU>; - assigned-clock-rates = <408000000>; + assigned-clock-rates = <384000000>; }; gic: interrupt-controller@01c81000 { From 36fc579761b50784b63dafd0f2e796b659e0f5ee Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 20 Feb 2017 16:25:45 +0100 Subject: [PATCH 0768/1911] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel RSX-1058 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all 1.1. When a sink that supports deep color is connected to the output, the receiver will send EDIDs that advertise this capability, even if it isn't possible with HDMI versions earlier than 1.3. Currently the kernel is assuming that deep color is possible and the sink displays an error. This quirk will make sure that deep color isn't used with this particular receiver. 
Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"") Signed-off-by: Tomeu Vizoso Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com Cc: stable@vger.kernel.org Cc: Matt Horan Tested-by: Matt Horan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869 Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c8baab9bee0d05..ba58f1b11d1e16 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -148,6 +148,9 @@ static const struct edid_quirk { /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, + + /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ + { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, }; /* From 7369090a9fb57c3fc705ce355d2e4523a5a24716 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 13:24:54 +0200 Subject: [PATCH 0769/1911] usb: dwc3: gadget: make Set Endpoint Configuration macros safe Some gadget drivers are bad, bad boys. We notice that ADB was passing bad Burst Size which caused top bits of param0 to be overwritten which confused DWC3 when running this command. In order to avoid future issues, we're going to make sure values passed by macros are always safe for the controller. Note that ADB still needs a fix to *not* pass bad values. Cc: # v3.2+ Reported-by: Mohamed Abbas Sugested-by: Adam Andruszak Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 3129bcf74d7d8d..265e223ab64554 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -28,23 +28,23 @@ struct dwc3; #define gadget_to_dwc(g) (container_of(g, struct dwc3, gadget)) /* DEPCFG parameter 1 */ -#define DWC3_DEPCFG_INT_NUM(n) ((n) << 0) +#define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) #define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) #define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) #define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) #define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) -#define DWC3_DEPCFG_BINTERVAL_M1(n) ((n) << 16) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) -#define DWC3_DEPCFG_EP_NUMBER(n) ((n) << 25) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) #define DWC3_DEPCFG_BULK_BASED (1 << 30) #define DWC3_DEPCFG_FIFO_BASED (1 << 31) /* DEPCFG parameter 0 */ -#define DWC3_DEPCFG_EP_TYPE(n) ((n) << 1) -#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3) -#define DWC3_DEPCFG_FIFO_NUMBER(n) ((n) << 17) -#define DWC3_DEPCFG_BURST_SIZE(n) ((n) << 22) +#define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n) & 0x1f) << 17) +#define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ #define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) From 2bfa0719ac2a9b2f3c91345873d3cdebd0296ba9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 14:54:45 +0200 Subject: [PATCH 0770/1911] usb: gadget: function: f_fs: pass companion descriptor along If we're dealing with SuperSpeed endpoints, we need to make sure to pass along the companion descriptor and initialize 
fields needed by the Gadget API. Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though. Cc: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index a5b7cd6156987a..7e976f00cb0277 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1834,11 +1834,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); while(count--) { struct usb_endpoint_descriptor *ds; + struct usb_ss_ep_comp_descriptor *comp_desc = NULL; + int needs_comp_desc = false; int desc_idx; - if (ffs->gadget->speed == USB_SPEED_SUPER) + if (ffs->gadget->speed == USB_SPEED_SUPER) { desc_idx = 2; - else if (ffs->gadget->speed == USB_SPEED_HIGH) + needs_comp_desc = true; + } else if (ffs->gadget->speed == USB_SPEED_HIGH) desc_idx = 1; else desc_idx = 0; @@ -1855,6 +1858,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; + + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; + + if (needs_comp_desc) + ep->ep->comp_desc = comp_desc; + ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; From cf3113d893d4427b166ec8695460efa7aa660923 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 17 Feb 2017 11:12:44 +0200 Subject: [PATCH 0771/1911] usb: dwc3: gadget: properly increment dequeue pointer on ep_dequeue If request was already started, this means we had to stop the transfer. With that we also need to ignore all TRBs used by the request, however TRBs can only be modified after completion of END_TRANSFER command. So what we have to do here is wait for END_TRANSFER completion and only after that jump over TRBs by clearing HWO and incrementing dequeue pointer. Note that we have 2 possible types of transfers here: i) Linear buffer request ii) SG-list based request SG-list based requests will have r->num_pending_sgs set to a valid number (> 0). Linear requests, normally use a single TRB. For each of these two cases, if r->unaligned flag is set, one extra TRB has been used to align transfer size to wMaxPacketSize. All of these cases need to be taken into consideration so we don't mess up our TRB ring pointers. Tested-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4db97ecae8859b..f875b3f4b0ec38 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1342,6 +1342,68 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { /* wait until it is processed */ dwc3_stop_active_transfer(dwc, dep->number, true); + + /* + * If request was already started, this means we had to + * stop the transfer. With that we also need to ignore + * all TRBs used by the request, however TRBs can only + * be modified after completion of END_TRANSFER + * command. So what we do here is that we wait for + * END_TRANSFER completion and only after that, we jump + * over TRBs by clearing HWO and incrementing dequeue + * pointer. 
+ * + * Note that we have 2 possible types of transfers here: + * + * i) Linear buffer request + * ii) SG-list based request + * + * SG-list based requests will have r->num_pending_sgs + * set to a valid number (> 0). Linear requests, + * normally use a single TRB. + * + * For each of these two cases, if r->unaligned flag is + * set, one extra TRB has been used to align transfer + * size to wMaxPacketSize. + * + * All of these cases need to be taken into + * consideration so we don't mess up our TRB ring + * pointers. + */ + wait_event_lock_irq(dep->wait_end_transfer, + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING), + dwc->lock); + + if (!r->trb) + goto out1; + + if (r->num_pending_sgs) { + struct dwc3_trb *trb; + int i = 0; + + for (i = 0; i < r->num_pending_sgs; i++) { + trb = r->trb + i; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + + if (r->unaligned) { + trb = r->trb + r->num_pending_sgs + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } else { + struct dwc3_trb *trb = r->trb; + + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + + if (r->unaligned) { + trb = r->trb + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } goto out1; } dev_err(dwc->dev, "request %p was not queued to %s\n", @@ -1352,6 +1414,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, out1: /* giveback the request */ + dep->queued_requests--; dwc3_gadget_giveback(dep, req, -ECONNRESET); out0: From 2e46565cf622dd0534a9d8bffe152a577b48d7aa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: [PATCH 0772/1911] USB: serial: digi_acceleport: fix OOB-event processing A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Cc: stable # v2.6.30: 2d380889215f Signed-off-by: Johan Hovold --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ab78111e09680f..6537d3ca2797d8 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; From 8f1117abb408808af9cc4c948925c726bec4755a Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Mon, 6 Mar 2017 13:05:24 +0800 Subject: [PATCH 0773/1911] drm/i915/gvt: handle workload lifecycle properly Currently i915 has a request replay mechanism which can make sure the request can be replayed after a GPU reset. With this mechanism, gvt should wait until the GVT request seqno passed before complete the current workload. So that there should be a context switch interrupt come before gvt free the workload. In this way, workload lifecylce matches with the i915 request lifecycle. The workload can only be freed after the request is completed. 
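The lifecycle rule described above, that the workload may only be freed after its request has completed, boils down to waiting for a completion signal before tearing the object down. A minimal user-space sketch of that ordering, using pthreads instead of the kernel's wait_event() and with hypothetical names such as hw_completion(), looks like this (build with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a workload whose backing request may still be in flight. */
struct workload {
	pthread_mutex_t lock;
	pthread_cond_t done;
	bool completed;		/* set once the context-switch-out has happened */
	int status;
};

static void *hw_completion(void *arg)
{
	struct workload *w = arg;

	/* ...hardware finishes, possibly only after the request was replayed... */
	pthread_mutex_lock(&w->lock);
	w->status = 0;
	w->completed = true;
	pthread_cond_signal(&w->done);
	pthread_mutex_unlock(&w->lock);
	return NULL;
}

int main(void)
{
	struct workload *w = calloc(1, sizeof(*w));
	pthread_t t;

	pthread_mutex_init(&w->lock, NULL);
	pthread_cond_init(&w->done, NULL);
	pthread_create(&t, NULL, hw_completion, w);

	/* Lifecycle rule: only complete and free the workload after the
	 * completion has actually been observed. */
	pthread_mutex_lock(&w->lock);
	while (!w->completed)
		pthread_cond_wait(&w->done, &w->lock);
	pthread_mutex_unlock(&w->lock);

	pthread_join(t, NULL);
	printf("workload done, status %d, safe to free\n", w->status);
	free(w);
	return 0;
}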
v2: use gvt_dbg_sched instead of gvt_err to print when wait again Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 49 +++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index e355a82ccabd64..d3a56c9490257d 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -151,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb, case INTEL_CONTEXT_SCHEDULE_OUT: intel_gvt_restore_render_mmio(workload->vgpu, workload->ring_id); + /* If the status is -EINPROGRESS means this workload + * doesn't meet any issue during dispatching so when + * get the SCHEDULE_OUT set the status to be zero for + * good. If the status is NOT -EINPROGRESS means there + * is something wrong happened during dispatching and + * the status should not be set to zero + */ + if (workload->status == -EINPROGRESS) + workload->status = 0; atomic_set(&workload->shadow_ctx_active, 0); break; default: @@ -362,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload = scheduler->current_workload[ring_id]; vgpu = workload->vgpu; - if (!workload->status && !vgpu->resetting) { + /* For the workload w/ request, needs to wait for the context + * switch to make sure request is completed. + * For the workload w/o request, directly complete the workload. + */ + if (workload->req) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); - update_guest_context(workload); + i915_gem_request_put(fetch_and_zero(&workload->req)); - for_each_set_bit(event, workload->pending_events, - INTEL_GVT_EVENT_MAX) - intel_vgpu_trigger_virtual_event(vgpu, event); + if (!workload->status && !vgpu->resetting) { + update_guest_context(workload); + + for_each_set_bit(event, workload->pending_events, + INTEL_GVT_EVENT_MAX) + intel_vgpu_trigger_virtual_event(vgpu, event); + } } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -400,7 +417,6 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; - long lret; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -449,23 +465,24 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - - lret = i915_wait_request(workload->req, +retry: + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); - if (lret < 0) { - workload->status = lret; - gvt_err("fail to wait workload, skip\n"); - } else { - workload->status = 0; + /* I915 has replay mechanism and a request will be replayed + * if there is i915 reset. So the seqno will be updated anyway. + * If the seqno is not updated yet after waiting, which means + * the replay may still be in progress and we can wait again. 
+ */ + if (!i915_gem_request_completed(workload->req)) { + gvt_dbg_sched("workload %p not completed, wait again\n", + workload); + goto retry; } complete: gvt_dbg_sched("will complete workload %p, status: %d\n", workload, workload->status); - if (workload->req) - i915_gem_request_put(fetch_and_zero(&workload->req)); - complete_current_workload(gvt, ring_id); if (need_force_wake) From c2e04fdab33181b53b5a2f9662b7b607b720f79f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 6 Mar 2017 17:08:30 +0800 Subject: [PATCH 0774/1911] drm/i915/gvt: protect RO and Rsvd bits of virtual vgpu configuration space Per PCI specification, Configuration Register has different types (RO, RW, RW1C, Rsvd). For RO Register bits are read-only and cannot be altered by software. For RW1C Register bits indicate status when read. A Set bit indicates a status event which is Cleared by writing a 1b. Writing a 0b to RW1C bits has no effect. Reserved Register is for future implementations, and they are read-only and must return zero when read. Current vGPU configuration write emulation just copy the value as it is. So we haven't emulated RO, RW1C and Rsvd Registers correctly. This patch is following the Spec to correct emulation logic. We add a function vgpu_cfg_mem_write to wrap the access to vGPU configuration memory. The write function uses a RW Register bitmap to avoid RO bits be overwritten, and emulate RW1C behavior for the particular status Register. v2: new = src[i] --> new = src[i] & mask (zhenyu) Signed-off-by: Changbin Du Cc: Xiaoguang Chen Cc: Zhiyuan Lv Cc: Min He Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 54 ++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index a77e050b85a341..b7d7721e72fadd 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -41,6 +41,54 @@ enum { INTEL_GVT_PCI_BAR_MAX, }; +/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one + * byte) byte by byte in standard pci configuration space. (not the full + * 256 bytes.) + */ +static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = { + [PCI_COMMAND] = 0xff, 0x07, + [PCI_STATUS] = 0x00, 0xf9, /* the only one RW1C byte */ + [PCI_CACHE_LINE_SIZE] = 0xff, + [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff, + [PCI_ROM_ADDRESS] = 0x01, 0xf8, 0xff, 0xff, + [PCI_INTERRUPT_LINE] = 0xff, +}; + +/** + * vgpu_pci_cfg_mem_write - write virtual cfg space memory + * + * Use this function to write virtual cfg space memory. + * For standard cfg space, only RW bits can be changed, + * and we emulates the RW1C behavior of PCI_STATUS register. + */ +static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, + u8 *src, unsigned int bytes) +{ + u8 *cfg_base = vgpu_cfg_space(vgpu); + u8 mask, new, old; + int i = 0; + + for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { + mask = pci_cfg_space_rw_bmp[off + i]; + old = cfg_base[off + i]; + new = src[i] & mask; + + /** + * The PCI_STATUS high byte has RW1C bits, here + * emulates clear by writing 1 for these bits. + * Writing a 0b to RW1C bits has no effect. + */ + if (off + i == PCI_STATUS + 1) + new = (~new & old) & mask; + + cfg_base[off + i] = (old & ~mask) | new; + } + + /* For other configuration space directly copy as it is. 
*/ + if (i < bytes) + memcpy(cfg_base + off + i, src + i, bytes - i); +} + /** * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read * @@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, u8 changed = old ^ new; int ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); if (!(changed & PCI_COMMAND_MEMORY)) return 0; @@ -277,10 +325,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (ret) return ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; default: - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; } return 0; From 4b30eebfc35c67771b5f58d9274d3e321b72d7a8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 3 Mar 2017 04:25:09 +0000 Subject: [PATCH 0775/1911] ASoC: rcar: avoid SSI_MODEx settings for SSI8 SSI8 is is sharing pin with SSI7, and nothing to do for SSI_MODEx. It is special pin and it needs special settings whole system, but we can't confirm it, because we never have SSI8 available board. This patch fixup SSI_MODEx settings error for SSI8 on connection test, but should be confirmed behavior on real board in the future. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssiu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4e817c8a18c0bb..14fafdaf1395f9 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -64,7 +64,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, mask1 = (1 << 4) | (1 << 20); /* mask sync bit */ mask2 = (1 << 4); /* mask sync bit */ val1 = val2 = 0; - if (rsnd_ssi_is_pin_sharing(io)) { + if (id == 8) { + /* + * SSI8 pin is sharing with SSI7, nothing to do. + */ + } else if (rsnd_ssi_is_pin_sharing(io)) { int shift = -1; switch (id) { From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Feb 2017 14:32:18 +0000 Subject: [PATCH 0776/1911] arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs When invalidating guest TLBs, special care must be taken to actually shoot the guest TLBs and not the host ones if we're running on a VHE system. This is controlled by the HCR_EL2.TGE bit, which we forget to clear before invalidating TLBs. Address the issue by introducing two wrappers (__tlb_switch_to_guest and __tlb_switch_to_host) that take care of both the VTTBR_EL2 and HCR_EL2.TGE switching. Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Christoffer Dall Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 64 ++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f93..9e1d2b75eecd60 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include #include +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. 
+ */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Feb 2017 11:32:47 +0000 Subject: [PATCH 0777/1911] KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to zero, which implies that there is a way to bypass the GIC and inject raw IRQ/FIQ by driving the CPU pins. Of course, we don't allow that when the GIC is configured, but we fail to indicate that to the guest. The obvious fix is to set these bits (and never let them being changed again). 
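A small stand-alone sketch of what presenting RAO/WI (read-as-one, writes-ignored) bits means for the emulation: guest writes cannot clear them, so the stored value always has them set. The bit positions match the definitions added in the hunk below; emulate_sre_write() and the rest are illustrative only, not the vgic code.

#include <stdint.h>
#include <stdio.h>

/* Bit layout as in the definitions added below (arm-gic-v3.h). */
#define ICC_SRE_EL1_DIB	(1U << 2)	/* disable IRQ bypass */
#define ICC_SRE_EL1_DFB	(1U << 1)	/* disable FIQ bypass */
#define ICC_SRE_EL1_SRE	(1U << 0)	/* system register enable */

#define SRE_RAO_WI	(ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE)

/* A guest write leaves RAO/WI bits set no matter what value was written. */
static uint32_t emulate_sre_write(uint32_t guest_value)
{
	return guest_value | SRE_RAO_WI;
}

int main(void)
{
	uint32_t sre = SRE_RAO_WI;	/* value presented to the guest */

	/* Guest tries to clear DIB/DFB to enable IRQ/FIQ bypass: ignored. */
	sre = emulate_sre_write(0);
	printf("ICC_SRE_EL1=0x%x DIB=%d DFB=%d SRE=%d\n", (unsigned int)sre,
	       !!(sre & ICC_SRE_EL1_DIB), !!(sre & ICC_SRE_EL1_DFB),
	       !!(sre & ICC_SRE_EL1_SRE));
	return 0;
}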
Reported-by: Peter Maydell Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc85d..97cbca19430d82 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index edc6ee2dc852e9..be0f4c3e0142e0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. * This goes with the spec allowing the value to be RAO/WI. */ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; } else { vgic_v3->vgic_sre = 0; From 04c8f2bf9117de7b8e8bc0b90e8c4bff15f4f613 Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Wed, 1 Mar 2017 22:41:23 +0530 Subject: [PATCH 0778/1911] ASoC: hdac_hdmi: avoid reference to invalid variable of the pin list Using pin list array iterator outside the iteration of the list can point to dummy element, which can be invalid. So don't use pin variable outside the pin list iteration. This fixes the following coccinelle warning: sound/soc/codecs/hdac_hdmi.c:1419:5-8: ERROR: invalid reference to the index variable of the iterator Fixes: 2acd8309a3a4('ASoC: hdac_hdmi: Add support to handle MST capable pin') Reported-by: Julia Lawall Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 78fca8acd3ec0a..bb405698e1025f 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1534,21 +1534,20 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) pin->mst_capable = false; /* if not MST, default is port[0] */ hport = &pin->ports[0]; - goto out; } else { for (i = 0; i < pin->num_ports; i++) { pin->mst_capable = true; if (pin->ports[i].id == pipe) { hport = &pin->ports[i]; - goto out; + break; } } } + + if (hport) + hdac_hdmi_present_sense(pin, hport); } -out: - if (pin && hport) - hdac_hdmi_present_sense(pin, hport); } static struct i915_audio_component_audio_ops aops = { From 2fe42dd0f13812d38daaf05bcb1fd996afd0e87a Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Wed, 1 Mar 2017 22:41:24 +0530 Subject: [PATCH 0779/1911] ASoC: hdac_hdmi: don't update the iterator in pcm list remove Fix not to update the iterator element, instead use list_del to remove entry from the list. 
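The underlying rule is that deleting entries while walking a list requires caching the next pointer before the current entry is unlinked, which is what list_for_each_entry_safe() provides and what assigning to the plain iterator cannot do. A minimal stand-alone illustration of the same rule on a hand-rolled singly linked list (not the kernel list API):

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int id;
};

/* Safe removal while walking: remember the next pointer before the current
 * node is freed, the same idea as list_for_each_entry_safe() + list_del().
 * Reading cur->next after free(cur) would be a use-after-free, and merely
 * overwriting the iterator (as the old code did with the port pointer)
 * never removes anything from the list at all. */
static void free_all(struct node **head)
{
	struct node *cur = *head, *next;

	while (cur) {
		next = cur->next;	/* saved before cur goes away */
		free(cur);
		cur = next;
	}
	*head = NULL;
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->id = i;
		n->next = head;
		head = n;
	}
	free_all(&head);
	printf("list emptied safely\n");
	return 0;
}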
This fixes the following coccinelle and static checker warning: sound/soc/codecs/hdac_hdmi.c:1884:2-21:iterator with update on line 1885 sound/soc/codecs/hdac_hdmi.c:2011 hdac_hdmi_dev_remove() error: potential NULL dereference 'port'. Fixes: e0e5d3e5a53b('ASoC: hdac_hdmi: Add support for multiple ports to a PCM') Reported-by: Julia Lawall Reported-by: Dan Carpenter Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index bb405698e1025f..fd272a40485b07 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1997,7 +1997,7 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) struct hdac_hdmi_pin *pin, *pin_next; struct hdac_hdmi_cvt *cvt, *cvt_next; struct hdac_hdmi_pcm *pcm, *pcm_next; - struct hdac_hdmi_port *port; + struct hdac_hdmi_port *port, *port_next; int i; snd_soc_unregister_codec(&edev->hdac.dev); @@ -2007,8 +2007,9 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) if (list_empty(&pcm->port_list)) continue; - list_for_each_entry(port, &pcm->port_list, head) - port = NULL; + list_for_each_entry_safe(port, port_next, + &pcm->port_list, head) + list_del(&port->head); list_del(&pcm->head); kfree(pcm); From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: [PATCH 0780/1911] powerpc/xics: Work around limitations of OPAL XICS priority handling The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. 
This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa70..b53f80f0b4d822 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac76f..23efe4e4217221 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:47 +1100 Subject: [PATCH 0781/1911] powerpc: Parse the command line before calling CAS On POWER9 the hypervisor requires the guest to decide whether it would like to use a hash or radix mmu model at the time it calls ibm,client-architecture-support (CAS) based on what the hypervisor has said it's allowed to do. 
It is possible to disable radix by passing "disable_radix" on the command line. The next patch will add support for the new CAS format, thus we need to parse the command line before calling CAS so we can correctly select which mmu we would like to use. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d56..bf3966d1256505 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - /* * Initialize memory management within prom_init */ From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:48 +1100 Subject: [PATCH 0782/1911] powerpc: Update to new option-vector-5 format for CAS On POWER9 the ibm,client-architecture-support (CAS) negotiation process has been updated to change how the host to guest negotiation is done for the new hash/radix mmu as well as the nest mmu, process tables and guest translation shootdown (GTSE). This is documented in the unreleased PAPR ACR "CAS option vector additions for P9". The host tells the guest which options it supports in ibm,arch-vec-5-platform-support. The guest then chooses a subset of these to request in the CAS call and these are agreed to in the ibm,architecture-vec-5 property of the chosen node. Thus we read ibm,arch-vec-5-platform-support and make our selection before calling CAS. We then parse the ibm,architecture-vec-5 property of the chosen node to check whether we should run as hash or radix. ibm,arch-vec-5-platform-support format: index value pairs: ... 
index: Option vector 5 byte number val: Some representation of supported values Signed-off-by: Suraj Jitindar Singh Acked-by: Paul Mackerras [mpe: Don't print about unknown options, be consistent with OV5_FEAT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 18 ++++-- arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++- arch/powerpc/mm/init_64.c | 36 +++++++++-- 3 files changed, 150 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e83223c..35c00d7a0cf81a 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bf3966d1256505..1c1b44ec7642a5 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. 
Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ 
-997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d66b..9be992083d2a7f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 5 Mar 2017 10:54:34 +1100 Subject: [PATCH 0783/1911] powerpc/64: Avoid panic during boot due to divide by zero in init_cache_info() I see a panic in early boot when building with a recent gcc toolchain. The issue is a divide by zero, which is undefined. Older toolchains let us get away with it: int foo(int a) { return a / 0; } foo: li 9,0 divw 3,3,9 extsw 3,3 blr But newer ones catch it: foo: trap Add a check to avoid the divide by zero. 
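The defensive pattern, shown here as a stand-alone sketch with an illustrative page size and struct layout: treat a zero block size reported by firmware as unknown instead of dividing by it.

#include <stdio.h>

#define PAGE_SIZE 65536u	/* illustrative page size for this sketch */

struct cache_info {
	unsigned int block_size;
	unsigned int blocks_per_page;
};

static void init_cache_info(struct cache_info *info, unsigned int bsize)
{
	info->block_size = bsize;

	/* Division by zero is undefined behaviour in C; newer compilers are
	 * free to emit a trap for it, so guard the firmware-supplied value
	 * instead of assuming it is sane. */
	if (bsize)
		info->blocks_per_page = PAGE_SIZE / bsize;
	else
		info->blocks_per_page = 0;
}

int main(void)
{
	struct cache_info l1d;

	init_cache_info(&l1d, 0);	/* missing or bogus device tree value */
	printf("blocks_per_page = %u\n", l1d.blocks_per_page);
	return 0;
}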
Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache") Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b2b..9cfaa8b69b5f32 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 5 Mar 2017 19:16:44 +0100 Subject: [PATCH 0784/1911] x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 01c9cda3e1b77d..4194d6f9bb290b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2017 10:57:48 +0200 Subject: [PATCH 0785/1911] x86/build/x86_64_defconfig: Enable CONFIG_R8169 Very common PCIe ethernet card. Already enabled in i386_defconfig. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Cc: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defcda..6205d3b81e6d11 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 21:51:32 +1100 Subject: [PATCH 0786/1911] powerpc/64: Fix L1D cache shape vector reporting L1I values It seems we didn't pay quite enough attention when testing the new cache shape vectors, which means we didn't notice the bug where the vector for the L1D was using the L1I values. 
Fix it, resulting in eg: L1I cache size: 0x8000 32768B 32K L1I line size: 0x80 8-way associative L1D cache size: 0x10000 65536B 64K L1D line size: 0x80 8-way associative Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors") Cut-and-paste-bug-by: Benjamin Herrenschmidt Badly-reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e817..09bde6e34f5d52 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 22:53:59 +1100 Subject: [PATCH 0787/1911] powerpc: Sort the selects under CONFIG_PPC We have a big list of selects under CONFIG_PPC, and currently they're completely unsorted. This means people tend to add new selects at the bottom of the list, and so two commits which both add a new select will often conflict. Instead sort it alphabetically. This is nicer in and of itself, but also means two commits that add a new select will have a greater chance of not conflicting. Add a note at the top and bottom asking people to keep it sorted. And while we're here pad out the 'if' expressions to make them stand out. Suggested-by: Stephen Rothwell Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 138 ++++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 66 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7f3..97a8bc8a095ce4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. 
+ # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select 
ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n From 606142af57dad981b78707234cfbd15f9f7b7125 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 15 Feb 2017 18:29:15 -0200 Subject: [PATCH 0788/1911] [media] dw2102: don't do DMA on stack On Kernel 4.9, WARNINGs about doing DMA on stack are hit at the dw2102 driver: one in su3000_power_ctrl() and the other in tt_s2_4600_frontend_attach(). Both were due to the use of buffers on the stack as parameters to dvb_usb_generic_rw() and the resulting attempt to do DMA with them. The device was non-functional as a result. So, switch this driver over to use a buffer within the device state structure, as has been done with other DVB-USB drivers. Tested with TechnoTrend TT-connect S2-4600. 
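The pattern the patch applies is to build command bytes in the kmalloc'ed device state, serialized by the device's data_mutex, so the USB core never tries to DMA from the caller's stack. A compressed stand-alone sketch of that pattern follows; struct dvb_usb_device, struct dw2102_state and dvb_usb_generic_rw() are stubbed here with pthread and printf stand-ins purely so the sketch compiles outside the kernel, and reset_board() is an illustrative name.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-ins; the real types and dvb_usb_generic_rw() come from the
 * dvb-usb framework. */
#define MAX_XFER_SIZE 64

struct dw2102_state {
	unsigned char data[MAX_XFER_SIZE + 4];	/* lives in allocated state, not on a stack */
};

struct dvb_usb_device {
	pthread_mutex_t data_mutex;	/* stands in for d->data_mutex */
	struct dw2102_state *priv;
};

static int dvb_usb_generic_rw(struct dvb_usb_device *d, unsigned char *wbuf,
			      int wlen, unsigned char *rbuf, int rlen, int delay)
{
	/* The real helper submits a USB transfer that may DMA from wbuf,
	 * which is why wbuf must not point at the caller's stack. */
	(void)d; (void)rbuf; (void)rlen; (void)delay;
	printf("sending %d byte(s), cmd 0x%02x\n", wlen, wbuf[0]);
	return 0;
}

/* The pattern from the patch: build the command in device state, under the
 * device's data_mutex, instead of in an on-stack array. */
static int reset_board(struct dvb_usb_device *d)
{
	struct dw2102_state *state = d->priv;
	int ret;

	pthread_mutex_lock(&d->data_mutex);
	state->data[0] = 0xde;	/* reset command, as in su3000_power_ctrl() */
	state->data[1] = 0;
	ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0);
	pthread_mutex_unlock(&d->data_mutex);

	return ret;
}

int main(void)
{
	struct dvb_usb_device d;

	pthread_mutex_init(&d.data_mutex, NULL);
	d.priv = calloc(1, sizeof(struct dw2102_state));
	reset_board(&d);
	free(d.priv);
	return 0;
}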
[mchehab@osg.samsung.com: fixed a warning at su3000_i2c_transfer() that state var were dereferenced before check 'd'] Signed-off-by: Jonathan McDowell Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dw2102.c | 244 +++++++++++++++++------------ 1 file changed, 145 insertions(+), 99 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 6ca502d834b4f2..4f42d57f81d954 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -68,6 +68,7 @@ struct dw2102_state { u8 initialized; u8 last_lock; + u8 data[MAX_XFER_SIZE + 4]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; @@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); - u8 obuf[0x40], ibuf[0x40]; + struct dw2102_state *state; if (!d) return -ENODEV; + + state = d->priv; + if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; + if (mutex_lock_interruptible(&d->data_mutex) < 0) { + mutex_unlock(&d->i2c_mutex); + return -EAGAIN; + } switch (num) { case 1: switch (msg[0].addr) { case SU3000_STREAM_CTRL: - obuf[0] = msg[0].buf[0] + 0x36; - obuf[1] = 3; - obuf[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0) + state->data[0] = msg[0].buf[0] + 0x36; + state->data[1] = 3; + state->data[2] = 0; + if (dvb_usb_generic_rw(d, state->data, 3, + state->data, 0, 0) < 0) err("i2c transfer failed."); break; case DW2102_RC_QUERY: - obuf[0] = 0x10; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0) + state->data[0] = 0x10; + if (dvb_usb_generic_rw(d, state->data, 1, + state->data, 2, 0) < 0) err("i2c transfer failed."); - msg[0].buf[1] = ibuf[0]; - msg[0].buf[0] = ibuf[1]; + msg[0].buf[1] = state->data[0]; + msg[0].buf[0] = state->data[1]; break; default: /* always i2c write*/ - obuf[0] = 0x08; - obuf[1] = msg[0].addr; - obuf[2] = msg[0].len; + state->data[0] = 0x08; + state->data[1] = msg[0].addr; + state->data[2] = msg[0].len; - memcpy(&obuf[3], msg[0].buf, msg[0].len); + memcpy(&state->data[3], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3, - ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3, + state->data, 1, 0) < 0) err("i2c transfer failed."); } break; case 2: /* always i2c read */ - obuf[0] = 0x09; - obuf[1] = msg[0].len; - obuf[2] = msg[1].len; - obuf[3] = msg[0].addr; - memcpy(&obuf[4], msg[0].buf, msg[0].len); - - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4, - ibuf, msg[1].len + 1, 0) < 0) + state->data[0] = 0x09; + state->data[1] = msg[0].len; + state->data[2] = msg[1].len; + state->data[3] = msg[0].addr; + memcpy(&state->data[4], msg[0].buf, msg[0].len); + + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4, + state->data, msg[1].len + 1, 0) < 0) err("i2c transfer failed."); - memcpy(msg[1].buf, &ibuf[1], msg[1].len); + memcpy(msg[1].buf, &state->data[1], msg[1].len); break; default: warn("more than 2 i2c messages at a time is not handled yet."); break; } + mutex_unlock(&d->data_mutex); mutex_unlock(&d->i2c_mutex); return num; } @@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) static int su3000_power_ctrl(struct dvb_usb_device *d, int i) { struct dw2102_state *state = (struct dw2102_state *)d->priv; - u8 obuf[] = {0xde, 0}; + int ret = 0; info("%s: %d, initialized %d", __func__, i, state->initialized); if (i && !state->initialized) { + mutex_lock(&d->data_mutex); + + state->data[0] 
= 0xde; + state->data[1] = 0; + state->initialized = 1; /* reset board */ - return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0); + mutex_unlock(&d->data_mutex); } - return 0; + return ret; } static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) @@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d) return 0; } -static int su3000_frontend_attach(struct dvb_usb_adapter *d) +static int su3000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, - &d->dev->i2c_adap); - if (d->fe_adap[0].fe == NULL) + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, + &d->i2c_adap); + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached DS3000/TS2020!"); return 0; } @@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int t220_frontend_attach(struct dvb_usb_adapter *d) +static int t220_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x87, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + state->data[0] = 0xe; + state->data[1] = 0x87; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x86; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x86; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 
0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(50); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, - &d->dev->i2c_adap, NULL); - if (d->fe_adap[0].fe != NULL) { - if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60, - &d->dev->i2c_adap, &tda18271_config)) { + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, + &d->i2c_adap, NULL); + if (adap->fe_adap[0].fe != NULL) { + if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60, + &d->i2c_adap, &tda18271_config)) { info("Attached TDA18271HD/CXD2820R!"); return 0; } @@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d) +static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[] = { 0x51 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; + + mutex_lock(&d->data_mutex); - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + state->data[0] = 0x51; + + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config, - &d->dev->i2c_adap); + mutex_unlock(&d->data_mutex); - if (d->fe_adap[0].fe == NULL) + adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach, + &s421_m88rs2000_config, + &d->i2c_adap); + + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached RS2000/TS2020!"); return 0; } @@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap->dev; struct dw2102_state *state = d->priv; - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; struct i2c_adapter *i2c_adapter; struct i2c_client *client; struct i2c_board_info board_info; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) 
< 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); + mutex_unlock(&d->data_mutex); + /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; From 9ce9f7999741f342eeffd036ab09213a2fa93040 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 13 Feb 2017 13:49:58 -0600 Subject: [PATCH 0789/1911] gpio: altera-a10sr: Set gpio_chip parent property Set the gpio_chip parent property since some recent functions such as devprop_gpiochip_set_names() can use it. Signed-off-by: Thor Thayer Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera-a10sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 9e1a138fed5337..16a8951b2beda3 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev) gpio->regmap = a10sr->regmap; gpio->gp = altr_a10sr_gc; - + gpio->gp.parent = pdev->dev.parent; gpio->gp.of_node = pdev->dev.of_node; ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); From fa6256db033067b57318decdc1c583512a1f8f68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Feb 2017 02:02:06 +0300 Subject: [PATCH 0790/1911] gpio: mockup: return -EFAULT if copy_from_user() fails copy_from_user() returns the number of bytes remaining to be copied but we want to return negative error codes on failue. 
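For reference, the usual idiom for this (a generic sketch, not specific to this driver) treats any non-zero return as a fault:

	if (copy_from_user(&buf, usr_buf, sizeof(buf)))
		return -EFAULT;	/* never hand the leftover byte count back to the caller */
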
Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs") Signed-off-by: Dan Carpenter Acked-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mockup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 06dac72cb69c0c..d9933868921387 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file, struct seq_file *sfile; struct gpio_desc *desc; struct gpio_chip *gc; - int status, val; + int val; char buf; sfile = file->private_data; @@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file, chip = priv->chip; gc = &chip->gc; - status = copy_from_user(&buf, usr_buf, 1); - if (status) - return status; + if (copy_from_user(&buf, usr_buf, 1)) + return -EFAULT; if (buf == '0') val = 0; From b115bebc07f282067eccc06fd5aa3060ab1426da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 16:13:44 +0100 Subject: [PATCH 0791/1911] gpio: xgene: mark PM functions as __maybe_unused When CONFIG_PM_SLEEP is disabled, we get a warning about unused functions: drivers/gpio/gpio-xgene.c:155:12: warning: 'xgene_gpio_resume' defined but not used [-Wunused-function] static int xgene_gpio_resume(struct device *dev) ^~~~~~~~~~~~~~~~~ drivers/gpio/gpio-xgene.c:142:12: warning: 'xgene_gpio_suspend' defined but not used [-Wunused-function] static int xgene_gpio_suspend(struct device *dev) The warnings are harmless and can be avoided by simplifying the code and marking the functions as __maybe_unused. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- drivers/gpio/gpio-xgene.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 40a8881c2ce882..f1c6ec17b90a83 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c @@ -42,9 +42,7 @@ struct xgene_gpio { struct gpio_chip chip; void __iomem *base; spinlock_t lock; -#ifdef CONFIG_PM u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; -#endif }; static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) @@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc, return 0; } -#ifdef CONFIG_PM -static int xgene_gpio_suspend(struct device *dev) +static __maybe_unused int xgene_gpio_suspend(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev) return 0; } -static int xgene_gpio_resume(struct device *dev) +static __maybe_unused int xgene_gpio_resume(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev) } static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); -#define XGENE_GPIO_PM_OPS (&xgene_gpio_pm) -#else -#define XGENE_GPIO_PM_OPS NULL -#endif static int xgene_gpio_probe(struct platform_device *pdev) { @@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = { .name = "xgene-gpio", .of_match_table = xgene_gpio_of_match, .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), - .pm = XGENE_GPIO_PM_OPS, + .pm = &xgene_gpio_pm, }, .probe = xgene_gpio_probe, }; From f759921cfbf4847319d197a6ed7c9534d593f8bc Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Mon, 20 Feb 2017 09:41:45 +0800 Subject: [PATCH 0792/1911] gpio: altera: Use handle_level_irq when configured 
as a level_high When a threaded irq handler is chained attached to one of the gpio pins when configure for level irq the altera_gpio_irq_leveL_high_handler does not mask the interrupt while being handled by the chained irq. This resulting in the threaded irq not getting enough cycles to complete quickly enough before the irq was disabled as faulty. handle_level_irq should be used in this situation instead of handle_simple_irq. In gpiochip_irqchip_add set default handler to handle_bad_irq as per Documentation/gpio/driver.txt. Then set the correct handler in the set_type callback. Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9f1..3fe6a21e05a571 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); - if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) - return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; } @@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } - static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg; ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(&pdev->dev, "could not add irqchip\n"); From 8e51533780ba223a3562ff4382c6b6f350c7e9a4 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 10 Feb 2017 17:21:00 -0600 Subject: [PATCH 0793/1911] pinctrl: qcom: add get_direction function The get_direction callback function allows gpiolib to know the current direction (input vs output) for a given GPIO. This is particularly useful on ACPI systems, where the GPIOs are configured only by firmware (typically UEFI), so the only way to know the initial values to query the hardware directly. Without this function, gpiolib thinks that all GPIOs are configured for input. 
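A minimal sketch of such a callback, using a made-up driver and register layout (foo_gpio, FOO_CTL and FOO_OE_BIT are illustrative, not part of this patch), reads the output-enable bit and maps it to gpiolib's convention:

	static int foo_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
	{
		struct foo_gpio *priv = gpiochip_get_data(chip);
		u32 ctl = readl(priv->base + FOO_CTL(offset));

		return (ctl & FOO_OE_BIT) ? 0 : 1;	/* 0 = output, 1 = input */
	}

The driver then points the .get_direction member of its struct gpio_chip at this function.
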
Signed-off-by: Timur Tabi Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f8e9e1c2b2f6f4..c978be5eb9ebe3 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -422,6 +422,20 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in return 0; } +static int msm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(chip); + const struct msm_pingroup *g; + u32 val; + + g = &pctrl->soc->groups[offset]; + + val = readl(pctrl->regs + g->ctl_reg); + + /* 0 = output, 1 = input */ + return val & BIT(g->oe_bit) ? 0 : 1; +} + static int msm_gpio_get(struct gpio_chip *chip, unsigned offset) { const struct msm_pingroup *g; @@ -510,6 +524,7 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) static struct gpio_chip msm_gpio_template = { .direction_input = msm_gpio_direction_input, .direction_output = msm_gpio_direction_output, + .get_direction = msm_gpio_get_direction, .get = msm_gpio_get, .set = msm_gpio_set, .request = gpiochip_generic_request, From 96e1ce8fcd66d850196758c038bd9d6f7857c518 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 20 Feb 2017 21:00:42 +0900 Subject: [PATCH 0794/1911] pinctrl: uniphier: change pin names of aio/xirq for LD11 This patch changes pin names of AIO and XIRQ according to updated specification. Signed-off-by: Kunihiko Hayashi Acked-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c index 77a0236ee781dd..83f8864fa76ac5 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c @@ -390,22 +390,22 @@ static const struct pinctrl_pin_desc uniphier_ld11_pins[] = { UNIPHIER_PINCTRL_PIN(140, "AO1D0", 140, 140, UNIPHIER_PIN_DRV_1BIT, 140, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(141, "TCON0", 141, + UNIPHIER_PINCTRL_PIN(141, "AO1D1", 141, 141, UNIPHIER_PIN_DRV_1BIT, 141, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(142, "TCON1", 142, + UNIPHIER_PINCTRL_PIN(142, "AO1D2", 142, 142, UNIPHIER_PIN_DRV_1BIT, 142, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(143, "TCON2", 143, + UNIPHIER_PINCTRL_PIN(143, "XIRQ9", 143, 143, UNIPHIER_PIN_DRV_1BIT, 143, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(144, "TCON3", 144, + UNIPHIER_PINCTRL_PIN(144, "XIRQ10", 144, 144, UNIPHIER_PIN_DRV_1BIT, 144, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(145, "TCON4", 145, + UNIPHIER_PINCTRL_PIN(145, "XIRQ11", 145, 145, UNIPHIER_PIN_DRV_1BIT, 145, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(146, "TCON5", 146, + UNIPHIER_PINCTRL_PIN(146, "XIRQ13", 146, 146, UNIPHIER_PIN_DRV_1BIT, 146, UNIPHIER_PIN_PULL_DOWN), UNIPHIER_PINCTRL_PIN(147, "PWMA", 147, From b3bd0f2849f4480fc4f40bb954d27e58f4f0786b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 23:50:19 +0100 Subject: [PATCH 0795/1911] staging/vc04_services: add CONFIG_OF dependency After several hours of debugging this obviously bogus but elaborate gcc-7.0.1 warning, drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_complete_bulk': 
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:603:4: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy((char *)page_address(pages[0]) + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pagelist->offset, ~~~~~~~~~~~~~~~~~ fragments, ~~~~~~~~~~ head_bytes); ~~~~~~~~~~~ In file included from include/linux/string.h:18:0, from include/linux/bitmap.h:8, from include/linux/cpumask.h:11, from include/linux/interrupt.h:9, from drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:37: arch/arm/include/asm/string.h:16:15: note: in a call to function 'memcpy' declared here extern void * memcpy(void *, const void *, __kernel_size_t) __nocapture(2); ^~~~~~ I have concluded that gcc was technically right in the first place: vchiq_complete_bulk is an externally visible function that calls free_pagelist(), which in turn derives a pointer from the global g_fragments_base variable. g_fragments_base is initialized in vchiq_platform_init(), but we only get there if of_property_read_u32() successfully reads the cache line size. When CONFIG_OF is disabled, this always fails, and g_fragments_base is guaranteed to be NULL when vchiq_complete_bulk() gets called. This adds a CONFIG_OF Kconfig dependency, which is also technically correct but nonobvious, and thus seems like a good fit for the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig index e61e4ca064a8ab..74094fff436781 100644 --- a/drivers/staging/vc04_services/Kconfig +++ b/drivers/staging/vc04_services/Kconfig @@ -1,6 +1,7 @@ config BCM2835_VCHIQ tristate "Videocore VCHIQ" depends on HAS_DMA + depends on OF depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) default y help From f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 17 Feb 2017 07:40:52 -0600 Subject: [PATCH 0796/1911] HID: chicony: Add support for another ASUS Zen AiO keyboard Add support for media keys on the keyboard that comes with the Asus V221ID and ZN241IC All In One computers. The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP. This device is not visibly branded as Chicony, and the USB Vendor ID suggests that it is a JESS device. However this seems like the right place to put it: the usage codes are identical to the currently supported devices, and this driver already supports the ASUS AIO keyboard AK1D. Signed-off-by: Daniel Drake Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-chicony.c | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 1aeb80e5242461..8eab3200ac9a69 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards. 
config HID_CORSAIR tristate "Corsair devices" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199feefd..f04ed9aabc3f9f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e9e87d33744691..ae01ae601d74e6 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1910,6 +1910,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 86c95d30ac801f..b3df60da029711 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -557,6 +557,7 @@ #define USB_VENDOR_ID_JESS 0x0c45 #define USB_DEVICE_ID_JESS_YUREX 0x1010 +#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112 #define USB_VENDOR_ID_JESS2 0x0f30 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 From a687c5765b5ae19fe559e14615ddc87ebb46d409 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Fri, 24 Feb 2017 16:14:15 -0800 Subject: [PATCH 0797/1911] HID: sony: Fix input device leak when connecting a DS4 twice using USB/BT When a user connects a DS4 twice using USB and BT, we reject the second device connection after the setup work. We then perform a cleanup, but during cleanup we are not removing the touchpad device. This leads to leakage of an input device, which we would never remove. It can likely result into a kernel oops as well when the touchpad evdev node is accessed and the underlaying HID device has been removed from the system. [jkosina@suse.cz: added stable annotation] Fixes: ac797b95f532 ("HID: sony: Make the DS4 touchpad a separate device") Cc: stable@vger.kernel.org Signed-off-by: Roderick Colenbrander Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index f405b07d038165..740996f9bdd49d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2632,6 +2632,8 @@ static int sony_input_configured(struct hid_device *hdev, sony_leds_remove(sc); if (sc->quirks & SONY_BATTERY_SUPPORT) sony_battery_remove(sc); + if (sc->touchpad) + sony_unregister_touchpad(sc); sony_cancel_work_sync(sc); kfree(sc->output_report_dmabuf); sony_remove_dev_list(sc); From eb38d913c27f32f4df173791051fecf6aca34173 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 2 Mar 2017 10:44:58 +0200 Subject: [PATCH 0798/1911] Revert "usb: gadget: uvc: Add missing call for additional setup data" This reverts commit 4fbac5206afd01b717d4bdc58793d471f3391b4b. 
This commit breaks g_webcam when used with uvc-gadget [1]. The user space application (e.g. uvc-gadget) is responsible for sending response to UVC class specific requests on control endpoint in uvc_send_response() in uvc_v4l2.c. The bad commit was causing a duplicate response to be sent with incorrect response data thus causing UVC probe to fail at the host and broken control transfer endpoint at the gadget. [1] - git://git.ideasonboard.org/uvc-gadget.git Cc: # v4.9+ Acked-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 27ed51b5082f66..29b41b5dee04d2 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req)); v4l2_event_queue(&uvc->vdev, &v4l2_event); - /* Pass additional setup data to userspace */ - if (uvc->event_setup_out && uvc->event_length) { - uvc->control_req->length = uvc->event_length; - return usb_ep_queue(uvc->func.config->cdev->gadget->ep0, - uvc->control_req, GFP_ATOMIC); - } - return 0; } From 5bbc852676ae08e818241cf66a3ffe4be44225c4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 28 Feb 2017 14:25:45 +0800 Subject: [PATCH 0799/1911] usb: gadget: dummy_hcd: clear usb_gadget region before registration When the user does device unbind and rebind test, the kernel will show below dump due to usb_gadget memory region is dirty after unbind. Clear usb_gadget region for every new probe. root@imx6qdlsolo:/sys/bus/platform/drivers/dummy_udc# echo dummy_udc.0 > bind [ 102.523312] kobject (eddd78b0): tried to init an initialized object, something is seriously wrong. 
[ 102.532447] CPU: 0 PID: 734 Comm: sh Not tainted 4.10.0-rc7-00872-g1b2b8e9 #1298 [ 102.539866] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 102.545717] Backtrace: [ 102.548225] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 102.555822] r7:ede34000 r6:60010013 r5:00000000 r4:c0f29418 [ 102.561512] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 102.568764] [] (dump_stack) from [] (kobject_init+0x80/0x9c) [ 102.576187] r10:0000001f r9:eddd7000 r8:eeaf8c10 r7:eddd78a8 r6:c177891c r5:c0f3b060 [ 102.584036] r4:eddd78b0 r3:00000000 [ 102.587641] [] (kobject_init) from [] (device_initialize+0x28/0xf8) [ 102.595665] r5:eebc4800 r4:eddd78a8 [ 102.599268] [] (device_initialize) from [] (device_register+0x14/0x20) [ 102.607556] r7:eddd78a8 r6:00000000 r5:eebc4800 r4:eddd78a8 [ 102.613256] [] (device_register) from [] (usb_add_gadget_udc_release+0x8c/0x1ec) [ 102.622410] r5:eebc4800 r4:eddd7860 [ 102.626015] [] (usb_add_gadget_udc_release) from [] (usb_add_gadget_udc+0x14/0x18) [ 102.635351] r10:0000001f r9:eddd7000 r8:eddd788c r7:bf003770 r6:eddd77f8 r5:eddd7818 [ 102.643198] r4:eddd785c r3:eddd7b24 [ 102.646834] [] (usb_add_gadget_udc) from [] (dummy_udc_probe+0x170/0x1c4 [dummy_hcd]) [ 102.656458] [] (dummy_udc_probe [dummy_hcd]) from [] (platform_drv_probe+0x54/0xb8) [ 102.665881] r10:00000008 r9:c1778960 r8:bf004128 r7:fffffdfb r6:bf004128 r5:eeaf8c10 [ 102.673727] r4:eeaf8c10 [ 102.676293] [] (platform_drv_probe) from [] (driver_probe_device+0x264/0x474) [ 102.685186] r7:00000000 r6:00000000 r5:c1778960 r4:eeaf8c10 [ 102.690876] [] (driver_probe_device) from [] (bind_store+0xb8/0x14c) [ 102.698994] r10:eeb3bb4c r9:ede34000 r8:0000000c r7:eeaf8c44 r6:bf004128 r5:c0f3b668 [ 102.706840] r4:eeaf8c10 [ 102.709402] [] (bind_store) from [] (drv_attr_store+0x28/0x34) [ 102.716998] r9:ede34000 r8:00000000 r7:ee3863c0 r6:ee3863c0 r5:c0538c80 r4:c053970c [ 102.724776] [] (drv_attr_store) from [] (sysfs_kf_write+0x50/0x54) [ 102.732711] r5:c0538c80 r4:0000000c [ 102.736313] [] (sysfs_kf_write) from [] (kernfs_fop_write+0x100/0x214) [ 102.744599] r7:ee3863c0 r6:eeb3bb40 r5:00000000 r4:00000000 [ 102.750287] [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [ 102.758231] r10:00000000 r9:ede34000 r8:c0108bc4 r7:0000000c r6:ede35f80 r5:c029bd84 [ 102.766077] r4:ee223780 [ 102.768638] [] (__vfs_write) from [] (vfs_write+0xa8/0x170) [ 102.775974] r9:ede34000 r8:c0108bc4 r7:ede35f80 r6:01861cb0 r5:ee223780 r4:0000000c [ 102.783743] [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [ 102.790818] r9:ede34000 r8:c0108bc4 r7:0000000c r6:01861cb0 r5:ee223780 r4:ee223780 [ 102.798595] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x1c) [ 102.806188] r7:00000004 r6:b6e83d58 r5:01861cb0 r4:0000000c Fixes: 90fccb529d24 ("usb: gadget: Gadget directory cleanup - group UDC drivers") Cc: stable Acked-by: Alan Stern Signed-off-by: Peter Chen Tested-by: Xiaolong Ye Reported-by: Fengguang Wu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index c60abe3a68f9cf..8cabc5944d5f1d 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev) int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); + /* Clear usb_gadget region for new registration to udc-core */ + memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = 
&dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; From 077dbaee9df53c597df532a08d721d03f4570f3d Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: [PATCH 0800/1911] irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1fbf..05bbf171df3772 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 2 Mar 2017 16:11:47 +0100 Subject: [PATCH 0801/1911] irqdomain: Add empty irq_domain_check_msi_remap Fix following build error for s390: drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group': drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap' Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813edd..9f3616085423cf 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ From 38355b2a44776c25b0f2ad466e8c51bb805b3032 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 28 Feb 2017 10:55:30 +0000 Subject: [PATCH 0802/1911] usb: gadget: configs: plug memory leak When binding a gadget to a device, "name" is stored in gi->udc_name, but this does not happen when unregistering and the string is leaked. 
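A condensed sketch of the ownership rule at play (identifiers below are placeholders, not the configfs code): whatever a store handler duplicates from userspace must either be stashed somewhere or freed on every path:

	name = kstrdup(page, GFP_KERNEL);
	if (clearing) {
		ret = do_unregister(obj);
		if (ret)
			goto err;
		kfree(name);		/* nothing keeps a reference on this path */
	} else {
		obj->udc_name = name;	/* ownership transferred, freed on unbind */
	}
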
Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 78c44979dde382..cbff3b02840df9 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY; From 73561128eb72ca04c3be6e240f162a861bf68a86 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 27 Feb 2017 11:52:46 +0100 Subject: [PATCH 0803/1911] usb: dwc3: Fix incorrect type for utmi mode The utmi mode is unsigned according the dt-bindings. Fix sparse issue (-Wtypesign): drivers/usb/dwc3/dwc3-omap.c:391:50: warning: incorrect type in argument 3 (different signedness) drivers/usb/dwc3/dwc3-omap.c:391:50: expected unsigned int [usertype] *out_value drivers/usb/dwc3/dwc3-omap.c:391:50: got int * Signed-off-by: Franck Demathieu Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 2092e46b1380e9..4a595777969ed1 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -392,7 +392,7 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap) { u32 reg; struct device_node *node = omap->dev->of_node; - int utmi_mode = 0; + u32 utmi_mode = 0; reg = dwc3_omap_read_utmi_ctrl(omap); From 4242820277b5378c9e4064e79306f326d731472f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Feb 2017 11:33:27 +0100 Subject: [PATCH 0804/1911] usb: gadget: udc: atmel: fix debug output The debug output now contains the wrong variable, as seen from the compiler warning: drivers/usb/gadget/udc/atmel_usba_udc.c: In function 'usba_ep_enable': drivers/usb/gadget/udc/atmel_usba_udc.c:632:550: error: 'ept_cfg' may be used uninitialized in this function [-Werror=maybe-uninitialized] DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", This changes the debug output the same way as the other code. 
Fixes: 741d2558bf0a ("usb: gadget: udc: atmel: Update endpoint allocation scheme") Signed-off-by: Arnd Bergmann Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 11bbce28bc231b..2035906b8ced17 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -610,7 +610,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) { struct usba_ep *ep = to_usba_ep(_ep); struct usba_udc *udc = ep->udc; - unsigned long flags, ept_cfg, maxpacket; + unsigned long flags, maxpacket; unsigned int nr_trans; DBG(DBG_GADGET, "%s: ep_enable: desc=%p\n", ep->ep.name, desc); @@ -630,7 +630,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) ep->is_in = 0; DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", - ep->ep.name, ept_cfg, maxpacket); + ep->ep.name, ep->ept_cfg, maxpacket); if (usb_endpoint_dir_in(desc)) { ep->is_in = 1; From b6e7aeeaf235901c42ec35de4633c7c69501d303 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 22:33:11 +0100 Subject: [PATCH 0805/1911] USB: gadgetfs: Fix a potential memory leak in 'dev_config()' 'kbuf' is allocated just a few lines above using 'memdup_user()'. If the 'if (dev->buf)' test fails, this memory is never released. Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c19..0513dfa008e637 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1782,8 +1782,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf; /* full or low speed config */ From 3ba534df815f233535b5a3dd3de41055666bbd21 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 16 Feb 2017 14:54:12 +0100 Subject: [PATCH 0806/1911] Revert "usb: gadget: f_fs: Fix ExtCompat descriptor validation" This reverts commit ac670a3a650b899fc020b81f63e810d06015b865. This introduce bug we already fixed in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 wof OS_DESC_EXT_COMPAT") Next FFS (adb) SS enumeration fail with Windows OS. 
Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7e976f00cb0277..a0085571824d9b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2264,7 +2264,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - d->Reserved1) + !d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) From 1551e35ea4189c1f7199fe278395fc94196715f2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 14:16:26 +0200 Subject: [PATCH 0807/1911] usb: dwc3: gadget: Fix system suspend/resume on TI platforms On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set after the device controller is stopped via DWC3_DCTL_RUN_STOP. If we don't disconnect and stop the gadget, it stops working after a system resume with the trace below. There is no point in preventing gadget disconnect and gadget stop during system suspend/resume as we're going to suspend in any case, whether DEVCTRLHLT timed out or not. [ 141.727480] ------------[ cut here ]------------ [ 141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3] [ 141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e [ 141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138 [ 141.799547] Hardware name: Generic DRA74X (Flattened Device Tree) [ 141.805940] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 141.814066] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 141.821648] [] (dump_stack) from [] (__warn+0xd8/0x104) [ 141.828955] [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [ 141.836902] [] (warn_slowpath_null) from [] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]) [ 141.848329] [] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3]) [ 141.861034] [] (__dwc3_gadget_ep_disable [dwc3]) from [] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3]) [ 141.872280] [] (dwc3_gadget_ep_disable [dwc3]) from [] (usb_ep_disable+0x11c/0x18c [udc_core]) [ 141.883160] [] (usb_ep_disable [udc_core]) from [] (disable_ep+0x18/0x54 [usb_f_ss_lb]) [ 141.893408] [] (disable_ep [usb_f_ss_lb]) from [] (disable_endpoints+0x18/0x50 [usb_f_ss_lb]) [ 141.904168] [] (disable_endpoints [usb_f_ss_lb]) from [] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb]) [ 141.915771] [] (disable_source_sink [usb_f_ss_lb]) from [] (reset_config+0x48/0x7c [libcomposite]) [ 141.927012] [] (reset_config [libcomposite]) from [] (composite_disconnect+0x2c/0x54 [libcomposite]) [ 141.938444] [] (composite_disconnect [libcomposite]) from [] (usb_gadget_udc_reset+0x10/0x34 [udc_core]) [ 141.950237] [] (usb_gadget_udc_reset [udc_core]) from [] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3]) [ 141.962022] [] (dwc3_gadget_reset_interrupt [dwc3]) from [] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3]) [ 141.973723] [] (dwc3_thread_interrupt [dwc3]) from [] (irq_thread_fn+0x1c/0x54) [ 141.983215] [] (irq_thread_fn) from [] (irq_thread+0x120/0x1f0) [ 141.991247] [] (irq_thread) from [] (kthread+0xf8/0x138) [ 141.998641] [] (kthread) from [] 
(ret_from_fork+0x14/0x24) [ 142.006213] ---[ end trace b4ecfe9f175b9a9c ]--- Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f875b3f4b0ec38..3db5eeae2bfb36 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3291,15 +3291,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0; - ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); From 0913750f9fb6f26bcd00c8f9dd9a8d1b8d031246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 13:38:22 +0200 Subject: [PATCH 0808/1911] usb: dwc3-omap: Fix missing break in dwc3_omap_set_mailbox() We need to break from all cases if we want to treat each one of them separately. Reported-by: Gustavo A. R. Silva Fixes: d2728fb3e01f ("usb: dwc3: omap: Pass VBUS and ID events transparently") Cc: #v4.8+ Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 4a595777969ed1..f8d0747810e78d 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -250,6 +250,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; dwc3_omap_write_utmi_ctrl(omap, val); + break; case OMAP_DWC3_VBUS_OFF: val = dwc3_omap_read_utmi_ctrl(omap); From df7545719a14fa7b481896fb8689e23d0a00f682 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 24 Feb 2017 02:54:56 +0100 Subject: [PATCH 0809/1911] usb: gadget: pxa27x: Test for a valid argument pointer A call usb_put_phy(udc->transceiver) must be tested for a valid pointer. Use an already existing test for usb_unregister_notifier call. Acked-by: Robert Jarzmik Reported-by: Robert Jarzmik Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index e1335ad5bce9f2..832c4fdbe98512 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + } udc->transceiver = NULL; the_controller = NULL; From ef5e2fa9f65befa12f1113c734602d2c1964d2a5 Mon Sep 17 00:00:00 2001 From: Raz Manor Date: Thu, 9 Feb 2017 09:41:08 +0200 Subject: [PATCH 0810/1911] usb: gadget: udc: net2280: Fix tmp reusage in net2280 driver In the function scan_dma_completions() there is a reusage of tmp variable. That coused a wrong value being used in some case when reading a short packet terminated transaction from an endpoint, in 2 concecutive reads. 
This was my logic for the patch: The req->td->dmadesc equals to 0 iff: -- There was a transaction ending with a short packet, and -- The read() to read it was shorter than the transaction length, and -- The read() to complete it is longer than the residue. I believe this is true from the printouts of various cases, but I can't be positive it is correct. Entering this if, there should be no more data in the endpoint (a short packet terminated the transaction). If there is, the transaction wasn't really done and we should exit and wait for it to finish entirely. That is the inner if. That inner if should never happen, but it is there to be on the safe side. That is why it is marked with the comment /* paranoia */. The size of the data available in the endpoint is ep->dma->dmacount and it is read to tmp. This entire clause is based on my own educated guesses. If we passed that inner if without breaking in the original code, than tmp & DMA_BYTE_MASK_COUNT== 0. That means we will always pass dma bytes count of 0 to dma_done(), meaning all the requested bytes were read. dma_done() reports back to the upper layer that the request (read()) was done and how many bytes were read. In the original code that would always be the request size, regardless of the actual size of the data. That did not make sense to me at all. However, the original value of tmp is req->td->dmacount, which is the dmacount value when the request's dma transaction was finished. And that is a much more reasonable value to report back to the caller. To recreate the problem: Read from a bulk out endpoint in a loop, 1024 * n bytes in each iteration. Connect the PLX to a host you can control. Send to that endpoint 1024 * n + x bytes, such that 0 < x < 1024 * n and (x % 1024) != 0 You would expect the first read() to return 1024 * n and the second read() to return x. But you will get the first read to return 1024 * n and the second one to return 1024 * n. That is true for every positive integer n. 
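A compressed userspace sketch of that reproduction (ep_fd and the sizes are illustrative only):

	#define N	4
	#define CHUNK	(1024 * N)		/* 1024 * n bytes per read() */
	char buf[CHUNK];
	ssize_t r1 = read(ep_fd, buf, CHUNK);	/* host queued 1024*n + x bytes */
	ssize_t r2 = read(ep_fd, buf, CHUNK);	/* expected x, buggy driver reports 1024*n */
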
Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Raz Manor Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 85504419ab312e..3828c2ec8623b1 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count; req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break; /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode */ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) { - tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; } From 4bd035eae255a4f1464dca063885fa415e58c12e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 00:26:09 +0000 Subject: [PATCH 0811/1911] EDAC, xgene: Fix wrongly spelled "procesing" Fix spelling mistake in dev_err message. 
Signed-off-by: Colin Ian King Reviewed-by: Loc Ho Cc: linux-edac Link: http://lkml.kernel.org/r/20170223002609.9440-1-colin.king@canonical.com Signed-off-by: Borislav Petkov --- drivers/edac/xgene_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 6c270d9d304a8e..669246056812e8 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1596,7 +1596,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS); if (!reg) goto chk_iob_axi0; - dev_err(edac_dev->dev, "IOB procesing agent (PA) transaction error\n"); + dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n"); if (reg & IOBPA_RDATA_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA read data RAM error\n"); if (reg & IOBPA_M_RDATA_CORRUPT_MASK) From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: [PATCH 0812/1911] kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa860183350..3b626d6dc3ac1a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); From c771c14baa3319c85512e575671bf0bb18824c11 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 2 Mar 2017 15:02:06 -0800 Subject: [PATCH 0813/1911] iomap: invalidate page caches should be after iomap_dio_complete() in direct write After XFS switching to iomap based DIO (commit acdda3aae146 ("xfs: use iomap_dio_rw")), I started to notice dio29/dio30 tests failures from LTP run on ppc64 hosts, and they can be reproduced on x86_64 hosts with 512B/1k block size XFS too. 
dio29 diotest3 -b 65536 -n 100 -i 1000 -o 1024000 dio30 diotest6 -b 65536 -n 100 -i 1000 -o 1024000 The failure message is like: bufcmp: offset 0: Expected: 0x62, got 0x0 diotest03 1 TPASS : Read with Direct IO, Write without diotest03 2 TFAIL : diotest3.c:142: comparsion failed; child=98 offset=1425408 diotest03 3 TFAIL : diotest3.c:194: Write Direct-child 98 failed Direct write wrote 0x62 but buffer read got zero. This is because, when doing direct write to a hole or preallocated file, we invalidate the page caches before converting the extent from unwritten state to normal state, which is done by iomap_dio_complete(), thus leave a window for other buffer reader to cache the unwritten state extent. Consider this case, with sub-page blocksize XFS, two processes are direct writing to different blocksize-aligned regions (say 512B) of the same preallocated file, and reading the region back via buffered I/O to compare contents. process A, region [0,512] process B, region [512,1024] xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range iomap_dio_complete xfs_file_read_iter xfs_file_buffered_aio_read generic_file_read_iter do_generic_file_read iomap_dio_complete xfs_file_read_iter Process A first invalidates page caches, at this point the underlying extent is still in unwritten state (iomap_dio_complete not called yet), and process B finishs direct write and populates page caches via readahead, which caches zeros in page for region A, then process A reads zeros from page cache, instead of the actual data. Fix it by invalidating page caches after converting unwritten extent to make sure we read content from disk after extent state changed, as what we did before switching to iomap based dio. Also introduce a new 'start' variable to save the original write offset (iomap_dio_complete() updates iocb->ki_pos), and a 'err' variable for invalidating caches result, cause we can't reuse 'ret' anymore. Signed-off-by: Eryu Guan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/iomap.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135ed..141c3cd55a8b2d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; + loff_t pos = iocb->ki_pos, start = pos; + loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + ret = iomap_dio_complete(dio); + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * this invalidation fails, tough, the write still worked... */ if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + int err = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(err); } - return iomap_dio_complete(dio); + return ret; out_free_dio: kfree(dio); From 312eb712e15868236dd03c67971ab2c1d79b4ce6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Feb 2017 18:44:11 +0100 Subject: [PATCH 0814/1911] cgroup: Fix indenting in PID controller documentation Follow the common documentation style in the file and indent the interface file description by a tab instead of just a space. Signed-off-by: Tobias Klauser Signed-off-by: Tejun Heo --- Documentation/cgroup-v2.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 3b8449f8ac7e80..49d7c997fa1ee7 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -1142,16 +1142,17 @@ used by the kernel. pids.max - A read-write single value file which exists on non-root cgroups. The - default is "max". + A read-write single value file which exists on non-root + cgroups. The default is "max". - Hard limit of number of processes. + Hard limit of number of processes. pids.current - A read-only single value file which exists on all cgroups. + A read-only single value file which exists on all cgroups. - The number of processes currently in the cgroup and its descendants. + The number of processes currently in the cgroup and its + descendants. Organisational operations are not blocked by cgroup policies, so it is possible to have pids.current > pids.max. 
This can be done by either From 1d18c2747f937f1b5ec65ce6bf4ccb9ca1aea9e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Mar 2017 15:39:07 -0500 Subject: [PATCH 0815/1911] cgroup/pids: remove spurious suspicious RCU usage warning pids_can_fork() is special in that the css association is guaranteed to be stable throughout the function and thus doesn't need RCU protection around task_css access. When determining the css to charge the pid, task_css_check() is used to override the RCU sanity check. While adding a warning message on fork rejection from pids limit, 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") incorrectly added a task_css access which is neither RCU protected or explicitly annotated. This triggers the following suspicious RCU usage warning when RCU debugging is enabled. cgroup: fork rejected by pids controller in =============================== [ ERR: suspicious RCU usage. ] 4.10.0-work+ #1 Not tainted ------------------------------- ./include/linux/cgroup.h:435 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by bash/1748: #0: (&cgroup_threadgroup_rwsem){+++++.}, at: [] _do_fork+0xe6/0x6e0 stack backtrace: CPU: 3 PID: 1748 Comm: bash Not tainted 4.10.0-work+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 Call Trace: dump_stack+0x68/0x93 lockdep_rcu_suspicious+0xd7/0x110 pids_can_fork+0x1c7/0x1d0 cgroup_can_fork+0x67/0xc0 copy_process.part.58+0x1709/0x1e90 _do_fork+0xe6/0x6e0 SyS_clone+0x19/0x20 do_syscall_64+0x5c/0x140 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f7853fab93a RSP: 002b:00007ffc12d05c90 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7853fab93a RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 RBP: 00007ffc12d05cc0 R08: 0000000000000000 R09: 00007f78548db700 R10: 00007f78548db9d0 R11: 0000000000000246 R12: 00000000000006d4 R13: 0000000000000001 R14: 0000000000000000 R15: 000055e3ebe2c04d /asdf There's no reason to dereference task_css again here when the associated css is already available. Fix it by replacing the task_cgroup() call with css->cgroup. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Fixes: 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") Cc: Kenny Yu Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Tejun Heo --- kernel/cgroup/pids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index e756dae493008e..2237201d66d5da 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task) /* Only log the first time events_limit is incremented. */ if (atomic64_inc_return(&pids->events_limit) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id)); + pr_cont_cgroup_path(css->cgroup); pr_cont("\n"); } cgroup_file_notify(&pids->events_file); From b6a6759daf55dade2b65089957832759d502acfb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Feb 2017 01:56:48 -0800 Subject: [PATCH 0816/1911] cgroups: censor kernel pointer in debug files As found in grsecurity, this avoids exposing a kernel pointer through the cgroup debug entries. 
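For context, %pK honours the kptr_restrict sysctl, so readers without the required capability see a censored value instead of the real address (depending on the kptr_restrict setting):

	seq_printf(seq, "css_set %p\n", cset);	/* prints a raw kernel pointer */
	seq_printf(seq, "css_set %pK\n", cset);	/* censored according to kptr_restrict */
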
Signed-off-by: Kees Cook Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 56eba9caa632ad..1dc22f6b49f5e0 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct task_struct *task; int count = 0; - seq_printf(seq, "css_set %p\n", cset); + seq_printf(seq, "css_set %pK\n", cset); list_for_each_entry(task, &cset->tasks, cg_list) { if (count++ > MAX_TASKS_SHOWN_PER_CSS) From d85fc67dd11e9a32966140677d4d6429ca540b25 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 3 Mar 2017 09:00:09 -0800 Subject: [PATCH 0817/1911] libata: transport: Remove circular dependency at free time Without this patch, failed probe would not free resources like irq. ata port tdev object currently holds a reference to the ata port object. Therefore the ata port object release function will not get called until the ata_tport_release is called. But that would never happen: releasing the last reference of ata port dev is done by scsi_host_release, which is called by ata_host_release when the ata port object is released. The ata device objects actually do not need to explicitly hold a reference to their real counterpart, given the transport objects are the children of these objects and device_add() is called for each child. We know the parent will not be deleted until we call the child's device_del(). Reported-by: Matthew Whitehead Tested-by: Matthew Whitehead Suggested-by: Tejun Heo Signed-off-by: Gwendal Grignou Signed-off-by: Tejun Heo --- drivers/ata/libata-transport.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 46698232e6bff0..19e6e539a061b9 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class, static void ata_tport_release(struct device *dev) { - put_device(dev->parent); } /** @@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent, device_initialize(dev); dev->type = &ata_port_type; - dev->parent = get_device(parent); + dev->parent = parent; dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); @@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class, static void ata_tlink_release(struct device *dev) { - put_device(dev->parent); } /** @@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link) int error; device_initialize(dev); - dev->parent = get_device(&ap->tdev); + dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); @@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class, static void ata_tdev_release(struct device *dev) { - put_device(dev->parent); } /** @@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) int error; device_initialize(dev); - dev->parent = get_device(&link->tdev); + dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); From 0580b762a4d6b70817476b90042813f8573283fa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:26:54 -0500 Subject: [PATCH 0818/1911] libata: drop WARN from protocol error in ata_sff_qc_issue() ata_sff_qc_issue() expects upper layers to never issue commands on a command
protocol that it doesn't implement. While the assumption holds fine with the usual IO path, nothing filters based on the command protocol in the passthrough path (which was added later), allowing the warning to be tripped with a passthrough command with the right (well, wrong) protocol. Failing with AC_ERR_SYSTEM is the right thing to do anyway. Remove the unnecessary WARN. Reported-by: Dmitry Vyukov Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA@mail.gmail.com Signed-off-by: Tejun Heo --- drivers/ata/libata-sff.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 2bd92dca3e6204..274d6d7193d7ca 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break; default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; } From 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:33:42 -0500 Subject: [PATCH 0819/1911] workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq If queue_delayed_work() gets called with NULL @wq, the kernel will oops asynchronously on timer expiration which isn't too helpful in tracking down the offender. This actually happened with smc. __queue_delayed_work() already does several input sanity checks synchronously. Add NULL @wq check. Reported-by: Dave Jones Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 072cbc9b175dc1..c0168b7da1eaf2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); From 320661b08dd6f1746d5c7ab4eb435ec64b97cd45 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 13:00:19 -0800 Subject: [PATCH 0820/1911] percpu: acquire pcpu_lock when updating pcpu_nr_empty_pop_pages Update to pcpu_nr_empty_pop_pages in pcpu_alloc() is currently done without holding pcpu_lock. This can lead to bad updates to the variable. Add missing lock calls. Fixes: b539b87fed37 ("percpu: implmeent pcpu_nr_empty_pop_pages and chunk->nr_populated") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.18+ --- mm/percpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 5696039b5c0707..60a6488e9e6d49 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1011,8 +1011,11 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, mutex_unlock(&pcpu_alloc_mutex); } - if (chunk != pcpu_reserved_chunk) + if (chunk != pcpu_reserved_chunk) { + spin_lock_irqsave(&pcpu_lock, flags); pcpu_nr_empty_pop_pages -= occ_pages; + spin_unlock_irqrestore(&pcpu_lock, flags); + } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); From 8a1df543de8ad879d3c80bdda4c67ac4f82e7ee0 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 12:59:26 -0800 Subject: [PATCH 0821/1911] percpu: remove unused chunk_alloc parameter from pcpu_get_pages() pcpu_get_pages() doesn't use chunk_alloc parameter, remove it.
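As an illustration of the locking rule behind the pcpu_lock fix above (hypothetical names, not from the patch): a counter shared between contexts is only updated while holding the spinlock that protects it, with interrupts saved across the critical section.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(counter_lock);
static int nr_empty_pages;	/* shared state, protected by counter_lock */

static void account_empty_pages(int delta)
{
	unsigned long flags;

	/* Every reader and writer of nr_empty_pages takes counter_lock,
	 * so this update cannot race with a concurrent adjustment. */
	spin_lock_irqsave(&counter_lock, flags);
	nr_empty_pages += delta;
	spin_unlock_irqrestore(&counter_lock, flags);
}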
Fixes: fbbb7f4e149f ("percpu: remove the usage of separate populated bitmap in percpu-vm") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo --- mm/percpu-vm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 538998a137d24e..9ac639499bd114 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, /** * pcpu_get_pages - get temp pages array - * @chunk: chunk of interest * * Returns pointer to array of pointers to struct page which can be indexed * with pcpu_page_idx(). Note that there is only one array and accesses @@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, * RETURNS: * Pointer to temp pages array on success. */ -static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc) +static struct page **pcpu_get_pages(void) { static struct page **pages; size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); @@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, { struct page **pages; - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); if (!pages) return -ENOMEM; @@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, * successful population attempt so the temp pages array must * be available now. */ - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); BUG_ON(!pages); /* unmap and free */ From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Mar 2017 15:03:22 -0600 Subject: [PATCH 0822/1911] ucount: Remove the atomicity from ucount->count Always increment/decrement ucount->count under the ucounts_lock. The increments are there already and moving the decrements there means the locking logic of the code is simpler. This simplification in the locking logic fixes a race between put_ucounts and get_ucounts that could result in a use-after-free because the count could go zero then be found by get_ucounts and then be freed by put_ucounts. A bug presumably this one was found by a combination of syzkaller and KASAN. JongWhan Kim reported the syzkaller failure and Dmitry Vyukov spotted the race in the code. Cc: stable@vger.kernel.org Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user") Reported-by: JongHwan Kim Reported-by: Dmitry Vyukov Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. 
Biederman" --- include/linux/user_namespace.h | 2 +- kernel/ucount.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index be765234c0a2b9..32354b4b4b2ba5 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/kernel/ucount.c b/kernel/ucount.c index 62630a40ab3a42..b4eeee03934fe8 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) From bd571195c9535c0b074fc7cd1b541b93817ed647 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2017 15:58:03 +0100 Subject: [PATCH 0823/1911] scsi: qedi: fix build error without DEBUG_FS Without CONFIG_DEBUG_FS, we run into a link error: drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_poll': qedi_iscsi.c:(.text.qedi_ep_poll+0x134): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_disconnect': qedi_iscsi.c:(.text.qedi_ep_disconnect+0x36c): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_connect': qedi_iscsi.c:(.text.qedi_ep_connect+0x350): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_fw.o: In function `qedi_tmf_work': qedi_fw.c:(.text.qedi_tmf_work+0x3b4): undefined reference to `do_not_recover' This defines the symbol as a constant in this case, as there is no way to set it to anything other than zero without DEBUG_FS. In addition, I'm renaming it to qedi_do_not_recover in order to put it into a driver specific namespace, as "do_not_recover" is a really bad name for a kernel-wide global identifier when it is used only in one driver. Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Reviewed-by: Johannes Thumshirn Signed-off-by: Arnd Bergmann Acked-by: Manish Rangankar Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedi/qedi_debugfs.c | 16 ++++++++-------- drivers/scsi/qedi/qedi_fw.c | 4 ++-- drivers/scsi/qedi/qedi_gbl.h | 8 +++++++- drivers/scsi/qedi/qedi_iscsi.c | 8 ++++---- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c index 95593627424140..59417199bf363a 100644 --- a/drivers/scsi/qedi/qedi_debugfs.c +++ b/drivers/scsi/qedi/qedi_debugfs.c @@ -14,7 +14,7 @@ #include #include -int do_not_recover; +int qedi_do_not_recover; static struct dentry *qedi_dbg_root; void @@ -74,22 +74,22 @@ qedi_dbg_exit(void) static ssize_t qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg) { - if (!do_not_recover) - do_not_recover = 1; + if (!qedi_do_not_recover) + qedi_do_not_recover = 1; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } static ssize_t qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg) { - if (do_not_recover) - do_not_recover = 0; + if (qedi_do_not_recover) + qedi_do_not_recover = 0; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } @@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer, if (*ppos) return 0; - cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover); + cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover); cnt = min_t(int, count, cnt - *ppos); *ppos += cnt; return cnt; diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index c9f0ef4e11b33c..2bce3efc66a4b4 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work) get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id, qedi_conn->iscsi_conn_id); - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n", - do_not_recover); + qedi_do_not_recover); goto abort_ret; } diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h index 8e488de88ece9f..63d793f460645d 100644 --- a/drivers/scsi/qedi/qedi_gbl.h +++ b/drivers/scsi/qedi/qedi_gbl.h @@ -12,8 +12,14 @@ #include "qedi_iscsi.h" +#ifdef CONFIG_DEBUG_FS +extern int qedi_do_not_recover; +#else +#define qedi_do_not_recover (0) +#endif + extern uint qedi_io_tracing; -extern int do_not_recover; + extern struct scsi_host_template qedi_host_template; extern struct iscsi_transport qedi_iscsi_transport; extern const struct qed_iscsi_ops *qedi_ops; diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index b9f79d36142d5e..4cc474364c5056 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, return ERR_PTR(ret); } - if (do_not_recover) { + if (qedi_do_not_recover) { ret = -ENOMEM; return ERR_PTR(ret); } @@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) struct qedi_endpoint *qedi_ep; int ret = 0; - if (do_not_recover) + if (qedi_do_not_recover) return 1; qedi_ep = ep->dd_data; @@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) { - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Do not recover cid=0x%x\n", qedi_ep->iscsi_cid); @@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } } - if 
(do_not_recover) + if (qedi_do_not_recover) goto ep_exit_recover; switch (qedi_ep->state) { From 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 2 Mar 2017 09:21:33 -0800 Subject: [PATCH 0824/1911] scsi: aacraid: Fix typo in blink status The return status of the adapter check on KERNEL_PANIC is supposed to be the upper 16 bits of the OMR status register. Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 2e5338dec621fb..7b0410e0f56948 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -468,7 +468,7 @@ static int aac_src_check_health(struct aac_dev *dev) return -1; err_blink: - return (status > 16) & 0xFF; + return (status >> 16) & 0xFF; } static inline u32 aac_get_vector(struct aac_dev *dev) From 23456565acf6d452e0368f7380aecd584c019c67 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 2 Mar 2017 17:14:47 -0800 Subject: [PATCH 0825/1911] scsi: qla2xxx: Fix ql_dump_buffer Recent printk changes for KERN_CONT cause this logging to be defectively emitted on multiple lines. Fix it. Also reduces object size a trivial amount. $ size drivers/scsi/qla2xxx/qla_dbg.o* text data bss dec hex filename 39125 0 0 39125 98d5 drivers/scsi/qla2xxx/qla_dbg.o.new 39164 0 0 39164 98fc drivers/scsi/qla2xxx/qla_dbg.o.old Signed-off-by: Joe Perches Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 21d9fb7fc88796..51b4179469d185 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); } From c527de41aea24c2cdb6638818008d810013b4d39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Mar 2017 07:57:16 -0700 Subject: [PATCH 0826/1911] scsi: vmw_pvscsi: handle the return value from pci_alloc_irq_vectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns the number of vectors allocated when successful, so check for a negative error only. Fixes: 2e48e349 ("scsi: vmw_pvscsi: switch to pci_alloc_irq_vectors") Signed-off-by: Christoph Hellwig Reported-by: Loïc Yhuel Tested-by: Loïc Yhuel Signed-off-by: Martin K. 
Petersen --- drivers/scsi/vmw_pvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ef474a7487449b..c374e3b5c678d2 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) irq_flag &= ~PCI_IRQ_MSI; error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag); - if (error) + if (error < 0) goto out_reset_adapter; adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true); From db6b2060bcae1ce1f9bde73a423e710b625ae1ef Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 4 Mar 2017 00:07:04 -0800 Subject: [PATCH 0827/1911] scsi: qedf: Fix defective logging format and argument mismatches Add __printf compiler verification of format and arguments. Fix fallout. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_dbg.h | 13 ++++++++----- drivers/scsi/qedf/qedf_fip.c | 2 +- drivers/scsi/qedf/qedf_io.c | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h index 23bd70628a2f05..7d173f48a81e8d 100644 --- a/drivers/scsi/qedf/qedf_dbg.h +++ b/drivers/scsi/qedf/qedf_dbg.h @@ -81,14 +81,17 @@ struct qedf_dbg_ctx { #define QEDF_INFO(pdev, level, fmt, ...) \ qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \ ## __VA_ARGS__) - -extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *fmt, ...); -extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, +__printf(4, 5) +void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(5, 6) +void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, u32 info, const char *fmt, ...); /* GRC Dump related defines */ diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c index 868d423380d120..ed58b9104f58b8 100644 --- a/drivers/scsi/qedf/qedf_fip.c +++ b/drivers/scsi/qedf/qedf_fip.c @@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb) case FIP_DT_MAC: mp = (struct fip_mac_desc *)desc; QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, - "fd_mac=%pM.\n", __func__, mp->fd_mac); + "fd_mac=%pM\n", mp->fd_mac); ether_addr_copy(cvl_mac, mp->fd_mac); break; case FIP_DT_NAME: diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index ee0dcf9d3aba78..46debe5034af10 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, } else { refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, - "%d:0:%d:%d xid=0x%0x op=0x%02x " + "%d:0:%d:%lld xid=0x%0x op=0x%02x " "lba=%02x%02x%02x%02x cdb_status=%d " "fcp_resid=0x%x refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, @@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, sc_cmd->result = result << 16; refcount = kref_read(&io_req->refcount); - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, 
"%d:0:%d:%d: Completing " + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing " "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " "allowed=%d retries=%d refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, From fd2b18b4a7bf01453324733a18297ae9a197a4ae Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 6 Mar 2017 10:32:27 -0800 Subject: [PATCH 0828/1911] scsi: qedf: Use vsprintf extension %pad Using %llx for a dma_addr_t can lead to format/argument mismatches. Use %pad and the address of the dma_addr_t instead. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index d9d7a86b5f8baf..8e2a160490e66a 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf) } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, - "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl, - qedf->bdq_pbl_dma); + "BDQ PBL addr=0x%p dma=%pad\n", + qedf->bdq_pbl, &qedf->bdq_pbl_dma); /* * Populate BDQ PBL with physical and virtual address of individual From 33cc559a81397bc813c392617d40ddfdfa3cffbd Mon Sep 17 00:00:00 2001 From: Tomas Jasek Date: Fri, 3 Mar 2017 13:45:48 +0100 Subject: [PATCH 0829/1911] scsi: lpfc: replace init_timer by setup_timer This patch shortens every init_timer in lpfc module followed by function and data assignment using setup_timer. This is purely cleanup patch, it does not add new functionality nor remove any existing functionality. An init_timer call in this form: init_timer(&vport->fc_disctmo); vport->fc_disctmo.function = lpfc_disc_timeout; vport->fc_disctmo.data = vport; is shortened to: setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, vport); It increases readability and reduces chances of mistakes done by developers. Signed-off-by: Tomas Jasek Signed-off-by: Jiri Slaby Cc: James Smart Cc: Dick Kennedy Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Acked-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 5 ++-- drivers/scsi/lpfc/lpfc_init.c | 47 ++++++++++++-------------------- 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 194a14d5f8a982..2612dac7518671 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4344,9 +4344,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); - init_timer(&ndlp->nlp_delayfunc); - ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; - ndlp->nlp_delayfunc.data = (unsigned long)ndlp; + setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay, + (unsigned long)ndlp); ndlp->nlp_DID = did; ndlp->vport = vport; ndlp->phba = vport->phba; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0ee429d773f394..f395f2e4aa9709 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3734,17 +3734,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) INIT_LIST_HEAD(&vport->rcv_buffer_list); spin_lock_init(&vport->work_port_lock); - init_timer(&vport->fc_disctmo); - vport->fc_disctmo.function = lpfc_disc_timeout; - vport->fc_disctmo.data = (unsigned long)vport; + setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, + (unsigned long)vport); - init_timer(&vport->els_tmofunc); - vport->els_tmofunc.function = lpfc_els_timeout; - vport->els_tmofunc.data = (unsigned long)vport; + setup_timer(&vport->els_tmofunc, lpfc_els_timeout, + (unsigned long)vport); - init_timer(&vport->delayed_disc_tmo); - vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo; - vport->delayed_disc_tmo.data = (unsigned long)vport; + setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, + (unsigned long)vport); error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) @@ -5406,21 +5403,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->luns); /* MBOX heartbeat timer */ - init_timer(&psli->mbox_tmo); - psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long) phba; + setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba); /* Fabric block timer */ - init_timer(&phba->fabric_block_timer); - phba->fabric_block_timer.function = lpfc_fabric_block_timeout; - phba->fabric_block_timer.data = (unsigned long) phba; + setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout, + (unsigned long)phba); /* EA polling mode timer */ - init_timer(&phba->eratt_poll); - phba->eratt_poll.function = lpfc_poll_eratt; - phba->eratt_poll.data = (unsigned long) phba; + setup_timer(&phba->eratt_poll, lpfc_poll_eratt, + (unsigned long)phba); /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; + setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba); return 0; } @@ -5446,9 +5437,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) */ /* FCP polling mode timer */ - init_timer(&phba->fcp_poll_timer); - phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long) phba; + setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout, + (unsigned long)phba); /* Host attention work mask setup */ phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT); @@ -5617,14 +5607,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * 
Initialize timers used by driver */ - init_timer(&phba->rrq_tmr); - phba->rrq_tmr.function = lpfc_rrq_timeout; - phba->rrq_tmr.data = (unsigned long)phba; + setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba); /* FCF rediscover timer */ - init_timer(&phba->fcf.redisc_wait); - phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo; - phba->fcf.redisc_wait.data = (unsigned long)phba; + setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo, + (unsigned long)phba); /* * Control structure for handling external multi-buffer mailbox From 70e5afd57d6d97d69bf5fd0187c2bb5a79990504 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:21 -0800 Subject: [PATCH 0830/1911] scsi: lpfc: remove redundant assignment of sgel From: Colin Ian King In the NVMET_FCOP_RSP case, sgel is assigned but never used and hence is redundant and can be removed. Detected by CoverityScan, CID#1411658 ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index c421e1738ee989..e59a0a89d357b8 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1445,7 +1445,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, case NVMET_FCOP_RSP: /* Words 0 - 2 */ - sgel = &rsp->sg[0]; physaddr = rsp->rspdma; wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen; From 7aabe84b8a1bb7c3f75fb6a23dc96f8999bdcc8f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:22 -0800 Subject: [PATCH 0831/1911] scsi: lpfc: sanity check hrq is null before dereferencing it From: Colin Ian King The sanity check for hrq should be moved to before the deference of hrq to ensure we don't perform a null pointer deference. Detected by CoverityScan, CID#1411650 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d8d8693040acd0..562a5286bc479b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15185,17 +15185,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, drq = drqp[idx]; cq = cqp[idx]; - if (hrq->entry_count != drq->entry_count) { - status = -EINVAL; - goto out; - } - /* sanity check on queue memory */ if (!hrq || !drq || !cq) { status = -ENODEV; goto out; } + if (hrq->entry_count != drq->entry_count) { + status = -EINVAL; + goto out; + } + if (idx == 0) { bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request, From 332ba3b5d6d27a60d445704ed7c88c7e9f958a30 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:23 -0800 Subject: [PATCH 0832/1911] scsi: lpfc: don't dereference dma_buf->iocbq before null check From: Colin Ian King dma_buf->iocbq is being dereferenced immediately before it is being null checked, so we have a potential null pointer dereference bug. Fix this by only dereferencing it only once we have passed a null check on the pointer. Detected by CoverityScan, CID#1411652 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index c61d8d692edeee..5986c7957199df 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) } dma_buf->iocbq = lpfc_sli_get_iocbq(phba); - dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; if (!dma_buf->iocbq) { kfree(dma_buf->context); pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, @@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) "2621 Ran out of nvmet iocb/WQEs\n"); return NULL; } + dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; nvmewqe = dma_buf->iocbq; wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; /* Initialize WQE */ From d11f54b7d1d40463024be3b40ab7be2c8a84fa43 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:24 -0800 Subject: [PATCH 0833/1911] scsi: lpfc: fix missing spin_unlock on sql_list_lock From: Colin Ian King In the case where sglq is null, the current code just returns without unlocking the spinlock sql_list_lock. Fix this by breaking out of the while loop and the exit path will then unlock and return NULL as was the original intention. Detected by CoverityScan, CID#1411635 ("Missing unlock") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 562a5286bc479b..7389c130ffccca 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -952,7 +952,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) start_sglq = sglq; while (!found) { if (!sglq) - return NULL; + break; if (ndlp && ndlp->active_rrqs_xri_bitmap && test_bit(sglq->sli4_lxritag, ndlp->active_rrqs_xri_bitmap)) { From 5d181531bc6169e19a02a27d202cf0e982db9d0e Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:25 -0800 Subject: [PATCH 0834/1911] scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters if REG_VPI fails, the driver was incorrectly issuing INIT_VFI (a SLI4 command) on a SLI3 adapter. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2d26440e6f2fe6..d260a1391baf86 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else { From 8b3616392d32d2b04ce649caf6684da1b7050d8c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:26 -0800 Subject: [PATCH 0835/1911] scsi: lpfc: Fix RCTL value on NVME LS request and response NVME LS requests and responses had wrong R_CTL values. 
Use the FC4 ELS Request and Response defines (defines badly named, they are FC4 LS's) instead of the base ELS values. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 609a908ea9db5b..6346d4dee9ff57 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0); bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1); bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1); - bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL); + bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ); bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME); /* Word 6 */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index e59a0a89d357b8..6c2221aac394bd 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1114,7 +1114,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0); bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1); bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0); - bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL); + bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP); bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME); /* Word 6 */ From b06a622ffe8dddcc09dad23dc768f7d1540fb4d6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:27 -0800 Subject: [PATCH 0836/1911] scsi: lpfc: Fix NVME CMD IU byte swapped word 1 problem Word 1 in NVME CMD IU appears byte swapped from value placed in WQE Should be Big Endian value in WQE word 16 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6346d4dee9ff57..bf3ccc5d59ace7 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, * Embed the payload in the last half of the WQE * WQE words 16-30 get the NVME CMD IU payload * - * WQE Word 16 is already setup with flags - * WQE words 17-19 get payload Words 2-4 + * WQE words 16-19 get payload Words 1-4 * WQE words 20-21 get payload Words 6-7 * WQE words 22-29 get payload Words 16-23 */ - wptr = &wqe->words[17]; /* WQE ptr */ + wptr = &wqe->words[16]; /* WQE ptr */ dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */ - dptr += 2; /* Skip Words 0-1 in payload */ + dptr++; /* Skip Word 0 in payload */ + *wptr++ = *dptr++; /* Word 1 */ *wptr++ = *dptr++; /* Word 2 */ *wptr++ = *dptr++; /* Word 3 */ *wptr++ = *dptr++; /* Word 4 */ @@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_WRITE_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_WRITE; - phba->fc4NvmeOutputRequests++; } else { /* Word 7 */ @@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_READ; - phba->fc4NvmeInputRequests++; } } else { @@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, /* Word 11 */ bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_CMD; - phba->fc4NvmeControlRequests++; } /* From a5068b46958870633ea340692f301507ef78d00b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:28 -0800 Subject: [PATCH 0837/1911] scsi: lpfc: Fix IO submission if WQ is full For both initiator and target: if WQ is full, return -EBUSY. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index bf3ccc5d59ace7..e1bf31eca845ae 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1310,7 +1310,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, "sid: x%x did: x%x oxid: x%x\n", ret, vport->fc_myDID, ndlp->nlp_DID, lpfc_ncmd->cur_iocbq.sli4_xritag); - ret = -EINVAL; + ret = -EBUSY; goto out_free_nvme_buf; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6c2221aac394bd..735e2ba70198a3 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6102 Bad state IO x%x aborted\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6152 FCP Drop IO x%x: Prep\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, ctxp->wqeq->hba_wqidx = 0; nvmewqeq->context2 = NULL; nvmewqeq->context3 = NULL; + rc = -EBUSY; aerr: - return -ENXIO; + return rc; } static void From 3ebd9b4701ef77358030a0ef9cb6586db2149712 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:29 -0800 Subject: [PATCH 0838/1911] scsi: lpfc: Fix nvme allocation bug on failed nvme_fc_register_localport nvme bufs get allocated even when the registration fails. Move allocation into the registration success path. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index e1bf31eca845ae..26cde7c040bafa 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2164,10 +2164,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) lport->vport = vport; INIT_LIST_HEAD(&lport->rport_list); vport->nvmei_support = 1; + len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); + vport->phba->total_nvme_bufs += len; } - len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); - vport->phba->total_nvme_bufs += len; return ret; } From 318083ad9230ff13cdac34ae4c4135e0c4e2d9ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:30 -0800 Subject: [PATCH 0839/1911] scsi: lpfc: add NVME exchange aborts previous code did little more than log a message. This patch adds abort path support, modeled after the SCSI code paths. Currently addresses only the initiator path. Target path under development, but stubbed out. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K.
Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_hbadisc.c | 2 + drivers/scsi/lpfc/lpfc_init.c | 7 ++++ drivers/scsi/lpfc/lpfc_nvme.c | 64 +++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.h | 1 + drivers/scsi/lpfc/lpfc_nvmet.c | 21 +++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 51 ++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 6 +++ 8 files changed, 143 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 0bba2e30b4f09f..de6cd57dc7f6f5 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -692,6 +692,7 @@ struct lpfc_hba { * capability */ #define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ +#define NVME_XRI_ABORT_EVENT 0x100000 uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 2612dac7518671..bd8635d303d241 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -641,6 +641,8 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_rrq_active(phba); if (phba->hba_flag & FCP_XRI_ABORT_EVENT) lpfc_sli4_fcp_xri_abort_event_proc(phba); + if (phba->hba_flag & NVME_XRI_ABORT_EVENT) + lpfc_sli4_nvme_xri_abort_event_proc(phba); if (phba->hba_flag & ELS_XRI_ABORT_EVENT) lpfc_sli4_els_xri_abort_event_proc(phba); if (phba->hba_flag & ASYNC_EVENT) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f395f2e4aa9709..a0cba631869e6e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5723,6 +5723,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + /* Fast-path XRI aborted CQ Event work queue list */ + INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } /* This abort list used by worker thread */ @@ -8960,6 +8962,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) /* Pending ELS XRI abort events */ list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, &cqelist); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Pending NVME XRI abort events */ + list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + &cqelist); + } /* Pending asynnc events */ list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, &cqelist); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 26cde7c040bafa..b9012fee16320e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1277,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, pnvme_fcreq->private = (void *)lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; + lpfc_ncmd->ndlp = ndlp; lpfc_ncmd->start_time = jiffies; lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp); @@ -1812,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, pdma_phys_sgl1, cur_xritag); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -1845,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, struct lpfc_nvme_buf, list); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + 
lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -2090,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) unsigned long iflag = 0; lpfc_ncmd->nonsg_phys = 0; - if (lpfc_ncmd->exch_busy) { + if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2453,3 +2454,56 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } + +/** + * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the fcp xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * FCP aborted xri. + **/ +void +lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd, + &phba->sli4_hba.lpfc_abts_nvme_buf_list, + list) { + if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { + list_del(&lpfc_ncmd->list); + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; + lpfc_ncmd->status = IOSTAT_SUCCESS; + spin_unlock( + &phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_ncmd->ndlp; + if (ndlp) { + lpfc_set_rrq_active( + phba, ndlp, + lpfc_ncmd->cur_iocbq.sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + lpfc_release_nvme_buf(phba, lpfc_ncmd); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index b2fae5e813f8a8..1347deb8dd6cbd 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -57,6 +57,7 @@ struct lpfc_nvme_buf { struct list_head list; struct nvmefc_fcp_req *nvmeCmd; struct lpfc_nvme_rport *nrport; + struct lpfc_nodelist *ndlp; uint32_t timeout; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 735e2ba70198a3..48ce9858bc115d 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -734,6 +734,21 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba) return 0; } +/** + * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the nvmet xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * NVMET aborted xri. 
+ **/ +void +lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + /* TODO: work in progress */ +} + void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { @@ -958,7 +973,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", + "6159 FCP Drop IO x%x: err x%x\n", ctxp->oxid, rc); dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", @@ -1683,8 +1698,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 %s: Entrypoint: sid %x xri %x\n", __func__, - sid, xri); + "6067 Abort: sid %x xri x%x/x%x\n", + sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 7389c130ffccca..8a606d0d2c795d 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12212,6 +12212,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba) } } +/** + * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked by the worker thread to process all the pending + * SLI4 NVME abort XRI events. + **/ +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba) +{ + struct lpfc_cq_event *cq_event; + + /* First, declare the fcp xri abort event has been handled */ + spin_lock_irq(&phba->hbalock); + phba->hba_flag &= ~NVME_XRI_ABORT_EVENT; + spin_unlock_irq(&phba->hbalock); + /* Now, handle all the fcp xri abort events */ + while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) { + /* Get the first event from the head of the event queue */ + spin_lock_irq(&phba->hbalock); + list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + cq_event, struct lpfc_cq_event, list); + spin_unlock_irq(&phba->hbalock); + /* Notify aborted XRI for NVME work queue */ + if (phba->nvmet_support) { + lpfc_sli4_nvmet_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } else { + lpfc_sli4_nvme_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } + /* Free the event processed back to the free pool */ + lpfc_sli4_cq_event_release(phba, cq_event); + } +} + /** * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event * @phba: pointer to lpfc hba data structure. 
@@ -12709,10 +12744,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba, spin_unlock_irqrestore(&phba->hbalock, iflags); workposted = true; break; + case LPFC_NVME: + spin_lock_irqsave(&phba->hbalock, iflags); + list_add_tail(&cq_event->list, + &phba->sli4_hba.sp_nvme_xri_aborted_work_queue); + /* Set the nvme xri abort event flag */ + phba->hba_flag |= NVME_XRI_ABORT_EVENT; + spin_unlock_irqrestore(&phba->hbalock, iflags); + workposted = true; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0603 Invalid work queue CQE subtype (x%x)\n", - cq->subtype); + "0603 Invalid CQ subtype %d: " + "%08x %08x %08x %08x\n", + cq->subtype, wcqe->word0, wcqe->parameter, + wcqe->word2, wcqe->word3); + lpfc_sli4_cq_event_release(phba, cq_event); workposted = false; break; } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 91153c9f6d1825..710458cf11d62f 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -642,6 +642,7 @@ struct lpfc_sli4_hba { struct list_head sp_asynce_work_queue; struct list_head sp_fcp_xri_aborted_work_queue; struct list_head sp_els_xri_aborted_work_queue; + struct list_head sp_nvme_xri_aborted_work_queue; struct list_head sp_unsol_work_queue; struct lpfc_sli4_link link_state; struct lpfc_sli4_lnk_info lnk_info; @@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *); +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *); void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); +void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); +void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); void lpfc_sli4_els_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *); From 96418b5e2c8867da3279d877f5d1ffabfe460c3d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:31 -0800 Subject: [PATCH 0840/1911] scsi: lpfc: Fix eh_deadline setting for sli3 adapters. A previous change unilaterally removed the hba reset entry point from the sli3 host template. This was done to keep tape devices being used for backup from being removed. Why was this done? When there was a non-responding device on the fabric, the error escalation policy would escalate to the reset handler. When the reset handler was called, it would reset the adapter, dropping link, thus logging out and terminating all i/o's - on any target. If there was a tape device on the same adapter that wasn't in error, it would kill the tape i/o's, effectively killing the tape device state. With the reset point removed, the adapter reset and its fabric logout were avoided, allowing the other devices to continue to operate unaffected. A hack - yes. Hint: we really need a transport I_T nexus reset callback added to the eh process (in between the SCSI target reset and hba reset points), so a fc logout could occur to the one bad target only and stop the error escalation process. This patch commonizes the approach so it can be used for sli3 and sli4 adapters, but mandates that the admin, via a module parameter, specifically identify which adapters the resets are to be removed for.
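As an illustration of the module-parameter-array mechanism the opt-out above relies on (hypothetical names; the real parameter, lpfc_no_hba_reset, appears in the diff that follows): module_param_array() exposes an admin-supplied list and reports back how many entries were given, which the driver then scans at probe time.

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>

#define MAX_NO_RESET	16

/* Filled from the command line, e.g.
 *   modprobe demo_drv no_reset_wwpn=0x10000090fa000001,0x10000090fa000002
 */
static unsigned long no_reset_wwpn[MAX_NO_RESET];
static int no_reset_cnt;
module_param_array(no_reset_wwpn, ulong, &no_reset_cnt, 0444);
MODULE_PARM_DESC(no_reset_wwpn, "WWPNs of ports that must not be reset");

static bool wwpn_is_protected(u64 wwpn)
{
	int i;

	for (i = 0; i < no_reset_cnt; i++)
		if (no_reset_wwpn[i] == wwpn)
			return true;
	return false;
}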
Additionally, bus_reset, which sends Target Reset TMFs to all targets, is also removed from the template as it too has the same effect as the adapter reset. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 6 ++++ drivers/scsi/lpfc/lpfc_crtn.h | 4 ++- drivers/scsi/lpfc/lpfc_init.c | 61 +++++++++++++++++++++++++++++++++-- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- 5 files changed, 69 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index de6cd57dc7f6f5..763f32dd2d230e 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -99,6 +99,7 @@ struct lpfc_sli2_slim; #define FC_MAX_ADPTMSG 64 #define MAX_HBAEVT 32 +#define MAX_HBAS_NO_RESET 16 /* Number of MSI-X vectors the driver uses */ #define LPFC_MSIX_VECTORS 2 diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4114cf4308d0fe..b741dcb8ee64ab 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, lpfc_poll_show, lpfc_poll_store); +int lpfc_no_hba_reset_cnt; +unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444); +MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset"); + LPFC_ATTR(sli_mode, 0, 0, 3, "SLI mode selector:" " 0 - auto (SLI-3 if supported)," diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 843dd73004da02..54e6ac42fbcd42 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *); extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; -extern struct scsi_host_template lpfc_template_s3; +extern struct scsi_host_template lpfc_template_no_hr; extern struct scsi_host_template lpfc_template_nvme; extern struct scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; @@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_wcqe_complete *abts_cmpl); extern int lpfc_enable_nvmet_cnt; extern unsigned long long lpfc_enable_nvmet[]; +extern int lpfc_no_hba_reset_cnt; +extern unsigned long lpfc_no_hba_reset[]; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a0cba631869e6e..4fa21a9fd883c6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3555,6 +3555,44 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) return rc; } +static uint64_t +lpfc_get_wwpn(struct lpfc_hba *phba) +{ + uint64_t wwn; + int rc; + LPFC_MBOXQ_t *mboxq; + MAILBOX_t *mb; + + + mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!mboxq) + return (uint64_t)-1; + + /* First get WWN of HBA instance */ + lpfc_read_nv(phba, mboxq); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6019 Mailbox failed , mbxCmd x%x " + "READ_NV, mbxStatus x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); + return 
(uint64_t) -1; + } + mb = &mboxq->u.mb; + memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t)); + /* wwn is WWPN of HBA instance */ + mempool_free(mboxq, phba->mbox_mem_pool); + if (phba->sli_rev == LPFC_SLI_REV4) + return be64_to_cpu(wwn); + else + return (((wwn & 0xffffffff00000000) >> 32) | + ((wwn & 0x00000000ffffffff) << 32)); + +} + /** * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. @@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) struct lpfc_vport *vport; struct Scsi_Host *shost = NULL; int error = 0; + int i; + uint64_t wwn; + bool use_no_reset_hba = false; + + wwn = lpfc_get_wwpn(phba); + + for (i = 0; i < lpfc_no_hba_reset_cnt; i++) { + if (wwn == lpfc_no_hba_reset[i]) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6020 Setting use_no_reset port=%llx\n", + wwn); + use_no_reset_hba = true; + break; + } + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { if (dev != &phba->pcidev->dev) { shost = scsi_host_alloc(&lpfc_vport_template, sizeof(struct lpfc_vport)); } else { - if (phba->sli_rev == LPFC_SLI_REV4) + if (!use_no_reset_hba) shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); else - shost = scsi_host_alloc(&lpfc_template_s3, + shost = scsi_host_alloc(&lpfc_template_no_hr, sizeof(struct lpfc_vport)); } } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { @@ -5472,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates the configured values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */ if (phba->cfg_enable_bg) { @@ -5693,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates with the updated values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 9d6384af9fce7e..bcfd6d6ab16d0c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5953,7 +5953,7 @@ struct scsi_host_template lpfc_template_nvme = { .track_queue_depth = 0, }; -struct scsi_host_template lpfc_template_s3 = { +struct scsi_host_template lpfc_template_no_hr = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, .proc_name = LPFC_DRIVER_NAME, @@ -6015,7 +6015,6 @@ struct scsi_host_template lpfc_vport_template = { .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, - .eh_bus_reset_handler = lpfc_bus_reset_handler, .slave_alloc = lpfc_slave_alloc, .slave_configure = lpfc_slave_configure, .slave_destroy = lpfc_slave_destroy, From 856984b71cefab1580e9439e5382db1247d689b0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:32 -0800 Subject: [PATCH 0841/1911] scsi: lpfc: add transport eh_timed_out reference Christoph's prior patch missed the template for the sli3 adapters, which is now the "no host reset" template. 
Add the transport eh_timed_out handler to the no host reset template Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index bcfd6d6ab16d0c..54fd0c81ceaf69 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5959,6 +5959,7 @@ struct scsi_host_template lpfc_template_no_hr = { .proc_name = LPFC_DRIVER_NAME, .info = lpfc_info, .queuecommand = lpfc_queuecommand, + .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, From 166d721120c1cf768af11706c3e0411324bf138f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:33 -0800 Subject: [PATCH 0842/1911] scsi: lpfc: Rework lpfc Kconfig for NVME options Reworked Kconfig so that lfpc only requires the scsi stack. NVME Initiator and NVME Target support can be enabled if the other NVMe subsystems have been enabled. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 19 ++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.c | 18 +++++++++++++++--- drivers/scsi/lpfc/lpfc_nvmet.c | 10 ++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 230043c1c90ffc..4bf55b5d78be53 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1241,19 +1241,32 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI depends on SCSI_FC_ATTRS - depends on NVME_FC && NVME_TARGET_FC select CRC_T10DIF - help + ---help--- This lpfc driver supports the Emulex LightPulse Family of Fibre Channel PCI host adapters. config SCSI_LPFC_DEBUG_FS bool "Emulex LightPulse Fibre Channel debugfs Support" depends on SCSI_LPFC && DEBUG_FS - help + ---help--- This makes debugging information from the lpfc driver available via the debugfs filesystem. +config LPFC_NVME_INITIATOR + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_FC + ---help--- + This enables NVME Initiator support in the Emulex lpfc driver. + +config LPFC_NVME_TARGET + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_TARGET_FC + ---help--- + This enables NVME Target support in the Emulex lpfc driver. + Target enablement must still be enabled on a per adapter + basis by module parameters. + config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index b9012fee16320e..0a4c1908140940 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2127,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) int lpfc_nvme_create_localport(struct lpfc_vport *vport) { + int ret = 0; struct lpfc_hba *phba = vport->phba; struct nvme_fc_port_info nfcp_info; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; - int len, ret = 0; + int len; /* Initialize this localport instance. The vport wwn usage ensures * that NPIV is accounted for. @@ -2148,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. 
*/ +#ifdef CONFIG_LPFC_NVME_INITIATOR ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); +#else + ret = -ENOMEM; +#endif if (!ret) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC, "6005 Successfully registered local " @@ -2185,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { +#ifdef CONFIG_LPFC_NVME_INITIATOR struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2200,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6011 Destroying NVME localport %p\n", localport); - list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) { /* The last node ref has to get released now before the rport * private memory area is released by the transport. @@ -2214,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "6008 rport fail destroy %x\n", ret); wait_for_completion_timeout(&rport->rport_unreg_done, 5); } + /* lport's rport list is clear. Unregister * lport and release resources. */ @@ -2237,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "Failed, status x%x\n", ret); } +#endif } void @@ -2267,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2340,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR; rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); - ret = nvme_fc_register_remoteport(localport, &rpinfo, &remote_port); if (!ret) { @@ -2376,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_type, ndlp->nlp_DID, ndlp); } return ret; +#else + return 0; +#endif } /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. 
@@ -2393,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2450,6 +2461,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) return; input_err: +#endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 48ce9858bc115d..a69ca6ea1d6c30 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; +#ifdef CONFIG_LPFC_NVME_TARGET error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); +#else + error = -ENOMEM; +#endif if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6025 Cannot register NVME targetport " @@ -752,6 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -763,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); } phba->targetport = NULL; +#endif } /** @@ -782,6 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -862,6 +869,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, atomic_inc(&tgtp->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); +#endif } /** @@ -883,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; @@ -988,6 +997,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); } +#endif } /** From 43140ca68d1a071ddbe92f10a3256e01701ae390 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:34 -0800 Subject: [PATCH 0843/1911] scsi: lpfc: Rename LPFC_MAX_EQ_DELAY to LPFC_MAX_EQ_DELAY_EQID_CNT Without apriori understanding of what the define is, the name gives a very different impression of what it is (a max delay value for an EQ). Rename the define so it reflects what it is: the number of EQ IDs that can be set in one instance of the MODIFY_EQ_DELAY mbx command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 3 ++- drivers/scsi/lpfc/lpfc_hw4.h | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 ++----- drivers/scsi/lpfc/lpfc_sli.c | 5 ++++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b741dcb8ee64ab..fbd3a563be53f5 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4457,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, return -EINVAL; phba->cfg_fcp_imax = (uint32_t)val; - for (i = 0; i < phba->io_channel_irqs; i++) + + for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, i); return strlen(buf); diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index cfdb068a3bfccb..15277705cb6b8c 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1001,7 +1001,7 @@ struct eq_delay_info { uint32_t phase; uint32_t delay_multi; }; -#define LPFC_MAX_EQ_DELAY 8 +#define LPFC_MAX_EQ_DELAY_EQID_CNT 8 struct sgl_page_pairs { uint32_t sgl_pg0_addr_lo; @@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay { union { struct { uint32_t num_eq; - struct eq_delay_info eq[LPFC_MAX_EQ_DELAY]; + struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT]; } request; struct { uint32_t word0; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4fa21a9fd883c6..c883110178ea7f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8756,12 +8756,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) } } - /* - * Configure EQ delay multipier for interrupt coalescing using - * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. - */ - for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY) + for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, qidx); + return 0; out_destroy: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 8a606d0d2c795d..618cdacf13ddc1 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,3 +1,4 @@ + /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * @@ -13874,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * @startq: The starting FCP EQ to modify * * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA. + * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be + * updated in one mailbox command. * * The @phba struct is used to send mailbox command to HBA. The @startq * is used to get the starting FCP EQ to change. @@ -13926,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq) eq_delay->u.request.eq[cnt].phase = 0; eq_delay->u.request.eq[cnt].delay_multi = dmult; cnt++; - if (cnt >= LPFC_MAX_EQ_DELAY) + if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT) break; } eq_delay->u.request.num_eq = cnt; From da6b044a28fe603fe2c3fd908cda8150aa0abe74 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:35 -0800 Subject: [PATCH 0844/1911] scsi: lpfc: correct double print Correct a merge error that had debug data printed twice for the same element Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 9f4798e9d9380d..913eed822cb8ed 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_cq; goto pass_check; } - /* NVME LS complete queue */ - if (phba->sli4_hba.nvmels_cq && - phba->sli4_hba.nvmels_cq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_cq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_cq; - goto pass_check; - } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_wq; goto pass_check; } - /* NVME LS work queue */ - if (phba->sli4_hba.nvmels_wq && - phba->sli4_hba.nvmels_wq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_wq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_wq; - goto pass_check; - } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; From ba3bd6e2a9a2753439a1fd1fe39e8d5162fb3aa9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:36 -0800 Subject: [PATCH 0845/1911] scsi: lpfc: remove dead sli3 nvme code Remove nvme teardown calls that should not be there on sli3 devices Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c883110178ea7f..661bd25a404a2f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10446,12 +10446,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) fc_remove_host(shost); scsi_remove_host(shost); - /* Perform ndlp cleanup on the physical port. The nvme and nvmet - * localports are destroyed after to cleanup all transport memory. - */ lpfc_cleanup(vport); - lpfc_nvmet_destroy_targetport(phba); - lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disable all interrupts, From cd46cdedb3238f1f878c42650ec05c6344c7083d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:37 -0800 Subject: [PATCH 0846/1911] scsi: lpfc: correct rdp diag portnames NVME merge reverted diag port names to the physical port. They should be the vport. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_els.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d260a1391baf86..d9c61d030034da 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba) static uint32_t lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc, - struct lpfc_hba *phba) + struct lpfc_vport *vport) { desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG); - memcpy(desc->port_names.wwnn, phba->wwnn, + memcpy(desc->port_names.wwnn, &vport->fc_nodename, sizeof(desc->port_names.wwnn)); - memcpy(desc->port_names.wwpn, phba->wwpn, + memcpy(desc->port_names.wwpn, &vport->fc_portname, sizeof(desc->port_names.wwpn)); desc->length = cpu_to_be32(sizeof(desc->port_names)); @@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context, len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *) (len + pcmd), &rdp_context->link_stat); len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *) - (len + pcmd), phba); + (len + pcmd), vport); len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *) (len + pcmd), vport, ndlp); len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd), From 2ade92ae6d6572858acb2bde6d3664af3ad592e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:38 -0800 Subject: [PATCH 0847/1911] scsi: lpfc: code cleanups in NVME initiator base This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105663.html The issues are: drivers/scsi/lpfc/lpfc_hbadisc.c:316 lpfc_dev_loss_tmo_handler() warn: we tested 'vport->load_flag & 2' before and it was 'false' Action: removed item from test drivers/scsi/lpfc/lpfc_hbadisc.c:701 lpfc_work_done() warn: test_bit() takes a bit number Action: changed definition so bit number drivers/scsi/lpfc/lpfc_hbadisc.c:2206 lpfc_mbx_cmpl_fcf_scan_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2582 lpfc_mbx_cmpl_fcf_rr_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2683 lpfc_mbx_cmpl_read_fcf_rec() error: uninitialized symbol 'vlan_id'. Action: initilized value drivers/scsi/lpfc/lpfc_hbadisc.c:4025 lpfc_register_remote_port() error: we previously assumed 'rdata' could be null (see line 4023) Action: refactored check block drivers/scsi/lpfc/lpfc_hbadisc.c:4613 lpfc_sli4_dequeue_nport_iocbs() error: double unlock 'irq:' Action: removed inner irq reference Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc.h | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 763f32dd2d230e..257bbdd0f0b83a 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -105,7 +105,7 @@ struct lpfc_sli2_slim; #define LPFC_MSIX_VECTORS 2 /* lpfc wait event data ready flag */ -#define LPFC_DATA_READY (1<<0) +#define LPFC_DATA_READY 0 /* bit 0 */ /* queue dump line buffer size */ #define LPFC_LBUF_SZ 128 diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index bd8635d303d241..180b072beef6b0 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } - if (!(vport->load_flag & FC_UNLOADING) && - !(ndlp->nlp_flag & NLP_DELAY_TMO) && + if (!(ndlp->nlp_flag & NLP_DELAY_TMO) && !(ndlp->nlp_flag & NLP_NPR_2B_DISC) && (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) && @@ -2175,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) uint32_t boot_flag, addr_mode; uint16_t fcf_index, next_fcf_index; struct lpfc_fcf_rec *fcf_rec = NULL; - uint16_t vlan_id; + uint16_t vlan_id = LPFC_FCOE_NULL_VID; bool select_new_fcf; int rc; @@ -4022,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rdata = rport->dd_data; /* break the link before dropping the ref */ ndlp->rport = NULL; - if (rdata && rdata->pnode == ndlp) - lpfc_nlp_put(ndlp); - rdata->pnode = NULL; + if (rdata) { + if (rdata->pnode == ndlp) + lpfc_nlp_put(ndlp); + rdata->pnode = NULL; + } /* drop reference for earlier registeration */ put_device(&rport->dev); } @@ -4607,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba, pring = qp->pring; if (!pring) continue; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list); - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); } spin_unlock_irq(&phba->hbalock); } From b5ccc7d61c76006f839b41c6f15876342b46cb02 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:39 -0800 Subject: [PATCH 0848/1911] scsi: lpfc: code cleanups in NVME initiator discovery This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105665.html The issues are: drivers/scsi/lpfc/lpfc_ct.c:943 lpfc_cmpl_ct_cmd_gft_id() error: we previously assumed 'ndlp' could be null (see line 928) Action: moved under if check drivers/scsi/lpfc/lpfc_nvmet.c:1694 lpfc_nvmet_unsol_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1690) Action: conditionalized arg in printf stmt drivers/scsi/lpfc/lpfc_nvmet.c:1792 lpfc_nvmet_sol_fcp_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1788) Action: conditionalized arg in printf stmt Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index c22bb3f887e15b..d3e9af983015cc 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "FC4 x%08x, Data: x%08x x%08x\n", ndlp, did, ndlp->nlp_fc4_type, FC_TYPE_FCP, FC_TYPE_NVME); + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; } - ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a69ca6ea1d6c30..b7739a554fe005 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1720,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6134 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; @@ -1818,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6160 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; From eeb810849a20e32aa1b60335e46ea5446ba07e1f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:40 -0800 Subject: [PATCH 0849/1911] scsi: lpfc: revise version number to 11.2.0.10 Revise lpfc version number to 11.2.0.10 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 86c6c9b26b823a..d4e95e28f4e3d5 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. 
* *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.7" +#define LPFC_DRIVER_VERSION "11.2.0.10" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:05:57 -0800 Subject: [PATCH 0850/1911] avr32: Fix build error caused by include file reshuffling Various avr32 builds fail: arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode' Signed-off-by: Guenter Roeck Acked-by: Hans-Christian Noren Egtvedt Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Fixes: f780d89a0e82 ("sched/headers: Remove from ...") Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/avr32/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf56..29cf2f191bfd28 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include /* The first two words of each frame on the stack look like this if we have From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 10:27:14 -0800 Subject: [PATCH 0851/1911] h8300: Fix build breakage caused by header file changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following h8300 build failures: arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’: arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’ Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/h8300/kernel/ptrace_h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673babaa4..f5ff3b794c8512 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #define BREAKINST 0x5730 /* trapa #3 */ From bb35e4515411396219431fa235bf21bf9c2794e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:13:31 -0800 Subject: [PATCH 0852/1911] drivers/char/nwbutton: Fix build breakage caused by include file reshuffling Fix: drivers/char/nwbutton.c: In function 'button_sequence_finished': drivers/char/nwbutton.c:134:3: error: implicit declaration of function 'kill_cad_pid' The declaration has been moved from one include file to another. 
Signed-off-by: Guenter Roeck Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c3edc4010e9d102 ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488762811-9022-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- drivers/char/nwbutton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index a5b1eb276c0bf9..e6d0d271c58c83 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include From 5c51f4ae84df0f9df33ac08aa5be50061a8b4242 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 2 Mar 2017 16:57:23 -0600 Subject: [PATCH 0853/1911] objtool: Fix another GCC jump table detection issue Arnd Bergmann reported a (false positive) objtool warning: drivers/infiniband/sw/rxe/rxe_resp.o: warning: objtool: rxe_responder()+0xfe: sibling call from callable instruction with changed frame pointer The issue is in find_switch_table(). It tries to find a switch statement's jump table by walking backwards from an indirect jump instruction, looking for a relocation to the .rodata section. In this case it stopped walking prematurely: the first .rodata relocation it encountered was for a variable (resp_state_name) instead of a jump table, so it just assumed there wasn't a jump table. The fix is to ignore any .rodata relocation which refers to an ELF object symbol. This works because the jump tables are anonymous and have no symbols associated with them. Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection") Link: http://lkml.kernel.org/r/20170302225723.3ndbsnl4hkqbne7a@treble Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 15 ++++++++++++--- tools/objtool/elf.c | 12 ++++++++++++ tools/objtool/elf.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4cfdbb5b696783..066086dd59a801 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; + /* look for a relocation which references .rodata */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) - return find_rela_by_dest(file->rodata, - text_rela->addend); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. 
+ */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); } return NULL; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d7983ac63ef9e..d897702ce74278 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + offset >= sym->offset && offset < sym->offset + sym->len) + return sym; + + return NULL; +} + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index aa1ff6596684f9..731973e1a3f5eb 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -79,6 +79,7 @@ struct elf { struct elf *elf_open(const char *name); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); From fa3aa7a54fe6d3abf128f13cd4bbd40eaa48fed2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Mar 2017 10:55:34 +0100 Subject: [PATCH 0854/1911] jiffies: Revert bogus conversion of NSEC_PER_SEC to TICK_NSEC commit 93825f2ec736 converted NSEC_PER_SEC to TICK_NSEC because the author confused NSEC_PER_JIFFY with NSEC_PER_SEC. As a result, the calculation of refined jiffies got broken, triggering lockups. Fixes: 93825f2ec736 ("jiffies: Reuse TICK_NSEC instead of NSEC_PER_JIFFY") Reported-and-tested-by: Meelis Roos Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1488880534-3777-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7906b3f0c41a1a..497719127bf9f6 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second) shift_hz += cycles_per_tick/2; do_div(shift_hz, cycles_per_tick); /* Calculate nsec_per_tick using shift_hz */ - nsec_per_tick = (u64)TICK_NSEC << 8; + nsec_per_tick = (u64)NSEC_PER_SEC << 8; nsec_per_tick += (u32)shift_hz/2; do_div(nsec_per_tick, (u32)shift_hz); From 67430a39ca7a6af28aade5acb92d43ee257c1014 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Mar 2017 16:54:33 +0000 Subject: [PATCH 0855/1911] ASoC: wm_adsp: Return an error on write to a disabled volatile control Volatile controls should only be accessed when the firmware is active, currently however writes to these controls will succeed, but the data will be lost, if the firmware is powered down. Update this behaviour such that an error is returned the same as it is for reads. 
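The contract being enforced, reduced to a self-contained sketch (the struct, flag macro and helper below are simplified stand-ins invented for illustration, not the wm_adsp code):

  #include <errno.h>
  #include <stdbool.h>
  #include <string.h>

  #define CTL_FLAG_VOLATILE 0x1   /* stands in for WMFW_CTL_FLAG_VOLATILE */

  struct ctl {
          unsigned int flags;
          bool dsp_running;       /* is the firmware up? */
          char cache[16];
  };

  /* A write to a volatile control only makes sense while the firmware is
   * running; otherwise fail with -EPERM (as reads already do) instead of
   * caching data that will never reach the DSP.
   */
  static int ctl_put(struct ctl *c, const void *data, size_t len)
  {
          if ((c->flags & CTL_FLAG_VOLATILE) && !c->dsp_running)
                  return -EPERM;

          memcpy(c->cache, data, len < sizeof(c->cache) ? len : sizeof(c->cache));
          return 0;
  }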
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d151224ffcca41..6313b3da967b94 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -899,7 +899,10 @@ static int wm_coeff_put(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - memcpy(ctl->cache, p, ctl->len); + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; + else + memcpy(ctl->cache, p, ctl->len); ctl->set = 1; if (ctl->enabled && ctl->dsp->running) @@ -926,6 +929,8 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl, ctl->set = 1; if (ctl->enabled && ctl->dsp->running) ret = wm_coeff_write_control(ctl, ctl->cache, size); + else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; } mutex_unlock(&ctl->dsp->pwr_lock); From 7b4af793a7a4f8e04175eb6600ba9c8ba855ad20 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Mar 2017 16:54:34 +0000 Subject: [PATCH 0856/1911] ASoC: wm_adsp: Acknowledge controls should also check the DSP is running We should not be writing acknowledge controls until the firmware is running, as in the case of preloaded firmwares the DSP memory may be unaccessible to whilst in the preloaded state. This means a write to the control during this time could be lost. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 6313b3da967b94..bbdb72f73df19d 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -952,7 +952,7 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - if (ctl->enabled) + if (ctl->enabled && ctl->dsp->running) ret = wm_coeff_write_acked_control(ctl, val); else ret = -EPERM; From 68598d2ea886322f9b4b0058e5b288418622de95 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 1 Mar 2017 02:12:50 +0300 Subject: [PATCH 0857/1911] btrfs: remove btrfs_err_str function from uapi/linux/btrfs.h btrfs_err_str function is not called from anywhere and is replicated in the userspace headers for btrfs-progs. It's removal also fixes the following linux/btrfs.h userspace compilation error: /usr/include/linux/btrfs.h: In function 'btrfs_err_str': /usr/include/linux/btrfs.h:740:11: error: 'NULL' undeclared (first use in this function) return NULL; Suggested-by: Jeff Mahoney Signed-off-by: Dmitry V. 
Levin Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index db4c253f8011b2..dcfc3a5a9cb1d2 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -713,33 +713,6 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_ONLY_WRITABLE, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS }; -/* An error code to error string mapping for the kernel -* error codes -*/ -static inline char *btrfs_err_str(enum btrfs_err_code err_code) -{ - switch (err_code) { - case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET: - return "unable to go below two devices on raid1"; - case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET: - return "unable to go below four devices on raid10"; - case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET: - return "unable to go below two devices on raid5"; - case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET: - return "unable to go below three devices on raid6"; - case BTRFS_ERROR_DEV_TGT_REPLACE: - return "unable to remove the dev_replace target dev"; - case BTRFS_ERROR_DEV_MISSING_NOT_FOUND: - return "no missing devices found to remove"; - case BTRFS_ERROR_DEV_ONLY_WRITABLE: - return "unable to remove the only writeable device"; - case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS: - return "add/delete/balance/replace/resize operation "\ - "in progress"; - default: - return NULL; - } -} #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) From cda82ace3d4eff6a38f00a65db435fe35a3916f0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 21 Dec 2016 11:26:55 +0100 Subject: [PATCH 0858/1911] dt-bindings: arm: update Armada CP110 system controller binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that in the CP110 HW block present in Marvell Armada 7K/8K SoCs, gatable clock n°18 not only controls SD/MMC, but also the GOP block. This commit updates the Device Tree binding for this piece of hardware accordingly. 
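Concretely, a device-tree consumer of the GOP now references the same specifier as SD/MMC, i.e. cells <1 18> of this clock controller (the gate named "cpm-sd-mmc-gop" below), so gating that clock off for one of the two blocks necessarily gates the other as well.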
Signed-off-by: Thomas Petazzoni Signed-off-by: Stephen Boyd --- .../bindings/arm/marvell/cp110-system-controller0.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt index 30c546900b6021..07dbb358182ccd 100644 --- a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt +++ b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt @@ -45,7 +45,7 @@ The following clocks are available: - 1 15 SATA - 1 16 SATA USB - 1 17 Main - - 1 18 SD/MMC + - 1 18 SD/MMC/GOP - 1 21 Slow IO (SPI, NOR, BootROM, I2C, UART) - 1 22 USB3H0 - 1 23 USB3H1 @@ -65,7 +65,7 @@ Required properties: "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio", "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none", "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata", - "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io", + "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io", "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197"; Example: @@ -78,6 +78,6 @@ Example: gate-clock-output-names = "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio", "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none", "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata", - "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io", + "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io", "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197"; }; From 253160a8ad06bcc1c1db16a58b1f06d5128f6c5e Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 20 Feb 2017 15:20:56 +0200 Subject: [PATCH 0859/1911] clk: core: Copy connection id Some drivers use sprintf to build clk connection id names but the clk core will save those strings and occasionally print them back. Duplicate the con_id strings instead of fixing all the users. Signed-off-by: Leonard Crestez Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0fb39fe217d17a..67201f67a14af7 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2502,7 +2502,7 @@ struct clk *__clk_create_clk(struct clk_hw *hw, const char *dev_id, clk->core = hw->core; clk->dev_id = dev_id; - clk->con_id = con_id; + clk->con_id = kstrdup_const(con_id, GFP_KERNEL); clk->max_rate = ULONG_MAX; clk_prepare_lock(); @@ -2518,6 +2518,7 @@ void __clk_free_clk(struct clk *clk) hlist_del(&clk->clks_node); clk_prepare_unlock(); + kfree_const(clk->con_id); kfree(clk); } From 9afd30dbc82a9dbea4101aba57beb2a2a7e1b8d5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 28 Feb 2017 18:53:53 +0100 Subject: [PATCH 0860/1911] libceph: fix crush_decode() for older maps Older (shorter) CRUSH maps too need to be finalized. 
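The shape of the fix, as a self-contained sketch rather than the actual decoder (the names here are invented): older, shorter maps take an early goto to the done label before the newer optional fields are parsed, so the finalize step has to sit below that label to run on both paths.

  #include <stdio.h>

  struct map { int finalized; };

  static void map_finalize(struct map *m)
  {
          m->finalized = 1;
  }

  static int map_decode(struct map *m, int old_short_format)
  {
          m->finalized = 0;

          /* ... decode the fields every map version has ... */

          if (old_short_format)
                  goto done;              /* shorter map: nothing more to parse */

          /* ... decode the newer, optional fields ... */

  done:
          map_finalize(m);                /* now reached by both paths */
          return 0;
  }

  int main(void)
  {
          struct map m;

          map_decode(&m, 1);
          printf("old-format map finalized: %d\n", m.finalized);
          return 0;
  }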
Fixes: 66a0e2d579db ("crush: remove mutable part of CRUSH map") Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 6824c0ec8373e7..cc22dd282a3e0b 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable chooseleaf_stable = %d\n", c->chooseleaf_stable); - crush_finalize(c); - done: + crush_finalize(c); dout("crush_decode success\n"); return c; From b581a5854eee4b7851dedb0f8c2ceb54fb902c06 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 1 Mar 2017 17:33:27 +0100 Subject: [PATCH 0861/1911] libceph: don't set weight to IN when OSD is destroyed Since ceph.git commit 4e28f9e63644 ("osd/OSDMap: clear osd_info, osd_xinfo on osd deletion"), weight is set to IN when OSD is deleted. This changes the result of applying an incremental for clients, not just OSDs. Because CRUSH computations are obviously affected, pre-4e28f9e63644 servers disagree with post-4e28f9e63644 clients on object placement, resulting in misdirected requests. Mirrors ceph.git commit a6009d1039a55e2c77f431662b3d6cc5a8e8e63f. Fixes: 930c53286977 ("libceph: apply new_state before new_up_client on incrementals") Link: http://tracker.ceph.com/issues/19122 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osdmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index cc22dd282a3e0b..ffe9e904d4d1d1 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1379,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end, if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && (xorstate & CEPH_OSD_EXISTS)) { pr_info("osd%d does not exist\n", osd); - map->osd_weight[osd] = CEPH_OSD_IN; ret = set_primary_affinity(map, osd, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); if (ret) From 8767b293a4ab6632f9288f34bcf2ab9ba20dca3a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 2 Mar 2017 19:56:57 +0100 Subject: [PATCH 0862/1911] rbd: supported_features bus attribute ... so that userspace can generate meaningful error messages and spell out unsupported features that need to be disabled. 
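For example, a userspace tool could read the mask and test individual feature bits before attempting to map an image. The sketch below is illustrative rather than part of the patch, and it assumes the attribute appears at /sys/bus/rbd/supported_features (derived from the rbd bus name):

  #include <stdio.h>

  int main(void)
  {
          unsigned long long supported = 0;
          FILE *f = fopen("/sys/bus/rbd/supported_features", "r");

          if (!f || fscanf(f, "%llx", &supported) != 1) {
                  perror("supported_features");
                  return 1;
          }
          fclose(f);

          /* bit 2 is RBD_FEATURE_EXCLUSIVE_LOCK per the definitions in this patch */
          if (!(supported & (1ULL << 2)))
                  fprintf(stderr, "exclusive-lock is not supported by this kernel\n");

          return 0;
  }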
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- drivers/block/rbd.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4d680772379828..517838b659646d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v) /* Feature bits */ -#define RBD_FEATURE_LAYERING (1<<0) -#define RBD_FEATURE_STRIPINGV2 (1<<1) -#define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) -#define RBD_FEATURE_DATA_POOL (1<<7) +#define RBD_FEATURE_LAYERING (1ULL<<0) +#define RBD_FEATURE_STRIPINGV2 (1ULL<<1) +#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) +#define RBD_FEATURE_DATA_POOL (1ULL<<7) + #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2 | \ RBD_FEATURE_EXCLUSIVE_LOCK | \ @@ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) return is_lock_owner; } +static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) +{ + return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); +} + static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); +static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); static struct attribute *rbd_bus_attrs[] = { &bus_attr_add.attr, &bus_attr_remove.attr, &bus_attr_add_single_major.attr, &bus_attr_remove_single_major.attr, + &bus_attr_supported_features.attr, NULL, }; From 7cc5e38f2f0b0b58a22a4c18a56348dd99a71270 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sun, 12 Feb 2017 17:11:07 +0100 Subject: [PATCH 0863/1911] libceph: osd_request_timeout option osd_request_timeout specifies how many seconds to wait for a response from OSDs before returning -ETIMEDOUT from an OSD request. 0 (default) means no limit. osd_request_timeout is osdkeepalive-precise -- in-flight requests are swept through every osdkeepalive seconds. With ack vs commit behaviour gone, abort_request() is really simple. This is based on a patch from Artur Molchanov . 
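Since the option is parsed from the common libceph option string, it can be supplied wherever those options are accepted, for instance osd_request_timeout=30 alongside the existing osdkeepalive= option. The expiry test the sweep performs reduces to the following self-contained sketch (plain C written for illustration, not the kernel code; the kernel uses time_before() on jiffies):

  #include <stdio.h>

  typedef unsigned long stamp_t;

  /* wraparound-safe "a is earlier than b", in the spirit of time_before() */
  static int before(stamp_t a, stamp_t b)
  {
          return (long)(a - b) < 0;
  }

  int main(void)
  {
          stamp_t now = 100000, timeout = 3000;      /* arbitrary tick units */
          stamp_t expiry_cutoff = now - timeout;
          stamp_t r_start_stamp = 96000;             /* when the request was sent */

          if (timeout && before(r_start_stamp, expiry_cutoff))
                  printf("request expired, complete it with -ETIMEDOUT\n");

          return 0;
  }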
Tested-by: Artur Molchanov Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/libceph.h | 2 ++ include/linux/ceph/osd_client.h | 1 + net/ceph/ceph_common.c | 15 ++++++++++++++ net/ceph/osd_client.c | 36 ++++++++++++++++++++++++++++++++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 1816c5e2658171..88cd5dc8e238a2 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -48,6 +48,7 @@ struct ceph_options { unsigned long mount_timeout; /* jiffies */ unsigned long osd_idle_ttl; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */ + unsigned long osd_request_timeout; /* jiffies */ /* * any type that can't be simply compared or doesn't need need @@ -68,6 +69,7 @@ struct ceph_options { #define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000) #define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000) #define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000) +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */ #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000) #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2ea0c282f3dc93..c125b5d9e13ced 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -189,6 +189,7 @@ struct ceph_osd_request { /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ + unsigned long r_start_stamp; /* jiffies */ int r_attempts; struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 464e88599b9d29..108533859a5329 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -230,6 +230,7 @@ enum { Opt_osdkeepalivetimeout, Opt_mount_timeout, Opt_osd_idle_ttl, + Opt_osd_request_timeout, Opt_last_int, /* int args above */ Opt_fsid, @@ -256,6 +257,7 @@ static match_table_t opt_tokens = { {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, {Opt_mount_timeout, "mount_timeout=%d"}, {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, + {Opt_osd_request_timeout, "osd_request_timeout=%d"}, /* int args above */ {Opt_fsid, "fsid=%s"}, {Opt_name, "name=%s"}, @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name, opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; /* get mon ip(s) */ /* ip1[:port1][,ip2[:port2]...] */ @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name, } opt->mount_timeout = msecs_to_jiffies(intval * 1000); break; + case Opt_osd_request_timeout: + /* 0 is "wait forever" (i.e. 
infinite timeout) */ + if (intval < 0 || intval > INT_MAX / 1000) { + pr_err("osd_request_timeout out of range\n"); + err = -EINVAL; + goto out; + } + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); + break; case Opt_share: opt->flags &= ~CEPH_OPT_NOSHARE; @@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); + if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) + seq_printf(m, "osd_request_timeout=%d,", + jiffies_to_msecs(opt->osd_request_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b65bbf9f45ebb2..e15ea9e4c4955f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req) req->r_flags |= CEPH_OSD_FLAG_ONDISK; atomic_inc(&req->r_osdc->num_requests); + + req->r_start_stamp = jiffies; } static void submit_request(struct ceph_osd_request *req, bool wrlocked) @@ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req) ceph_osdc_put_request(req); } +static void abort_request(struct ceph_osd_request *req, int err) +{ + dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); + + cancel_map_check(req); + complete_request(req, err); +} + static void check_pool_dne(struct ceph_osd_request *req) { struct ceph_osd_client *osdc = req->r_osdc; @@ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work) container_of(work, struct ceph_osd_client, timeout_work.work); struct ceph_options *opts = osdc->client->options; unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; + unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; LIST_HEAD(slow_osds); struct rb_node *n, *p; @@ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work) struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); bool found = false; - for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { + for (p = rb_first(&osd->o_requests); p; ) { struct ceph_osd_request *req = rb_entry(p, struct ceph_osd_request, r_node); + p = rb_next(p); /* abort_request() */ + if (time_before(req->r_stamp, cutoff)) { dout(" req %p tid %llu on osd%d is laggy\n", req, req->r_tid, osd->o_osd); found = true; } + if (opts->osd_request_timeout && + time_before(req->r_start_stamp, expiry_cutoff)) { + pr_err_ratelimited("tid %llu on osd%d timeout\n", + req->r_tid, osd->o_osd); + abort_request(req, -ETIMEDOUT); + } } for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { struct ceph_osd_linger_request *lreq = @@ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work) list_move_tail(&osd->o_keepalive_item, &slow_osds); } + if (opts->osd_request_timeout) { + for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { + struct ceph_osd_request *req = + rb_entry(p, struct ceph_osd_request, r_node); + + p = rb_next(p); /* abort_request() */ + + if (time_before(req->r_start_stamp, expiry_cutoff)) { + pr_err_ratelimited("tid %llu on osd%d timeout\n", + req->r_tid, osdc->homeless_osd.o_osd); + abort_request(req, -ETIMEDOUT); + } + } + } + if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) maybe_request_map(osdc); From 9b1b23f03abdd25ffde8bbfe5824b89bc0448c28 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 1 Mar 2017 22:00:41 +0100 Subject: [PATCH 
0864/1911] clk: rockchip: add "," to mux_pll_src_apll_dpll_gpll_usb480m_p on rk3036 The mux_pll_src_apll_dpll_gpll_usb480m_p parent list was missing a "," between the 3rd and 4th parent names, making them fall together and thus lookups fail. Fix that. Fixes: 5190c08b2989 ("clk: rockchip: add clock controller for rk3036") Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3036.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c index 924f560dcf80e8..dcde70f4c1056f 100644 --- a/drivers/clk/rockchip/clk-rk3036.c +++ b/drivers/clk/rockchip/clk-rk3036.c @@ -127,7 +127,7 @@ PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; PNAME(mux_pll_src_3plls_p) = { "apll", "dpll", "gpll" }; PNAME(mux_timer_p) = { "xin24m", "pclk_peri_src" }; -PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p) = { "apll", "dpll", "gpll" "usb480m" }; +PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p) = { "apll", "dpll", "gpll", "usb480m" }; PNAME(mux_mmc_src_p) = { "apll", "dpll", "gpll", "xin24m" }; PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; From f8ba2d68e54fbca340ad0fce97397291ba9637bc Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 1 Mar 2017 22:00:42 +0100 Subject: [PATCH 0865/1911] clk: rockchip: Make uartpll a child of the gpll on rk3036 The shared uart-pll is on boot a child of the apll that can get changed by cpu frequency scaling. So move it away to the more stable gpll to make sure the uart doesn't break on cpu frequency changes. This turned up during the 4.11 merge-window when commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") added general termios enablement making the uart on rk3036 change frequency and thus making it susceptible for the frequency scaling issue. Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3036.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c index dcde70f4c1056f..00d4150e33c374 100644 --- a/drivers/clk/rockchip/clk-rk3036.c +++ b/drivers/clk/rockchip/clk-rk3036.c @@ -450,6 +450,13 @@ static void __init rk3036_clk_init(struct device_node *np) return; } + /* + * Make uart_pll_clk a child of the gpll, as all other sources are + * not that usable / stable. + */ + writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10), + reg_base + RK2928_CLKSEL_CON(13)); + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip clk init failed\n", __func__); From d1a6fe41d3c4ff0d26f0b186d774493555ca5282 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 24 Feb 2017 11:48:41 +0900 Subject: [PATCH 0866/1911] ASoC: Intel: Skylake: fix invalid memory access due to wrong reference of pointer In 'skl_tplg_set_module_init_data()', a pointer to 'params' member of 'struct skl_algo_data' is calculated, then casted to (u32 *) and assigned to a member of configuration data. The configuration data is passed to the other functions and used to process intel IPC. In this processing, the value of member is used to get message data, however this can bring invalid memory access in 'skl_set_module_params()' as a result of calculation of a pointer for actual message data. 
(sound/soc/intel/skylake/skl-topology.c) skl_tplg_init_pipe_modules() ->skl_tplg_set_module_init_data() (has this bug) ->skl_tplg_set_module_params() (sound/soc/intel/skylake/skl-messages.c) ->skl_set_module_params() ((char *)param) + data_offset This commit fixes the bug. Fixes: abb740033b56 ("ASoC: Intel: Skylake: Add support to configure module params") Signed-off-by: Takashi Sakamoto Acked-by: Vinod Koul Signed-off-by: Mark Brown Cc: # v4.5+ --- sound/soc/intel/skylake/skl-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index ed58b5b3555a86..2dbfb1b24ef4a6 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -512,7 +512,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w) if (bc->set_params != SKL_PARAM_INIT) continue; - mconfig->formats_config.caps = (u32 *)&bc->params; + mconfig->formats_config.caps = (u32 *)bc->params; mconfig->formats_config.caps_size = bc->size; break; From cd3ac9affc43b44f49d7af70d275f0bd426ba643 Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Fri, 24 Feb 2017 15:10:43 +0800 Subject: [PATCH 0867/1911] ASoC: atmel-classd: fix audio clock rate Fix the audio clock rate according to the datasheet. Reported-by: Dushara Jayasinghe Signed-off-by: Songjun Wu Acked-by: Nicolas Ferre Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/atmel/atmel-classd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 89ac5f5a93eb31..7ae46c2647d453 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, } #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) -#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) +#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) static struct { int rate; From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 7 Mar 2017 08:20:38 -0600 Subject: [PATCH 0868/1911] irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware implementation uses 16Bytes for Interrupt Translation Entry (ITE), but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size. It might cause kernel memory corruption depending on the number of MSI(x) that are configured and the amount of memory that has been allocated for ITEs in its_create_device(). This patch fixes the potential memory corruption by setting the correct ITE size to 16Bytes. Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 10 ++++++++++ drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index a71b8095dbd8df..2f66683500b8e4 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -68,3 +68,4 @@ stable kernels. | | | | | | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | +| Qualcomm Tech. 
| QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414eb2..8c7c244247b6b3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 23201004fd7a68..f77f840d2b5f79 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data) its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; } +static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data) +{ + struct its_node *its = data; + + /* On QDF2400, the size of the ITE is 16Bytes */ + its->ite_size = 16; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = { .mask = 0xffff0fff, .init = its_enable_quirk_cavium_23144, }, +#endif +#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065 + { + .desc = "ITS: QDF2400 erratum 0065", + .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */ + .mask = 0xffffffff, + .init = its_enable_quirk_qdf2400_e0065, + }, #endif { } From 4b9de5da7e120c7f02395da729f0ec77ce7a6044 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 6 Mar 2017 14:41:06 +0100 Subject: [PATCH 0869/1911] irqchip/crossbar: Fix incorrect type of register size The 'size' variable is unsigned according to the dt-bindings. 
As this variable is used as integer in other places, create a new variable that allows to fix the following sparse issue (-Wtypesign): drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:279:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:279:52: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df3772..1070b7b959f2d1 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM; @@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map; - of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size); - switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue; cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; } of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map); From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 04:03:28 -0800 Subject: [PATCH 0870/1911] KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2 The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before. The warning gets thrown while doing (*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0); i.e. 
KVM_SET_MSR ioctl with struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } } The VMLAUNCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset, hence there should be nothing pending. Reported-by: Dmitry Vyukov Suggested-by: Radim Krčmář Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: David Hildenbrand Signed-off-by: Wanpeng Li Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3b626d6dc3ac1a..ab338581b3ecc2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } From 370a0ec1819990f8e2a93df7cc9c0146980ed45f Mon Sep 17 00:00:00 2001 From: Jintack Lim Date: Mon, 6 Mar 2017 05:42:37 -0800 Subject: [PATCH 0871/1911] KVM: arm/arm64: Let vcpu thread modify its own active state Currently, if a vcpu thread tries to change the active state of an interrupt which is already on the same vcpu's AP list, it will loop forever. Since the VGIC mmio handler is called after a vcpu has already synced back the LR state to the struct vgic_irq, we can just let it proceed safely. Cc: stable@vger.kernel.org Reviewed-by: Marc Zyngier Signed-off-by: Jintack Lim Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3654b4c835ef73..2a5db135272215 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, bool new_active_state) { + struct kvm_vcpu *requester_vcpu; spin_lock(&irq->irq_lock); + + /* + * The vcpu parameter here can mean multiple things depending on how + * this function is called; when handling a trap from the kernel it + * depends on the GIC version, and these functions are also called as + * part of save/restore from userspace. + * + * Therefore, we have to figure out the requester in a reliable way. + * + * When accessing VGIC state from user space, the requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. + */ + requester_vcpu = kvm_arm_get_running_vcpu(); + /* * If this virtual IRQ was written into a list register, we * have to make sure the CPU that runs the VCPU thread has - * synced back LR state to the struct vgic_irq. We can only - * know this for sure, when either this irq is not assigned to - * anyone's AP list anymore, or the VCPU thread is not - * running on any CPUs. + * synced back the LR state to the struct vgic_irq. * - * In the opposite case, we know the VCPU thread may be on its - * way back from the guest and still has to sync back this - * IRQ, so we release and re-acquire the spin_lock to let the - * other thread sync back the IRQ. 
+ * As long as the conditions below are true, we know the VCPU thread + * may be on its way back from the guest (we kicked the VCPU thread in + * vgic_change_active_prepare) and still has to sync back this IRQ, + * so we release and re-acquire the spin_lock to let the other thread + * sync back the IRQ. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ + irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); From f050fe7a9164945dd1c28be05bf00e8cfb082ccf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:11 +0000 Subject: [PATCH 0872/1911] arm: KVM: Survive unknown traps from guests Currently we BUG() if we see a HSR.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0406C.c, all currently unallocated HSR EC encodings are reserved, and per ARM DDI 0487A.k_iss10775, page G6-4395, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_arm.h | 1 + arch/arm/kvm/handle_exit.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc86..a3f0b3d500895b 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -209,6 +209,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 4e40d1955e3534..96af65a30d78b1 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... 
HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } From ba4dd156eabdca93501d92a980ba27fa5f4bbd27 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 20 Feb 2017 12:30:12 +0000 Subject: [PATCH 0873/1911] arm64: KVM: Survive unknown traps from guests Currently we BUG() if we see an ESR_EL2.EC value we don't recognise. As configurable disables/enables are added to the architecture (controlled by RES1/RES0 bits respectively), with associated synchronous exceptions, it may be possible for a guest to trigger exceptions with classes that we don't recognise. While we can't service these exceptions in a manner useful to the guest, we can avoid bringing down the host. Per ARM DDI 0487A.k_iss10775, page D7-1937, EC values within the range 0x00 - 0x2c are reserved for future use with synchronous exceptions, and EC values within the range 0x2d - 0x3f may be used for either synchronous or asynchronous exceptions. The patch makes KVM handle any unknown EC by injecting an UNDEFINED exception into the guest, with a corresponding (ratelimited) warning in the host dmesg. We could later improve on this with a new (opt-in) exit to the host userspace. Cc: Dave Martin Cc: Suzuki K Poulose Reviewed-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/kvm/handle_exit.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1bfe30dfbfe77f..fa1b18e364fc9d 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } From a5e1e6ca94a8cec51571fd62e3eaec269717969c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 16 Feb 2017 10:41:20 +0000 Subject: [PATCH 0874/1911] KVM: arm/arm64: VGIC: Fix command handling while ITS being disabled The ITS spec says that ITS commands are only processed when the ITS is enabled (section 8.19.4, Enabled, bit[0]). Our emulation was not taking this into account. Fix this by checking the enabled state before handling CWRITER writes. 
On the other hand that means that CWRITER could advance while the ITS is disabled, and enabling it would need those commands to be processed. Fix this case as well by refactoring actual command processing and calling this from both the GITS_CWRITER and GITS_CTLR handlers. Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 109 +++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 571b64a01c5097..8d1da1af4b09e4 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) return ret; } -static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, - struct vgic_its *its, - gpa_t addr, unsigned int len) -{ - u32 reg = 0; - - mutex_lock(&its->cmd_lock); - if (its->creadr == its->cwriter) - reg |= GITS_CTLR_QUIESCENT; - if (its->enabled) - reg |= GITS_CTLR_ENABLE; - mutex_unlock(&its->cmd_lock); - - return reg; -} - -static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) -{ - its->enabled = !!(val & GITS_CTLR_ENABLE); -} - static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, struct vgic_its *its, gpa_t addr, unsigned int len) @@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, #define ITS_CMD_SIZE 32 #define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) -/* - * By writing to CWRITER the guest announces new commands to be processed. - * To avoid any races in the first place, we take the its_cmd lock, which - * protects our ring buffer variables, so that there is only one user - * per ITS handling commands at a given time. - */ -static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, - gpa_t addr, unsigned int len, - unsigned long val) +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) { gpa_t cbaser; u64 cmd_buf[4]; - u32 reg; - if (!its) - return; - - mutex_lock(&its->cmd_lock); - - reg = update_64bit_reg(its->cwriter, addr & 7, len, val); - reg = ITS_CMD_OFFSET(reg); - if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { - mutex_unlock(&its->cmd_lock); + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) return; - } - its->cwriter = reg; cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { @@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) its->creadr = 0; } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. 
+ */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); mutex_unlock(&its->cmd_lock); } @@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm, *regptr = reg; } +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + its->enabled = !!(val & GITS_CTLR_ENABLE); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + #define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ { \ .reg_offset = off, \ From 8c71fff434e5ecf5ff27bd61db1bc9ac4c2b2a1b Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Fri, 3 Mar 2017 06:31:48 -0300 Subject: [PATCH 0875/1911] [media] v4l: vsp1: Adapt vsp1_du_setup_lif() interface to use a structure The interface to configure the LIF in the VSP1 requires adapting the function prototype for any changes. This makes extending the interface difficult. Change the function prototype to pass a structure which can be easily extended. This changes the means of disabling the pipeline, by now passing a NULL configuration rather than passing either a 0 width or height. 
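[For illustration, a minimal sketch of the resulting call pattern — the vsp_dev and mode variables below are placeholders, the API itself is the one introduced by this patch:

	struct vsp1_du_lif_config cfg = {
		.width  = mode->hdisplay,	/* output frame width in pixels */
		.height = mode->vdisplay,	/* output frame height in pixels */
	};

	vsp1_du_setup_lif(vsp_dev, &cfg);	/* configure and enable the pipeline */
	...
	vsp1_du_setup_lif(vsp_dev, NULL);	/* NULL now disables it, replacing the 0x0 case */

Extending the interface later only means adding fields to struct vsp1_du_lif_config; the function prototype stays the same.]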
[Fixed kerneldoc, made vsp1_du_setup_lif() cfg argument const] Signed-off-by: Kieran Bingham Signed-off-by: Laurent Pinchart Acked-by: Dave Airlie Signed-off-by: Mauro Carvalho Chehab --- drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 8 +++++-- drivers/media/platform/vsp1/vsp1_drm.c | 33 +++++++++++++------------- include/media/vsp1.h | 13 ++++++++-- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index 83ebd162f3ef5e..22da2d671297f3 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -32,6 +32,10 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) { const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode; struct rcar_du_device *rcdu = crtc->group->dev; + struct vsp1_du_lif_config cfg = { + .width = mode->hdisplay, + .height = mode->vdisplay, + }; struct rcar_du_plane_state state = { .state = { .crtc = &crtc->crtc, @@ -66,12 +70,12 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) */ crtc->group->need_restart = true; - vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay); + vsp1_du_setup_lif(crtc->vsp->vsp, &cfg); } void rcar_du_vsp_disable(struct rcar_du_crtc *crtc) { - vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0); + vsp1_du_setup_lif(crtc->vsp->vsp, NULL); } void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc) diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index b4b583f7137a54..b4c0f10fc3b0f1 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -54,12 +54,11 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); /** * vsp1_du_setup_lif - Setup the output part of the VSP pipeline * @dev: the VSP device - * @width: output frame width in pixels - * @height: output frame height in pixels + * @cfg: the LIF configuration * - * Configure the output part of VSP DRM pipeline for the given frame @width and - * @height. This sets up formats on the BRU source pad, the WPF0 sink and source - * pads, and the LIF sink pad. + * Configure the output part of VSP DRM pipeline for the given frame @cfg.width + * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink + * and source pads, and the LIF sink pad. * * As the media bus code on the BRU source pad is conditioned by the * configuration of the BRU sink 0 pad, we also set up the formats on all BRU @@ -69,8 +68,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_init); * * Return 0 on success or a negative error code on failure. */ -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height) +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg) { struct vsp1_device *vsp1 = dev_get_drvdata(dev); struct vsp1_pipeline *pipe = &vsp1->drm->pipe; @@ -79,11 +77,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, unsigned int i; int ret; - dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", - __func__, width, height); - - if (width == 0 || height == 0) { - /* Zero width or height means the CRTC is being disabled, stop + if (!cfg) { + /* NULL configuration means the CRTC is being disabled, stop * the pipeline and turn the light off. */ ret = vsp1_pipeline_stop(pipe); @@ -108,6 +103,9 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, return 0; } + dev_dbg(vsp1->dev, "%s: configuring LIF with format %ux%u\n", + __func__, cfg->width, cfg->height); + /* Configure the format at the BRU sinks and propagate it through the * pipeline. 
*/ @@ -117,8 +115,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, for (i = 0; i < bru->entity.source_pad; ++i) { format.pad = i; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -133,8 +131,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, } format.pad = bru->entity.source_pad; - format.format.width = width; - format.format.height = height; + format.format.width = cfg->width; + format.format.height = cfg->height; format.format.code = MEDIA_BUS_FMT_ARGB8888_1X32; format.format.field = V4L2_FIELD_NONE; @@ -180,7 +178,8 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int width, /* Verify that the format at the output of the pipeline matches the * requested frame size and media bus code. */ - if (format.format.width != width || format.format.height != height || + if (format.format.width != cfg->width || + format.format.height != cfg->height || format.format.code != MEDIA_BUS_FMT_ARGB8888_1X32) { dev_dbg(vsp1->dev, "%s: format mismatch\n", __func__); return -EPIPE; diff --git a/include/media/vsp1.h b/include/media/vsp1.h index 458b400373d44d..38aac554dbbab6 100644 --- a/include/media/vsp1.h +++ b/include/media/vsp1.h @@ -20,8 +20,17 @@ struct device; int vsp1_du_init(struct device *dev); -int vsp1_du_setup_lif(struct device *dev, unsigned int width, - unsigned int height); +/** + * struct vsp1_du_lif_config - VSP LIF configuration + * @width: output frame width + * @height: output frame height + */ +struct vsp1_du_lif_config { + unsigned int width; + unsigned int height; +}; + +int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg); struct vsp1_du_atomic_config { u32 pixelformat; From 45838660e34d90db8d4f7cbc8fd66e8aff79f4fe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 7 Mar 2017 09:31:29 -0800 Subject: [PATCH 0876/1911] Input: i8042 - add noloop quirk for Dell Embedded Box PC 3000 The aux port does not get detected without the noloop quirk, so an external PS/2 mouse cannot work as a result. The PS/2 mouse can work with this quirk. BugLink: https://bugs.launchpad.net/bugs/1591053 Signed-off-by: Kai-Heng Feng Reviewed-by: Marcos Paulo de Souza Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 05afd16ea9c9ef..e856b073d53367 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -119,6 +119,13 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"), }, }, + { + /* Dell Embedded Box PC 3000 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"), + }, + }, { /* OQO Model 01 */ .matches = { From 82f2341c94d270421f383641b7cd670e474db56b Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Tue, 28 Feb 2017 19:54:40 +0300 Subject: [PATCH 0877/1911] tty: n_hdlc: get rid of racy n_hdlc.tbuf Currently N_HDLC line discipline uses a self-made singly linked list for data buffers and has n_hdlc.tbuf pointer for buffer retransmitting after an error. The commit be10eb7589337e5defbe214dae038a53dd21add8 ("tty: n_hdlc add buffer flushing") introduced racy access to n_hdlc.tbuf. 
After tx error concurrent flush_tx_queue() and n_hdlc_send_frames() can put one data buffer to tx_free_buf_list twice. That causes double free in n_hdlc_release(). Let's use standard kernel linked list and get rid of n_hdlc.tbuf: in case of tx error put current data buffer after the head of tx_buf_list. Signed-off-by: Alexander Popov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 132 ++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 1bacbc3b19a05c..e94aea8c0d0535 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -114,7 +114,7 @@ #define DEFAULT_TX_BUF_COUNT 3 struct n_hdlc_buf { - struct n_hdlc_buf *link; + struct list_head list_item; int count; char buf[1]; }; @@ -122,8 +122,7 @@ struct n_hdlc_buf { #define N_HDLC_BUF_SIZE (sizeof(struct n_hdlc_buf) + maxframe) struct n_hdlc_buf_list { - struct n_hdlc_buf *head; - struct n_hdlc_buf *tail; + struct list_head list; int count; spinlock_t spinlock; }; @@ -136,7 +135,6 @@ struct n_hdlc_buf_list { * @backup_tty - TTY to use if tty gets closed * @tbusy - reentrancy flag for tx wakeup code * @woke_up - FIXME: describe this field - * @tbuf - currently transmitting tx buffer * @tx_buf_list - list of pending transmit frame buffers * @rx_buf_list - list of received frame buffers * @tx_free_buf_list - list unused transmit frame buffers @@ -149,7 +147,6 @@ struct n_hdlc { struct tty_struct *backup_tty; int tbusy; int woke_up; - struct n_hdlc_buf *tbuf; struct n_hdlc_buf_list tx_buf_list; struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; @@ -159,6 +156,8 @@ struct n_hdlc { /* * HDLC buffer list manipulation functions */ +static void n_hdlc_buf_return(struct n_hdlc_buf_list *buf_list, + struct n_hdlc_buf *buf); static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, struct n_hdlc_buf *buf); static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); @@ -208,16 +207,9 @@ static void flush_tx_queue(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty2n_hdlc(tty); struct n_hdlc_buf *buf; - unsigned long flags; while ((buf = n_hdlc_buf_get(&n_hdlc->tx_buf_list))) n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, buf); - spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags); - if (n_hdlc->tbuf) { - n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, n_hdlc->tbuf); - n_hdlc->tbuf = NULL; - } - spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); } static struct tty_ldisc_ops n_hdlc_ldisc = { @@ -283,7 +275,6 @@ static void n_hdlc_release(struct n_hdlc *n_hdlc) } else break; } - kfree(n_hdlc->tbuf); kfree(n_hdlc); } /* end of n_hdlc_release() */ @@ -402,13 +393,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) n_hdlc->woke_up = 0; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags); - /* get current transmit buffer or get new transmit */ - /* buffer from list of pending transmit buffers */ - - tbuf = n_hdlc->tbuf; - if (!tbuf) - tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); - + tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list); while (tbuf) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)sending frame %p, count=%d\n", @@ -420,7 +405,7 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* rollback was possible and has been done */ if (actual == -ERESTARTSYS) { - n_hdlc->tbuf = tbuf; + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } /* if transmit error, throw frame away by */ @@ -435,10 +420,7 @@ static void 
n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) /* free current transmit buffer */ n_hdlc_buf_put(&n_hdlc->tx_free_buf_list, tbuf); - - /* this tx buffer is done */ - n_hdlc->tbuf = NULL; - + /* wait up sleeping writers */ wake_up_interruptible(&tty->write_wait); @@ -448,10 +430,12 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty) if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)frame %p pending\n", __FILE__,__LINE__,tbuf); - - /* buffer not accepted by driver */ - /* set this buffer as pending buffer */ - n_hdlc->tbuf = tbuf; + + /* + * the buffer was not accepted by driver, + * return it back into tx queue + */ + n_hdlc_buf_return(&n_hdlc->tx_buf_list, tbuf); break; } } @@ -749,7 +733,8 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, int error = 0; int count; unsigned long flags; - + struct n_hdlc_buf *buf = NULL; + if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d)n_hdlc_tty_ioctl() called %d\n", __FILE__,__LINE__,cmd); @@ -763,8 +748,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, /* report count of read data available */ /* in next available frame (if any) */ spin_lock_irqsave(&n_hdlc->rx_buf_list.spinlock,flags); - if (n_hdlc->rx_buf_list.head) - count = n_hdlc->rx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->rx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count = buf->count; else count = 0; spin_unlock_irqrestore(&n_hdlc->rx_buf_list.spinlock,flags); @@ -776,8 +763,10 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file, count = tty_chars_in_buffer(tty); /* add size of next output frame in queue */ spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock,flags); - if (n_hdlc->tx_buf_list.head) - count += n_hdlc->tx_buf_list.head->count; + buf = list_first_entry_or_null(&n_hdlc->tx_buf_list.list, + struct n_hdlc_buf, list_item); + if (buf) + count += buf->count; spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock,flags); error = put_user(count, (int __user *)arg); break; @@ -825,14 +814,14 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_wait(filp, &tty->write_wait, wait); /* set bits for operations that won't block */ - if (n_hdlc->rx_buf_list.head) + if (!list_empty(&n_hdlc->rx_buf_list.list)) mask |= POLLIN | POLLRDNORM; /* readable */ if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; if (tty_hung_up_p(filp)) mask |= POLLHUP; if (!tty_is_writelocked(tty) && - n_hdlc->tx_free_buf_list.head) + !list_empty(&n_hdlc->tx_free_buf_list.list)) mask |= POLLOUT | POLLWRNORM; /* writable */ } return mask; @@ -856,7 +845,12 @@ static struct n_hdlc *n_hdlc_alloc(void) spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock); spin_lock_init(&n_hdlc->rx_buf_list.spinlock); spin_lock_init(&n_hdlc->tx_buf_list.spinlock); - + + INIT_LIST_HEAD(&n_hdlc->rx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_free_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->rx_buf_list.list); + INIT_LIST_HEAD(&n_hdlc->tx_buf_list.list); + /* allocate free rx buffer list */ for(i=0;ispinlock, flags); + + list_add(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); +} + /** * n_hdlc_buf_put - add specified HDLC buffer to tail of specified list - * @list - pointer to buffer list + * @buf_list - pointer to buffer list * @buf - pointer to buffer */ -static void n_hdlc_buf_put(struct n_hdlc_buf_list *list, +static void n_hdlc_buf_put(struct n_hdlc_buf_list *buf_list, struct 
n_hdlc_buf *buf) { unsigned long flags; - spin_lock_irqsave(&list->spinlock,flags); - - buf->link=NULL; - if (list->tail) - list->tail->link = buf; - else - list->head = buf; - list->tail = buf; - (list->count)++; - - spin_unlock_irqrestore(&list->spinlock,flags); - + + spin_lock_irqsave(&buf_list->spinlock, flags); + + list_add_tail(&buf->list_item, &buf_list->list); + buf_list->count++; + + spin_unlock_irqrestore(&buf_list->spinlock, flags); } /* end of n_hdlc_buf_put() */ /** * n_hdlc_buf_get - remove and return an HDLC buffer from list - * @list - pointer to HDLC buffer list + * @buf_list - pointer to HDLC buffer list * * Remove and return an HDLC buffer from the head of the specified HDLC buffer * list. * Returns a pointer to HDLC buffer if available, otherwise %NULL. */ -static struct n_hdlc_buf* n_hdlc_buf_get(struct n_hdlc_buf_list *list) +static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list) { unsigned long flags; struct n_hdlc_buf *buf; - spin_lock_irqsave(&list->spinlock,flags); - - buf = list->head; + + spin_lock_irqsave(&buf_list->spinlock, flags); + + buf = list_first_entry_or_null(&buf_list->list, + struct n_hdlc_buf, list_item); if (buf) { - list->head = buf->link; - (list->count)--; + list_del(&buf->list_item); + buf_list->count--; } - if (!list->head) - list->tail = NULL; - - spin_unlock_irqrestore(&list->spinlock,flags); + + spin_unlock_irqrestore(&buf_list->spinlock, flags); return buf; - } /* end of n_hdlc_buf_get() */ static char hdlc_banner[] __initdata = From 2eacc79c27eb683c4a3ded80c2629387ee0d4e04 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sat, 18 Feb 2017 07:31:00 -0500 Subject: [PATCH 0878/1911] radix tree test suite: Add test for idr_get_next() Assert that idr_get_next() returns the next populated entry in the tree with an ID greater than or equal to the value pointed to by @nextid argument. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a26098c6123d1c..f20690ac3a9775 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,6 +153,30 @@ void idr_nowait_test(void) idr_destroy(&idr); } +void idr_get_next_test(void) +{ + unsigned long i; + int nextid; + DEFINE_IDR(idr); + + int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; + + for(i = 0; indices[i]; i++) { + struct item *item = item_create(indices[i], 0); + assert(idr_alloc(&idr, item, indices[i], indices[i+1], + GFP_KERNEL) == indices[i]); + } + + for(i = 0, nextid = 0; indices[i]; i++) { + idr_get_next(&idr, &nextid); + assert(nextid == indices[i]); + nextid++; + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + void idr_checks(void) { unsigned long i; @@ -202,6 +226,7 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); + idr_get_next_test(); } /* From 166bb1f532fd9fe1b81c6b411ad5d5c9dd21a685 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 20 Feb 2017 06:40:00 -0500 Subject: [PATCH 0879/1911] radix tree test suite: Add tests for ida_simple_get() and ida_simple_remove() Assert that ida_simple_get() allocates an id in the passed range or returns error on failure, and ida_simple_remove() releases an allocated id. 
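[For readers unfamiliar with the interface under test, a minimal sketch of the ida_simple_*() calls exercised by the new test — the IDA name below is a placeholder and error handling is trimmed:

	static DEFINE_IDA(example_ida);

	/* Allocate the lowest free id in [0, 20000); returns the id or a negative errno. */
	int id = ida_simple_get(&example_ida, 0, 20000, GFP_KERNEL);

	/* Release the id so it can be handed out again. */
	if (id >= 0)
		ida_simple_remove(&example_ida, id);
]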
Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/idr-test.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index f20690ac3a9775..86de901fa5c6be 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -387,6 +387,24 @@ void ida_check_random(void) goto repeat; } +void ida_simple_get_remove_test(void) +{ + DEFINE_IDA(ida); + unsigned long i; + + for (i = 0; i < 10000; i++) { + assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); + } + assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 10000; i++) { + ida_simple_remove(&ida, i); + } + assert(ida_is_empty(&ida)); + + ida_destroy(&ida); +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -453,6 +471,7 @@ void ida_checks(void) ida_check_max(); ida_check_conv(); ida_check_random(); + ida_simple_get_remove_test(); radix_tree_cpu_dead(1); } From c629a344accd15022dd6d87fa28c8e22f978fbda Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sun, 26 Feb 2017 06:33:00 -0500 Subject: [PATCH 0880/1911] radix tree test suite: Add test for radix_tree_clear_tags() Assert that radix_tree_clear_tags() clears the tags on the passed node and slot. Assert that the case where the radix tree has only one entry at index zero and the node is NULL, is also handled. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/tag_check.c | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index d4ff009892456a..36dcf7d6945dc6 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -330,6 +330,34 @@ static void single_check(void) item_kill_tree(&tree); } +void radix_tree_clear_tags_test(void) +{ + unsigned long index; + struct radix_tree_node *node; + struct radix_tree_iter iter; + void **slot; + + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + item_tag_set(&tree, 0, 0); + __radix_tree_lookup(&tree, 0, &node, &slot); + radix_tree_clear_tags(&tree, node, slot); + assert(item_tag_get(&tree, 0, 0) == 0); + + for (index = 0; index < 1000; index++) { + item_insert(&tree, index); + item_tag_set(&tree, index, 0); + } + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_clear_tags(&tree, iter.node, slot); + assert(item_tag_get(&tree, iter.index, 0) == 0); + } + + item_kill_tree(&tree); +} + void tag_check(void) { single_check(); @@ -347,4 +375,5 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); + radix_tree_clear_tags_test(); } From 0d4a41c1a0335dc515c6e7fabd447263b57f6457 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Sun, 26 Feb 2017 16:17:00 -0500 Subject: [PATCH 0881/1911] radix tree test suite: Add performance benchmarks Add performance benchmarks for radix tree insertion, tagging and deletion. 
Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 82 +++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 7 deletions(-) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 9b09ddfe462fd3..35741b9c2a4a1e 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,6 +17,9 @@ #include #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -57,22 +60,87 @@ static long long benchmark_iter(struct radix_tree_root *root, bool tagged) return nsec; } +static void benchmark_insert(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + item_insert_order(root, index, order); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", + size, step, order, nsec); +} + +static void benchmark_tagging(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + radix_tree_tag_set(root, index, 0); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", + size, step, order, nsec); +} + +static void benchmark_delete(struct radix_tree_root *root, + unsigned long size, unsigned long step, int order) +{ + struct timespec start, finish; + unsigned long index, i; + long long nsec; + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (index = 0 ; index < size ; index += step) + for_each_index(i, index, order) + item_delete(root, i); + + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", + size, step, order, nsec); +} + static void benchmark_size(unsigned long size, unsigned long step, int order) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - unsigned long index; - for (index = 0 ; index < size ; index += step) { - item_insert_order(&tree, index, order); - radix_tree_tag_set(&tree, index, 0); - } + benchmark_insert(&tree, size, step, order); + benchmark_tagging(&tree, size, step, order); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", - size, step, order, tagged, normal); + printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", + size, step, order, tagged); + printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", + size, step, order, normal); + + benchmark_delete(&tree, size, step, order); item_kill_tree(&tree); rcu_barrier(); From 6478581c85cd3091a6188a2433a8093f335f8f2a Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 07:53:00 -0500 Subject: [PATCH 0882/1911] radix tree 
test suite: Add performance test for radix_tree_split() Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 35741b9c2a4a1e..b03c3f30c740c9 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -146,6 +146,46 @@ static void benchmark_size(unsigned long size, unsigned long step, int order) rcu_barrier(); } +static long long __benchmark_split(unsigned long index, + int old_order, int new_order) +{ + struct timespec start, finish; + long long nsec; + RADIX_TREE(tree, GFP_ATOMIC); + + item_insert_order(&tree, index, old_order); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_split(&tree, index, new_order); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + item_kill_tree(&tree); + + return nsec; + +} + +static void benchmark_split(unsigned long size, unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + + for (idx = 0; idx < size; idx += step) { + for (i = 3; i < 11; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_split(idx, i, j); + } + } + } + + printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", + size, step, nsec); + +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -163,4 +203,8 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_split(size[c], step[s]); } From 54f4d3341c8fe31e20915e2c1fb322ff8a069832 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 08:11:00 -0500 Subject: [PATCH 0883/1911] radix tree test suite: Add performance test for radix_tree_join() Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index b03c3f30c740c9..99c40f3ed1337f 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -186,6 +186,50 @@ static void benchmark_split(unsigned long size, unsigned long step) } +static long long __benchmark_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + struct timespec start, finish; + long long nsec; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + + clock_gettime(CLOCK_MONOTONIC, &start); + radix_tree_join(&tree, index + 1, order1, item2); + clock_gettime(CLOCK_MONOTONIC, &finish); + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + + loc = find_item(&tree, item); + if (loc == -1) + free(item); + + item_kill_tree(&tree); + + return nsec; +} + +static void benchmark_join(unsigned long step) +{ + int i, j, idx; + long long nsec = 0; + + for (idx = 0; idx < 1 << 10; idx += step) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + nsec += __benchmark_join(idx, i, j); + } + } + } + + printv(2, "Size %8d, step %8ld, join time %10lld ns\n", + 1 << 10, step, nsec); +} + void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -207,4 +251,7 @@ void benchmark(void) for 
(c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_split(size[c], step[s]); + + for (s = 0; step[s]; s++) + benchmark_join(step[s]); } From c4634b08d9eb9c13be2296230bf33c79390dbf9c Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 27 Feb 2017 08:49:00 -0500 Subject: [PATCH 0884/1911] radix tree test suite: Build 32 bit binaries Add option 'make BUILD=32' for building 32-bit binaries. Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f11315bedefc3d..b59c2a9ef778ed 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -10,6 +10,10 @@ ifndef SHIFT SHIFT=3 endif +ifeq ($(BUILD), 32) + CFLAGS += -m32 +endif + targets: mapshift $(TARGETS) main: $(OFILES) From 284d96a494d705b9c5330c900e976fceda885b9f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 2 Mar 2017 04:29:00 -0500 Subject: [PATCH 0885/1911] radix tree test suite: Fix build with --as-needed Currently the radix tree test suite doesn't build with toolchains that use --as-needed by default, for example Ubuntu's: cc -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -lpthread -lurcu main.o ... -o main /usr/bin/ld: regression1.o: undefined reference to symbol 'pthread_join@@GLIBC_2.17' /lib/powerpc64le-linux-gnu/libpthread.so.0: error adding symbols: DSO missing from command line collect2: error: ld returned 1 exit status This is caused by the custom makefile rules placing LDFLAGS before the .o files that need the libraries. We could fix it by using --no-as-needed, or rewriting the custom rules. But we can also just drop the custom rules and move the libraries to LDLIBS, and then the default rules work correctly - with the one caveat that we need to add -fsanitize=address to LDFLAGS because that must be passed to the linker as well as the compiler. Signed-off-by: Michael Ellerman Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index b59c2a9ef778ed..022488f50fc605 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,6 +1,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -LDFLAGS += -lpthread -lurcu +LDFLAGS += -fsanitize=address +LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ @@ -17,13 +18,10 @@ endif targets: mapshift $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main idr-test: idr-test.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test multiorder: multiorder.o $(CORE_OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h From 3f1b6f9d49ba5a209d745fa2448657d8b66ed0c0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 2 Mar 2017 12:24:28 -0500 Subject: [PATCH 0886/1911] radix tree test suite: Depend on Makefile and quieten grep Changing the CFLAGS in the Makefile didn't always lead to a recompilation because the OFILES didn't depend on the Makefile. Also, after doing make clean, grep would still complain about a missing map-shift.h; we need -s as well as -q. 
Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 022488f50fc605..4c6289c5d41598 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -28,7 +28,7 @@ clean: vpath %.c ../../lib -$(OFILES): *.h */*.h generated/map-shift.h \ +$(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -43,7 +43,7 @@ idr.c: ../../../lib/idr.c .PHONY: mapshift mapshift: - @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi From 4ecd9542dbc3e07f3bd3870aac12839f72b47db4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Mar 2017 12:16:10 -0500 Subject: [PATCH 0887/1911] ida: Free correct IDA bitmap There's a relatively rare race where we look at the per-cpu preallocated IDA bitmap, see it's NULL, allocate a new one, and atomically update it. If the kmalloc() happened to sleep and we were rescheduled to a different CPU, or an interrupt came in at the exact right time, another task might have successfully allocated a bitmap and already deposited it. I forgot what the semantics of cmpxchg() were and ended up freeing the wrong bitmap leading to KASAN reporting a use-after-free. Dmitry found the bug with syzkaller & wrote the patch. I wrote the test case that will reproduce the bug without his patch being applied. Reported-by: Dmitry Vyukov Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 4 ++-- tools/testing/radix-tree/idr-test.c | 34 ++++++++++++++++++++++++++--- tools/testing/radix-tree/main.c | 1 + tools/testing/radix-tree/test.h | 1 + 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5ed506d648c4e5..691a9ad48497b0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; - bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap); - kfree(bitmap); + if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) + kfree(bitmap); } return 1; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 86de901fa5c6be..30cd0b296f1a76 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -363,7 +363,7 @@ void ida_check_random(void) { DEFINE_IDA(ida); DECLARE_BITMAP(bitmap, 2048); - int id; + int id, err; unsigned int i; time_t s = time(NULL); @@ -377,8 +377,11 @@ void ida_check_random(void) ida_remove(&ida, bit); } else { __set_bit(bit, bitmap); - ida_pre_get(&ida, GFP_KERNEL); - assert(!ida_get_new_above(&ida, bit, &id)); + do { + ida_pre_get(&ida, GFP_KERNEL); + err = ida_get_new_above(&ida, bit, &id); + } while (err == -ENOMEM); + assert(!err); assert(id == bit); } } @@ -476,11 +479,36 @@ void ida_checks(void) radix_tree_cpu_dead(1); } +static void *ida_random_fn(void *arg) +{ + rcu_register_thread(); + ida_check_random(); + rcu_unregister_thread(); + return NULL; +} + +void ida_thread_tests(void) +{ + pthread_t threads[10]; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); +} + 
int __weak main(void) { radix_tree_init(); idr_checks(); ida_checks(); + ida_thread_tests(); + radix_tree_cpu_dead(1); rcu_barrier(); if (nr_allocated) printf("nr_allocated = %d\n", nr_allocated); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b829127d567057..bc9a78449572f1 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -368,6 +368,7 @@ int main(int argc, char **argv) iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); + ida_thread_tests(); /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index b30e11d9d271c3..0f8220cc61663f 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration); void benchmark(void); void idr_checks(void); void ida_checks(void); +void ida_thread_tests(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); From f0f3f2d0a3e0f7c48163fd8b45f5909d46e7f371 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Mar 2017 12:28:37 -0500 Subject: [PATCH 0888/1911] radix tree test suite: Specify -m32 in LDFLAGS too Michael's patch to use the default make rule for linking and the patch from Rehas to use -m32 if building a 32-bit test-suite on a 64-bit platform don't work well together. Reported-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 4c6289c5d41598..6a9480c03cbdfc 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -13,6 +13,7 @@ endif ifeq ($(BUILD), 32) CFLAGS += -m32 + LDFLAGS += -m32 endif targets: mapshift $(TARGETS) From 544714d8e17c33822319d5a1a00e5ddc4db502b6 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Tue, 7 Mar 2017 19:54:05 +0900 Subject: [PATCH 0889/1911] PCI: exynos: Initialize elbi_base even when using PHY framework Even when using the PHY framework, we need the elbi_base. Before this patch, we didn't initialize elbi_base, which caused NULL pointer dereferences later. 
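The fix is purely an ordering change: the ELBI region is needed in every configuration, so it must be mapped before the early return that skips the remaining resources when the generic PHY framework is in use. A condensed sketch of the resulting control flow, with names taken from the diff below and the later resources trimmed for brevity (not the verbatim driver code):

static int get_mem_resources(struct platform_device *pdev,
                             struct exynos_pcie *ep, struct device *dev)
{
        struct resource *res;

        ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL);
        if (!ep->mem_res)
                return -ENOMEM;

        /* ELBI registers are required in every configuration: map them
         * unconditionally, before any early return.
         */
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ep->mem_res->elbi_base = devm_ioremap_resource(dev, res);
        if (IS_ERR(ep->mem_res->elbi_base))
                return PTR_ERR(ep->mem_res->elbi_base);

        /* Only the PHY registers are owned by the PHY framework, so the
         * early return now sits after the ELBI mapping.
         */
        if (ep->using_phy)
                return 0;

        res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
        ep->mem_res->phy_base = devm_ioremap_resource(dev, res);
        if (IS_ERR(ep->mem_res->phy_base))
                return PTR_ERR(ep->mem_res->phy_base);

        return 0;
}
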
Fixes: e7cd7ef58e1f ("PCI: exynos: Support the PHY generic framework") Signed-off-by: Jaehoon Chung Signed-off-by: Bjorn Helgaas --- drivers/pci/dwc/pci-exynos.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 993b650ef2759c..44f774c12fb25e 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -132,10 +132,6 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, struct device *dev = pci->dev; struct resource *res; - /* If using the PHY framework, doesn't need to get other resource */ - if (ep->using_phy) - return 0; - ep->mem_res = devm_kzalloc(dev, sizeof(*ep->mem_res), GFP_KERNEL); if (!ep->mem_res) return -ENOMEM; @@ -145,6 +141,10 @@ static int exynos5440_pcie_get_mem_resources(struct platform_device *pdev, if (IS_ERR(ep->mem_res->elbi_base)) return PTR_ERR(ep->mem_res->elbi_base); + /* If using the PHY framework, doesn't need to get other resource */ + if (ep->using_phy) + return 0; + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); ep->mem_res->phy_base = devm_ioremap_resource(dev, res); if (IS_ERR(ep->mem_res->phy_base)) From f98c7bce570bdbe344b74ff5daa7dfeef3f22929 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 25 Feb 2017 18:36:44 +0200 Subject: [PATCH 0890/1911] serial: samsung: Continue to work if DMA request fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If DMA is not available (even when configured in DeviceTree), the driver will fail the startup procedure thus making serial console not available. For example this causes boot failure on QEMU ARMv7 (Exynos4210, SMDKC210): [    1.302575] OF: amba_device_add() failed (-19) for /amba/pdma@12680000 ... [   11.435732] samsung-uart 13800000.serial: DMA request failed [   72.963893] samsung-uart 13800000.serial: DMA request failed [   73.143361] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000000 DMA is not necessary for serial to work, so continue with UART startup after emitting a warning. Fixes: 62c37eedb74c ("serial: samsung: add dma reqest/release functions") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index b4f86c219db1e0..7a17aedbf902e0 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1031,8 +1031,10 @@ static int s3c64xx_serial_startup(struct uart_port *port) if (ourport->dma) { ret = s3c24xx_serial_request_dma(ourport); if (ret < 0) { - dev_warn(port->dev, "DMA request failed\n"); - return ret; + dev_warn(port->dev, + "DMA request failed, DMA will not be used\n"); + devm_kfree(port->dev, ourport->dma); + ourport->dma = NULL; } } From a3a4a816b4b194c45d0217e8b9e08b2639802cda Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 27 Feb 2017 21:54:50 +0100 Subject: [PATCH 0891/1911] dt: emac: document device-tree based phy discovery and setup This patch adds documentation for a new "phy-handle" property, "fixed-link" and "mdio" sub-node. These allows the enumeration of PHYs which are supported by the phy library under drivers/net/phy. The EMAC ethernet controller in IBM and AMCC 4xx chips is currently stuck with a few privately defined phy implementations. It has no support for PHYs which are supported by the generic phylib. 
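This change only documents the binding; the driver-side support for consuming "phy-handle" through phylib lands separately. For orientation, a minimal sketch of how a MAC driver typically resolves such a property with the generic OF/phylib helpers - illustrative only, not the emac driver's code, and example_attach_phy/adjust_link are placeholder names:

#include <linux/netdevice.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/phy.h>

/* Illustrative: look up "phy-handle" on the MAC node and attach via phylib. */
static int example_attach_phy(struct net_device *ndev, struct device_node *np,
                              void (*adjust_link)(struct net_device *))
{
        struct device_node *phy_np;
        struct phy_device *phydev;
        phy_interface_t iface;
        int ret;

        ret = of_get_phy_mode(np);      /* e.g. "rgmii" from the phy-mode property */
        iface = (ret < 0) ? PHY_INTERFACE_MODE_NA : ret;

        phy_np = of_parse_phandle(np, "phy-handle", 0);
        if (!phy_np)
                return -ENODEV;         /* fall back to fixed-link/phy-map handling */

        phydev = of_phy_connect(ndev, phy_np, adjust_link, 0, iface);
        of_node_put(phy_np);

        return phydev ? 0 : -ENODEV;
}

With a node like the phy0 example in the diff below, of_parse_phandle() resolves the phandle to the ethernet-phy@0 child of the mdio subnode, and of_phy_connect() binds it through the PHY library instead of the driver's private PHY code.
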
Acked-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- .../devicetree/bindings/powerpc/4xx/emac.txt | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt index 712baf6c3e246f..44b842b6ca154d 100644 --- a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt +++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt @@ -71,6 +71,9 @@ For Axon it can be absent, though my current driver doesn't handle phy-address yet so for now, keep 0x00ffffff in it. + - phy-handle : Used to describe configurations where a external PHY + is used. Please refer to: + Documentation/devicetree/bindings/net/ethernet.txt - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec operations (if absent the value is the same as rx-fifo-size). For Axon, either absent or 2048. @@ -81,8 +84,22 @@ offload, phandle of the TAH device node. - tah-channel : 1 cell, optional. If appropriate, channel used on the TAH engine. + - fixed-link : Fixed-link subnode describing a link to a non-MDIO + managed entity. See + Documentation/devicetree/bindings/net/fixed-link.txt + for details. + - mdio subnode : When the EMAC has a phy connected to its local + mdio, which us supported by the kernel's network + PHY library in drivers/net/phy, there must be device + tree subnode with the following required properties: + - #address-cells: Must be <1>. + - #size-cells: Must be <0>. - Example: + For PHY definitions: Please refer to + Documentation/devicetree/bindings/net/phy.txt and + Documentation/devicetree/bindings/net/ethernet.txt + + Examples: EMAC0: ethernet@40000800 { device_type = "network"; @@ -104,6 +121,48 @@ zmii-channel = <0>; }; + EMAC1: ethernet@ef600c00 { + device_type = "network"; + compatible = "ibm,emac-apm821xx", "ibm,emac4sync"; + interrupt-parent = <&EMAC1>; + interrupts = <0 1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */ + 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>; + reg = <0xef600c00 0x000000c4>; + local-mac-address = [000000000000]; /* Filled in by U-Boot */ + mal-device = <&MAL0>; + mal-tx-channel = <0>; + mal-rx-channel = <0>; + cell-index = <0>; + max-frame-size = <9000>; + rx-fifo-size = <16384>; + tx-fifo-size = <2048>; + fifo-entry-size = <10>; + phy-mode = "rgmii"; + phy-handle = <&phy0>; + phy-map = <0x00000000>; + rgmii-device = <&RGMII0>; + rgmii-channel = <0>; + tah-device = <&TAH0>; + tah-channel = <0>; + has-inverted-stacr-oc; + has-new-stacr-staopc; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; + }; + }; + + ii) McMAL node Required properties: @@ -145,4 +204,3 @@ - revision : as provided by the RGMII new version register if available. For Axon: 0x0000012a - From 0d5370d1d85251e5893ab7c90a429464de2e140b Mon Sep 17 00:00:00 2001 From: Ethan Zhao Date: Mon, 27 Feb 2017 17:08:44 +0900 Subject: [PATCH 0892/1911] PCI: Prevent VPD access for QLogic ISP2722 QLogic ISP2722-based 16/32Gb Fibre Channel to PCIe Adapter has the VPD access issue too, while read the common pci-sysfs access interface shown as /sys/devices/pci0000:00/0000:00:03.2/0000:0b:00.0/vpd with simple 'cat' could cause system hang and panic: Kernel panic - not syncing: An NMI occurred. 
Depending on your system the reason for the NMI is logged in any one of the following resources: 1. Integrated Management Log (IML) 2. OA Syslog 3. OA Forward Progress Log 4. iLO Event Log CPU: 0 PID: 15070 Comm: udevadm Not tainted 4.1.12 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 12/27/2015 0000000000000086 000000007f0cdf51 ffff880c4fa05d58 ffffffff817193de ffffffffa00b42d8 0000000000000075 ffff880c4fa05dd8 ffffffff81714072 0000000000000008 ffff880c4fa05de8 ffff880c4fa05d88 000000007f0cdf51 Call Trace: [] dump_stack+0x63/0x81 [] panic+0xd0/0x20e [] hpwdt_pretimeout+0xdd/0xe0 [hpwdt] [] ? sched_clock+0x9/0x10 [] nmi_handle+0x91/0x170 [] ? nmi_handle+0x9c/0x170 [] io_check_error+0x1e/0xa0 [] default_do_nmi+0x99/0x140 [] do_nmi+0xf4/0x170 [] end_repeat_nmi+0x1a/0x1e [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 [] ? pci_conf1_read+0xeb/0x120 <> [] raw_pci_read+0x23/0x40 [] pci_read+0x2c/0x30 [] pci_user_read_config_word+0x72/0x110 [] pci_vpd_pci22_wait+0x96/0x130 [] pci_vpd_pci22_read+0xdb/0x1a0 [] pci_read_vpd+0x20/0x30 [] read_vpd_attr+0x30/0x40 [] sysfs_kf_bin_read+0x47/0x70 [] kernfs_fop_read+0xae/0x180 [] __vfs_read+0x37/0x100 [] ? security_file_permission+0x84/0xa0 [] ? rw_verify_area+0x56/0xe0 [] vfs_read+0x86/0x140 [] SyS_read+0x55/0xd0 [] system_call_fastpath+0x12/0x71 Shutting down cpus with NMI Kernel Offset: disabled drm_kms_helper: panic occurred, switching back to text console So blacklist the access to its VPD. Signed-off-by: Ethan Zhao Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.6+ --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f754453fe754e9..673683660b5c70 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2174,6 +2174,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005d, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID, quirk_blacklist_vpd); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd); /* * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the From 3bd7db63a841e8c5297bb18ad801df67d5e38ad2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 1 Mar 2017 00:25:40 -0800 Subject: [PATCH 0893/1911] PCI/ASPM: Always set link->downstream to avoid NULL dereference on remove We call pcie_aspm_exit_link_state() when we remove a device. If the device is the last PCIe function to be removed below a bridge and the bridge has an ASPM link_state struct, we disable ASPM on the link. Disabling ASPM requires link->downstream (used in pcie_config_aspm_link()). We previously set link->downstream in pcie_aspm_cap_init(), but only if the device was not blacklisted. Removing the blacklisted device caused a NULL pointer dereference in the pcie_aspm_exit_link_state() -> pcie_config_aspm_link() path: # echo 1 > /sys/bus/pci/devices/0000\:0b\:00.0/remove ... BUG: unable to handle kernel NULL pointer dereference at 0000000000000080 IP: pcie_config_aspm_link+0x5d/0x2b0 Call Trace: pcie_aspm_exit_link_state+0x75/0x130 pci_stop_bus_device+0xa4/0xb0 pci_stop_and_remove_bus_device_locked+0x1a/0x30 remove_store+0x50/0x70 dev_attr_store+0x18/0x30 sysfs_kf_write+0x44/0x60 kernfs_fop_write+0x10e/0x190 __vfs_write+0x28/0x110 ? rcu_read_lock_sched_held+0x5d/0x80 ? rcu_sync_lockdep_assert+0x2c/0x60 ? __sb_start_write+0x173/0x1a0 ? 
vfs_write+0xb3/0x180 vfs_write+0xc4/0x180 SyS_write+0x49/0xa0 do_syscall_64+0xa6/0x1c0 entry_SYSCALL64_slow_path+0x25/0x25 ---[ end trace bd187ee0267df5d9 ]--- To avoid this, set link->downstream in alloc_pcie_link_state(), so every pcie_link_state structure has a valid link->downstream pointer. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas Acked-by: Rajat Jain CC: stable@vger.kernel.org --- drivers/pci/pcie/aspm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 973472c23d8904..1dfa10cc566beb 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -478,7 +478,7 @@ static void aspm_calc_l1ss_info(struct pcie_link_state *link, static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) { - struct pci_dev *child, *parent = link->pdev; + struct pci_dev *child = link->downstream, *parent = link->pdev; struct pci_bus *linkbus = parent->subordinate; struct aspm_register_info upreg, dwreg; @@ -491,9 +491,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) /* Get upstream/downstream components' register state */ pcie_get_aspm_reg(parent, &upreg); - child = pci_function_0(linkbus); pcie_get_aspm_reg(child, &dwreg); - link->downstream = child; /* * If ASPM not supported, don't mess with the clocks and link, @@ -800,6 +798,7 @@ static struct pcie_link_state *alloc_pcie_link_state(struct pci_dev *pdev) INIT_LIST_HEAD(&link->children); INIT_LIST_HEAD(&link->link); link->pdev = pdev; + link->downstream = pci_function_0(pdev->subordinate); /* * Root Ports and PCI/PCI-X to PCIe Bridges are roots of PCIe From 0253f2681f4445e9003fb27dc743ef7d25e8f3b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:12:04 +0100 Subject: [PATCH 0894/1911] net/mlx5e: add IPV6 dependency The ethernet support now calls directly into the ipv6 core code, which fails if IPV6 is a loadable module but mlx5 is built-in: drivers/net/ethernet/mellanox/mlx5/core/en_tc.o: In function `mlx5e_create_encap_header_ipv6': en_tc.c:(.text.mlx5e_create_encap_header_ipv6+0x110): undefined reference to `ip6_route_output_flags' This adds a dependency to ensure that MLX5_CORE_EN can only be built if we are able link the kernel successfully. The downside is that the ethernet option can be hidden. Alternatively we could make MLX5_CORE depend on "IPV6 || !IPV6", which would force MLX5_CORE to be a module when IPV6 is, including in configurations where we don't use the ethernet support at all. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index ddb4ca4ff930a7..117170014e8897 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -14,6 +14,7 @@ config MLX5_CORE config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on IPV6=y || IPV6=n || MLX5_CORE=m imply PTP_1588_CLOCK default n ---help--- From 4342696df764ec65dcdfbd0c10d90ea52505f8ba Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:36 +0100 Subject: [PATCH 0895/1911] spi_ks8995: fix "BUG: key accdaa28 not in .data!" Signed-off-by: Maarten Blomme Signed-off-by: David S. 
Miller --- drivers/net/phy/spi_ks8995.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 93ffedfa299412..aa7209d794f398 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -498,6 +498,7 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; + sysfs_attr_init(&ks->regs_attr.attr); err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); if (err) { dev_err(&spi->dev, "unable to create sysfs file, err=%d\n", From 239870f2a0ebf75cc8f6d987dc528c5243f93d69 Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:49 +0100 Subject: [PATCH 0896/1911] spi_ks8995: regs_size incorrect for some devices Signed-off-by: Maarten Blomme Signed-off-by: David S. Miller --- drivers/net/phy/spi_ks8995.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index aa7209d794f398..1e2d4f1179da31 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,8 +491,8 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); + ks->regs_attr.size = ks->chip->regs_size; err = ks8995_reset(ks); if (err) From 466e8bf10ac104d96e1ea813e8126e11cb72ea20 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:28 +0100 Subject: [PATCH 0897/1911] bnx2x: prevent crash when accessing PTP with interface down It is possible to crash the kernel by accessing a PTP device while its associated bnx2x interface is down. Before the interface is brought up, the timecounter is not initialized, so accessing it results in NULL dereference. Fix it by checking if the interface is up. Use -ENETDOWN as the error code when the interface is down. -EFAULT in bnx2x_ptp_adjfreq() did not seem right. Tested using phc_ctl get/set/adj/freq commands. Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d8d06fdfc42b9d..d57290b9ea75ff 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13738,7 +13738,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) if (!netif_running(bp->dev)) { DP(BNX2X_MSG_PTP, "PTP adjfreq called while the interface is down\n"); - return -EFAULT; + return -ENETDOWN; } if (ppb < 0) { @@ -13797,6 +13797,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP adjtime called while the interface is down\n"); + return -ENETDOWN; + } + DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta); timecounter_adjtime(&bp->timecounter, delta); @@ -13809,6 +13815,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP gettime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timecounter_read(&bp->timecounter); DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns); @@ -13824,6 +13836,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp, struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP settime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timespec64_to_ns(ts); DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns); From 850268d320f0c7c5eb7ad0a62ef21859fa331ded Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:29 +0100 Subject: [PATCH 0898/1911] bnx2x: lower verbosity of VF stats debug messages When BNX2X_MSG_IOV is enabled, the driver produces too many VF statistics messages. Lower the verbosity of the VF stats messages similarly as in commit 76ca70fabbdaa3 ("bnx2x: [Debug] change verbosity of some prints"). Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6fad22adbbb9e7..9f0f851774f25d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1899,7 +1899,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) continue; } - DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid); + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), + "add addresses for vf %d\n", vf->abs_vfid); for_each_vfq(vf, j) { struct bnx2x_vf_queue *rxq = vfq_get(vf, j); @@ -1920,11 +1921,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) cpu_to_le32(U64_HI(q_stats_addr)); cur_query_entry->address.lo = cpu_to_le32(U64_LO(q_stats_addr)); - DP(BNX2X_MSG_IOV, - "added address %x %x for vf %d queue %d client %d\n", - cur_query_entry->address.hi, - cur_query_entry->address.lo, cur_query_entry->funcID, - j, cur_query_entry->index); + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), + "added address %x %x for vf %d queue %d client %d\n", + cur_query_entry->address.hi, + cur_query_entry->address.lo, + cur_query_entry->funcID, + j, cur_query_entry->index); cur_query_entry++; cur_data_offset += sizeof(struct per_queue_stats); stats_count++; From 22118d861cec5da6ed525aaf12a3de9bfeffc58f Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:30 +0100 Subject: [PATCH 0899/1911] bnx2x: fix possible overrun of VFPF multicast addresses array It is too late to check for the limit of the number of VF multicast addresses after they have already been copied to the req->multicast[] array, possibly overflowing it. Do the check before copying. Also fix the error path to not skip unlocking vf2pf_mutex. Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index bfae300cf25ff8..c2d327d9dff0ce 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc, i = 0; + int rc = 0, i = 0; struct netdev_hw_addr *ha; if (bp->state != BNX2X_STATE_OPEN) { @@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) /* Get Rx mode requested */ DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags); + /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { + DP(NETIF_MSG_IFUP, + "VF supports not more than %d multicast MAC addresses\n", + PFVF_MAX_MULTICAST_PER_VF); + rc = -EINVAL; + goto out; + } + netdev_for_each_mc_addr(ha, dev) { DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", bnx2x_mc_addr(ha)); @@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) i++; } - /* We support four PFVF_MAX_MULTICAST_PER_VF mcast - * addresses tops - */ - if (i >= PFVF_MAX_MULTICAST_PER_VF) { - DP(NETIF_MSG_IFUP, - "VF supports not more than %d multicast MAC addresses\n", - PFVF_MAX_MULTICAST_PER_VF); - return -EINVAL; - } - req->n_multicast = i; req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED; req->vf_qid = 0; @@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to add a vlan for the vf */ From 83bd9eb8fc69cdd5135ed6e1f066adc8841800fd Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:31 +0100 Subject: [PATCH 0900/1911] bnx2x: fix detection of VLAN filtering feature for VF VFs are currently missing the VLAN filtering feature, because we were checking the PF's acquire response before actually performing the acquire. Fix it by setting the feature flag later when we have the PF response. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d57290b9ea75ff..ac76fc251d268e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; - /* VF with OLD Hypervisor or old PF do not support filtering */ if (IS_PF(bp)) { if (chip_is_e1x) bp->accept_any_vlan = true; else dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#ifdef CONFIG_BNX2X_SRIOV - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#endif } + /* For VF we'll know whether to enable VLAN filtering after + * getting a response to CHANNEL_TLV_ACQUIRE from PF. 
+ */ dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= NETIF_F_HIGHDMA; @@ -14009,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) goto init_one_freemem; + +#ifdef CONFIG_BNX2X_SRIOV + /* VF with OLD Hypervisor or old PF do not support filtering */ + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } +#endif } /* Enable SRIOV if capability found in configuration space */ From 78d5505432436516456c12abbe705ec8dee7ee2b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:32 +0100 Subject: [PATCH 0901/1911] bnx2x: do not rollback VF MAC/VLAN filters we did not configure On failure to configure a VF MAC/VLAN filter we should not attempt to rollback filters that we failed to configure with -EEXIST. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +++++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9f0f851774f25d..2068bb8f54950d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, /* Add/Remove the filter */ rc = bnx2x_config_vlan_mac(bp, &ramrod); - if (rc && rc != -EEXIST) { + if (rc == -EEXIST) + return 0; + if (rc) { BNX2X_ERR("Failed to %s %s\n", filter->add ? "add" : "delete", (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? @@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, return rc; } + filter->applied = true; + return 0; } @@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, BNX2X_ERR("Managed only %d/%d filters - rolling back\n", i, filters->count + 1); while (--i >= 0) { + if (!filters->filters[i].applied) + continue; filters->filters[i].add = !filters->filters[i].add; bnx2x_vf_mac_vlan_config(bp, vf, qid, &filters->filters[i], diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 7a6d406f4c1117..888d0b6632e86f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter { (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/ bool add; + bool applied; u8 *mac; u16 vid; }; From 74bcbeb7d77ec92e4262fc340cb436ef7d98ba01 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:33 +0100 Subject: [PATCH 0902/1911] bnx2x: fix incorrect filter count in an error message filters->count is the number of filters we were supposed to configure. There is no reason to increase it by +1 when printing the count in an error message. Signed-off-by: Michal Schmidt Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 2068bb8f54950d..bdfd53b46bc568 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -473,7 +473,7 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, /* Rollback if needed */ if (i != filters->count) { BNX2X_ERR("Managed only %d/%d filters - rolling back\n", - i, filters->count + 1); + i, filters->count); while (--i >= 0) { if (!filters->filters[i].applied) continue; From e39513259450a8312cb98a9a3b16bb924310dbcc Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:34 +0100 Subject: [PATCH 0903/1911] bnx2x: add missing configuration of VF VLAN filters Configuring VLANs from the VF side had no effect, because the PF ignored filters of type VFPF_VLAN_FILTER in the VF-PF message. Add the missing filter type to configure. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index c2d327d9dff0ce..76a4668c50fe98 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -1777,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf) goto op_err; } + /* build vlan list */ + fl = NULL; + + rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl, + VFPF_VLAN_FILTER); + if (rc) + goto op_err; + + if (fl) { + /* set vlan list */ + rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl, + msg->vf_qid, + false); + if (rc) + goto op_err; + } + } if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) { From 02b2faaf0af1d85585f6d6980e286d53612acfc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 14:08:21 -0800 Subject: [PATCH 0904/1911] tcp: fix various issues for sockets morphing to listen state Dmitry Vyukov reported a divide by 0 triggered by syzkaller, exploiting tcp_disconnect() path that was never really considered and/or used before syzkaller ;) I was not able to reproduce the bug, but it seems issues here are the three possible actions that assumed they would never trigger on a listener. 1) tcp_write_timer_handler 2) tcp_delack_timer_handler 3) MTU reduction Only IPv6 MTU reduction was properly testing TCP_CLOSE and TCP_LISTEN states from tcp_v6_mtu_reduced() Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv4/tcp_timer.c | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9a89b8deafae1e..8f3ec1365497a5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 40d893556e6701..b2ab411c6d3728 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { From 146d8fef9da1ba854de7fa28b9cb9eb147535f88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 4 Mar 2017 00:01:41 +0000 Subject: [PATCH 0905/1911] rxrpc: Call state should be read with READ_ONCE() under some circumstances The call state may be changed at any time by the data-ready routine in response to received packets, so if the call state is to be read and acted upon several times in a function, READ_ONCE() must be used unless the call state lock is held. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 12 +++++++----- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 48 ++++++++++++++++++++++++++++----------------- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9f4cfa25af7c92..d74921c4969b2d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + enum rxrpc_call_state state; unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; @@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, _proto("Rx DATA %%%u { #%u f=%02x }", sp->hdr.serial, seq, sp->hdr.flags); - if (call->state >= RXRPC_CALL_COMPLETE) + state = READ_ONCE(call->state); + if (state >= RXRPC_CALL_COMPLETE) return; /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ - if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || - call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || + state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) return; @@ -799,7 +801,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_proto_abort("AK0", call, 0); /* Ignore ACKs unless we are or have just been transmitting. 
*/ - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_SERVER_SEND_REPLY: @@ -940,7 +942,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, struct rxrpc_call *call) { - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6491ca46a03fda..3e2f1a8e9c5b51 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -527,7 +527,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = len; } - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_ACCEPTING: ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; @@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, mutex_lock(&call->user_mutex); - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index bc2d3dcff9de76..47ccfeacc1c6a2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { + enum rxrpc_call_state state; enum rxrpc_command cmd; struct rxrpc_call *call; unsigned long user_call_ID = 0; @@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. 
*/ } else { - ret = -EBUSY; - if (call->state == RXRPC_CALL_UNINITIALISED || - call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || - call->state == RXRPC_CALL_SERVER_PREALLOC || - call->state == RXRPC_CALL_SERVER_SECURING || - call->state == RXRPC_CALL_SERVER_ACCEPTING) + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_UNINITIALISED: + case RXRPC_CALL_CLIENT_AWAIT_CONN: + case RXRPC_CALL_SERVER_PREALLOC: + case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_ACCEPTING: + ret = -EBUSY; goto error_release_sock; + default: + break; + } ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); @@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } } + state = READ_ONCE(call->state); _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); + call->debug_id, call->user_call_ID, state, call->conn); - if (call->state >= RXRPC_CALL_COMPLETE) { + if (state >= RXRPC_CALL_COMPLETE) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { @@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + state != RXRPC_CALL_CLIENT_SEND_REQUEST) { /* request phase complete for this client call */ ret = -EPROTO; } else if (rxrpc_is_service_call(call) && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + state != RXRPC_CALL_SERVER_ACK_REQUEST && + state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Reply phase not begun or not complete for service call. */ ret = -EPROTO; } else { @@ -605,14 +611,20 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + case RXRPC_CALL_SERVER_SEND_REPLY: ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + break; + case RXRPC_CALL_COMPLETE: + /* It's too late for this call */ + ret = -ESHUTDOWN; + break; + default: + /* Request phase complete for this client call */ + ret = -EPROTO; + break; } mutex_unlock(&call->user_mutex); From dd4f10722aeb10f4f582948839f066bebe44e5fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:02 -0800 Subject: [PATCH 0906/1911] net: fix socket refcounting in skb_complete_wifi_ack() TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc. Fixes: bf7fa551e0ce ("mac80211: Resolve sk_refcnt/sk_wmem_alloc issue in wifi ack path") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3557958e9bf14..e2f37a560ec43c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3893,7 +3893,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3903,14 +3903,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); From 9ac25fc063751379cb77434fef9f3b088cd3e2f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:03 -0800 Subject: [PATCH 0907/1911] net: fix socket refcounting in skb_complete_tx_timestamp() TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc and lead to leaks or use after free. Fixes: 62bccb8cdb69 ("net-timestamp: Make the clone operation stand-alone from phy timestamping") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e2f37a560ec43c..cd4ba8c6b60916 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3828,13 +3828,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, if (!skb_may_tx_timestamp(sk, false)) return; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); From 8edc3affc0770886c7bfb3436b0fdd09bce13167 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:33 -0800 Subject: [PATCH 0908/1911] rds: tcp: Take explicit refcounts on struct net It is incorrect for the rds_connection to piggyback on the sock_net() refcount for the netns because this gives rise to a chicken-and-egg problem during rds_conn_destroy. Instead explicitly take a ref on the net, and hold the netns down till the connection tear-down is complete. Reported-by: Dmitry Vyukov Signed-off-by: Sowmini Varadhan Signed-off-by: David S. 
Miller --- net/rds/connection.c | 1 + net/rds/rds.h | 6 +++--- net/rds/tcp.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 0e04dcceb1d416..1fa75ab7b73323 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); + put_net(conn->c_net); kmem_cache_free(rds_conn_slab, conn); spin_lock_irqsave(&rds_conn_lock, flags); diff --git a/net/rds/rds.h b/net/rds/rds.h index 39518ef7af4dfb..82d38ccf5e8bcf 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -147,7 +147,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - possible_net_t c_net; + struct net *c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -162,13 +162,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return read_pnet(&conn->c_net); + return conn->c_net; } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - write_pnet(&conn->c_net, net); + conn->c_net = get_net(net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/tcp.c b/net/rds/tcp.c index a973d3b4dff0b2..65c8e3b3b710a0 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -529,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; @@ -584,7 +584,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; From 16c09b1c7657522a321b04aa7f4300865b7cb292 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:34 -0800 Subject: [PATCH 0909/1911] rds: tcp: Reorder initialization sequence in rds_tcp_init to avoid races Order of initialization in rds_tcp_init needs to be done so that resources are set up and destroyed in the correct synchronization sequence with both the data path, as well as netns create/destroy path. Specifically, - we must call register_pernet_subsys and get the rds_tcp_netid before calling register_netdevice_notifier, otherwise we risk the sequence 1. register_netdevice_notifier sets up netdev notifier callback 2. rds_tcp_dev_event -> rds_tcp_kill_sock uses netid 0, and finds the wrong rtn, resulting in a panic with string that is of the form: BUG: unable to handle kernel NULL pointer dereference at 000000000000000d IP: rds_tcp_kill_sock+0x3a/0x1d0 [rds_tcp] : - the rds_tcp_incoming_slab kmem_cache must be initialized before the datapath starts up. The latter can happen any time after the pernet_subsys registration of rds_tcp_net_ops, whose -> init function sets up the listen socket. If the rds_tcp_incoming_slab has not been set up at that time, a panic of the form below may be encountered BUG: unable to handle kernel NULL pointer dereference at 0000000000000014 IP: kmem_cache_alloc+0x90/0x1c0 : rds_tcp_data_recv+0x1e7/0x370 [rds_tcp] tcp_read_sock+0x96/0x1c0 rds_tcp_recv_path+0x65/0x80 [rds_tcp] : Signed-off-by: Sowmini Varadhan Signed-off-by: David S. 
Miller --- net/rds/tcp.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 65c8e3b3b710a0..fbf807a0cc4abd 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -638,19 +638,19 @@ static int rds_tcp_init(void) goto out; } - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); + ret = rds_tcp_recv_init(); + if (ret) goto out_slab; - } ret = register_pernet_subsys(&rds_tcp_net_ops); if (ret) - goto out_notifier; + goto out_recv; - ret = rds_tcp_recv_init(); - if (ret) + ret = register_netdevice_notifier(&rds_tcp_dev_notifier); + if (ret) { + pr_warn("could not register rds_tcp_dev_notifier\n"); goto out_pernet; + } rds_trans_register(&rds_tcp_transport); @@ -660,9 +660,8 @@ static int rds_tcp_init(void) out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); -out_notifier: - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); +out_recv: + rds_tcp_recv_exit(); out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: From b21dd4506b71bdb9c5a20e759255cd2513ea7ebe Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:35 -0800 Subject: [PATCH 0910/1911] rds: tcp: Sequence teardown of listen and acceptor sockets to avoid races Commit a93d01f5777e ("RDS: TCP: avoid bad page reference in rds_tcp_listen_data_ready") added the function rds_tcp_listen_sock_def_readable() to handle the case when a partially set-up acceptor socket drops into rds_tcp_listen_data_ready(). However, if the listen socket (rtn->rds_tcp_listen_sock) is itself going through a tear-down via rds_tcp_listen_stop(), the (*ready)() will be null and we would hit a panic of the form BUG: unable to handle kernel NULL pointer dereference at (null) IP: (null) : ? rds_tcp_listen_data_ready+0x59/0xb0 [rds_tcp] tcp_data_queue+0x39d/0x5b0 tcp_rcv_established+0x2e5/0x660 tcp_v4_do_rcv+0x122/0x220 tcp_v4_rcv+0x8b7/0x980 : In the above case, it is not fatal to encounter a NULL value for ready- we should just drop the packet and let the flush of the acceptor thread finish gracefully. In general, the tear-down sequence for listen() and accept() socket that is ensured by this commit is: rtn->rds_tcp_listen_sock = NULL; /* prevent any new accepts */ In rds_tcp_listen_stop(): serialize with, and prevent, further callbacks using lock_sock() flush rds_wq flush acceptor workq sock_release(listen socket) Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 15 ++++++++++----- net/rds/tcp.h | 2 +- net/rds/tcp_listen.c | 9 +++++++-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fbf807a0cc4abd..22569007677357 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net) * we do need to clean up the listen socket here. 
*/ if (rtn->rds_tcp_listen_sock) { - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + struct socket *lsock = rtn->rds_tcp_listen_sock; + rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); } } @@ -523,10 +524,10 @@ static void rds_tcp_kill_sock(struct net *net) struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = tc->t_cpath->cp_conn->c_net; @@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; + + if (!lsock) + return NULL; - return rtn->rds_tcp_listen_sock->sk->sk_user_data; + return lsock->sk->sk_user_data; } static int rds_tcp_dev_event(struct notifier_block *this, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9a1cc890657679..56ea6620fcf97c 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ struct socket *rds_tcp_listen_init(struct net *); -void rds_tcp_listen_stop(struct socket *); +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 67d0929c7d3d0c..2c69a508a693f0 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk) * before it has been accepted and the accepter has set up their * data_ready.. we only want to queue listen work for our listening * socket + * + * (*ready)() may be null if we are racing with netns delete, and + * the listen socket is being torn down. */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); @@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk) out: read_unlock_bh(&sk->sk_callback_lock); - ready(sk); + if (ready) + ready(sk); } struct socket *rds_tcp_listen_init(struct net *net) @@ -271,7 +275,7 @@ struct socket *rds_tcp_listen_init(struct net *net) return NULL; } -void rds_tcp_listen_stop(struct socket *sock) +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor) { struct sock *sk; @@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock) /* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); + flush_work(acceptor); sock_release(sock); } From 6c4dc75c251721f517e9daeb5370ea606b5b35ce Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 5 Mar 2017 03:01:55 +0300 Subject: [PATCH 0911/1911] net/sched: act_skbmod: remove unneeded rcu_read_unlock in tcf_skbmod_dump Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_skbmod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 3b7074e2302487..c736627f8f4a0e 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, return skb->len; nla_put_failure: - rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } From 142c0ac445792c492579cb01f1cfd4e32e6dfcce Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:41 -0600 Subject: [PATCH 0912/1911] ibmvnic: Fix overflowing firmware/hardware TX queue Use a counter to track the number of outstanding transmissions sent that have not received completions. If the counter reaches the maximum number of queue entries, stop transmissions on that queue. As we receive more completions from firmware, wake the queue once the counter reaches an acceptable level. This patch prevents hardware/firmware TX queue from filling up and and generating errors. Since incorporating this fix, internal testing has reported that these firmware errors have stopped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9198e6bd5160f9..0e87b83f25e2c3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; @@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int ret = 0; tx_pool = &adapter->tx_pool[queue_num]; + tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf-> @@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_BUSY; goto out; } + + atomic_inc(&tx_scrq->used); + + if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { + netdev_info(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); scrq->cur = 0; + atomic_set(&scrq->used, 0); scrq->rx_skb_top = NULL; spin_lock_init(&scrq->lock); @@ -1355,8 +1366,22 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, DMA_TO_DEVICE); } - if (txbuff->last_frag) + if (txbuff->last_frag) { + atomic_dec(&scrq->used); + + if (atomic_read(&scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + netif_subqueue_stopped(adapter->netdev, + txbuff->skb)) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + dev_kfree_skb_any(txbuff->skb); + } adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. 
producer_index] = index; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 422824f1f42a8a..1993b42666f73d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + atomic_t used; }; struct ibmvnic_long_term_buff { From 068d9f90a6978c3e3a662d9e85204a7d6be240d2 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:42 -0600 Subject: [PATCH 0913/1911] ibmvnic: Allocate number of rx/tx buffers agreed on by firmware The amount of TX/RX buffers that the vNIC driver currently allocates is different from the amount agreed upon in negotiation with firmware. Correct that by allocating the requested number of buffers confirmed by firmware. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0e87b83f25e2c3..5f11b4dc95d2d1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev) send_map_query(adapter); for (i = 0; i < rxadd_subcrqs; i++) { init_rx_pool(adapter, &adapter->rx_pool[i], - IBMVNIC_BUFFS_PER_POOL, i, + adapter->req_rx_add_entries_per_subcrq, i, be64_to_cpu(size_array[i]), 1); if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) { dev_err(dev, "Couldn't alloc rx pool\n"); @@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev) for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; tx_pool->tx_buff = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) goto tx_pool_alloc_failed; if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - adapter->max_tx_entries_per_subcrq * + adapter->req_tx_entries_per_subcrq * adapter->req_mtu)) goto tx_ltb_alloc_failed; tx_pool->free_map = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) goto tx_fm_alloc_failed; - for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++) + for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++) tx_pool->free_map[j] = j; tx_pool->consumer_index = 0; @@ -746,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->consumer_index = (tx_pool->consumer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; @@ -819,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (tx_pool->consumer_index == 0) tx_pool->consumer_index = - adapter->max_tx_entries_per_subcrq - 1; + adapter->req_tx_entries_per_subcrq - 1; else tx_pool->consumer_index--; @@ -1387,7 +1387,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, producer_index] = index; adapter->tx_pool[pool].producer_index = (adapter->tx_pool[pool].producer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; } /* remove tx_comp scrq*/ next->tx_comp.first = 0; From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: [PATCH 0914/1911] dccp: fix 
use-after-free in dccp_feat_activate_values Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 
save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 1 + net/dccp/minisocks.c | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf6072..68449293c4b623 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e267e6f4c9a556..abd07a44321985 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; - - return inet_csk_complete_hashdance(sk, child, req, own_req); + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; From 15e668070a64bb97f102ad9cf3bccbca0545cda8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 5 Mar 2017 12:34:53 -0800 Subject: [PATCH 0915/1911] ipv6: reorder icmpv6_init() and ip6_mr_init() Andrey reported the following kernel crash: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 14446 Comm: syz-executor6 Not tainted 4.10.0+ #82 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88001f311700 task.stack: ffff88001f6e8000 RIP: 0010:ip6mr_sk_done+0x15a/0x3d0 net/ipv6/ip6mr.c:1618 RSP: 0018:ffff88001f6ef418 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 1ffff10003edde8c RCX: ffffc900043ee000 RDX: 0000000000000004 RSI: ffffffff83e3b3f8 RDI: 0000000000000020 RBP: ffff88001f6ef508 R08: fffffbfff0dcc5d8 R09: 0000000000000000 R10: ffffffff86e62ec0 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88001f6ef4e0 R15: ffff8800380a0040 FS: 00007f7a52cec700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 
ES: 0000 CR0: 0000000080050033 CR2: 000000000061c500 CR3: 000000001f1ae000 CR4: 00000000000006f0 DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: rawv6_close+0x4c/0x80 net/ipv6/raw.c:1217 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:432 sock_release+0x8d/0x1e0 net/socket.c:597 __sock_create+0x39d/0x880 net/socket.c:1226 sock_create_kern+0x3f/0x50 net/socket.c:1243 inet_ctl_sock_create+0xbb/0x280 net/ipv4/af_inet.c:1526 icmpv6_sk_init+0x163/0x500 net/ipv6/icmp.c:954 ops_init+0x10a/0x550 net/core/net_namespace.c:115 setup_net+0x261/0x660 net/core/net_namespace.c:291 copy_net_ns+0x27e/0x540 net/core/net_namespace.c:396 9pnet_virtio: no channels available for device ./file1 create_new_namespaces+0x437/0x9b0 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] SyS_unshare+0x64e/0x1000 kernel/fork.c:2231 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because net->ipv6.mr6_tables is not initialized at that point, ip6mr_rules_init() is not called yet, therefore on the error path when we iterator the list, we trigger this oops. Fix this by reordering ip6mr_rules_init() before icmpv6_sk_init(). Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 04db40620ea65c..a9a9553ee63df8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -920,12 +920,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1061,10 +1061,10 @@ static int __init inet6_init(void) ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); From 97ee351b50a49717543533cfb85b4bf9d88c9680 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Mar 2017 16:14:49 +1100 Subject: [PATCH 0916/1911] powerpc/boot: Fix zImage TOC alignment Recent toolchains force the TOC to be 256 byte aligned. We need to enforce this alignment in the zImage linker script, otherwise pointers to our TOC variables (__toc_start) could be incorrect. If the actual start of the TOC and __toc_start don't have the same value we crash early in the zImage wrapper. Cc: stable@vger.kernel.org Suggested-by: Alan Modra Signed-off-by: Michael Ellerman --- arch/powerpc/boot/zImage.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2da..f080abfc2f83fb 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); .got : { __toc_start = .; From 3802a345321a08093ba2ddb1849e736f84e8d450 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Mar 2017 16:45:58 -0800 Subject: [PATCH 0917/1911] xfs: only reclaim unwritten COW extents periodically We only want to reclaim preallocations from our periodic work item. 
Currently this is archived by looking for a dirty inode, but that check is rather fragile. Instead add a flag to xfs_reflink_cancel_cow_* so that the caller can ask for just cancelling unwritten extents in the COW fork. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: fix typos in commit message] Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_inode.c | 2 +- fs/xfs/xfs_reflink.c | 23 ++++++++++++++++------- fs/xfs/xfs_reflink.h | 4 ++-- fs/xfs/xfs_super.c | 2 +- 6 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea864293..aa8a6f0d09c3b6 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -293,7 +293,7 @@ xfs_end_io( goto done; if (ioend->io_bio->bi_error) { error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size); + ioend->io_offset, ioend->io_size, true); goto done; } error = xfs_reflink_end_cow(ip, ioend->io_offset, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36e0..3531f8f72fa5e1 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b0646d..7eaf1ef74e3c63 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1615,7 +1615,7 @@ xfs_itruncate_extents( /* Remove all pending CoW reservations. */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block); + last_block, true); if (error) goto out; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c8e..4a84c5ea266d8f 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow( } /* - * Cancel all pending CoW reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool cancel_real) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks( &idx, &got, &del); if (error) break; - } else { + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks( } /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. 
*/ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range( xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, + cancel_real); if (error) goto out_cancel; @@ -1450,7 +1459,7 @@ xfs_reflink_clear_inode_flag( * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. */ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db68380..d29a7967f0290e 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb); + xfs_fileoff_t end_fsb, bool cancel_real); extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count); + xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c19..685c042a120f16 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_remove); if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) xfs_warn(ip->i_mount, "Error %d while evicting CoW blocks for inode %llu.", From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 7 Mar 2017 09:15:53 -0800 Subject: [PATCH 0918/1911] scsi: storvsc: Workaround for virtual DVD SCSI version Hyper-V host emulation of SCSI for virtual DVD device reports SCSI version 0 (UNKNOWN) but is still capable of supporting REPORTLUN. Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11 successfully with virtual DVD ROM device. What happens is that the SCSI scan process falls back to doing sequential probing by INQUIRY. But the storvsc driver has a previous workaround that masks/blocks all errors reports from INQUIRY (or MODE_SENSE) commands. This workaround causes the scan to then populate a full set of bogus LUN's on the target and then sends kernel spinning off into a death spiral doing block reads on the non-existent LUNs. By setting the correct blacklist flags, the target with the DVD device is scanned with REPORTLUN and that works correctly. Patch needs to go in current 4.11, it is safe but not necessary in older kernels. Signed-off-by: Stephen Hemminger Reviewed-by: K. Y. Srinivasan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 585e54f6512cdd..f555174f2cb782 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels") */ static int storvsc_timeout = 180; -static int msft_blist_flags = BLIST_TRY_VPD_PAGES; - #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; #endif @@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device, return ret; } +static int storvsc_device_alloc(struct scsi_device *sdevice) +{ + /* + * Set blist flag to permit the reading of the VPD pages even when + * the target may claim SPC-2 compliance. MSFT targets currently + * claim SPC-2 compliance while they implement post SPC-2 features. + * With this flag we can correctly handle WRITE_SAME_16 issues. + * + * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but + * still supports REPORT LUN. + */ + sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES; + + return 0; +} + static int storvsc_device_configure(struct scsi_device *sdevice) { @@ -1395,14 +1409,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice) sdevice->no_write_same = 1; - /* - * Add blist flags to permit the reading of the VPD pages even when - * the target may claim SPC-2 compliance. MSFT targets currently - * claim SPC-2 compliance while they implement post SPC-2 features. - * With this patch we can correctly handle WRITE_SAME_16 issues. - */ - sdevice->sdev_bflags |= msft_blist_flags; - /* * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3 * if the device is a MSFT virtual device. If the host is @@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = { .eh_host_reset_handler = storvsc_host_reset_handler, .proc_name = "storvsc_host", .eh_timed_out = storvsc_eh_timed_out, + .slave_alloc = storvsc_device_alloc, .slave_configure = storvsc_device_configure, .cmd_per_lun = 255, .this_id = -1, From 85e8a23936ab3442de0c42da97d53b29f004ece1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Feb 2017 08:49:20 +1100 Subject: [PATCH 0919/1911] scsi: lpfc: Add shutdown method for kexec We see lpfc devices regularly fail during kexec. Fix this by adding a shutdown method which mirrors the remove method. Cc: Signed-off-by: Anton Blanchard Reviewed-by: Mauricio Faria de Oliveira Tested-by: Mauricio Faria de Oliveira Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 661bd25a404a2f..2697d49da4d776 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12059,6 +12059,7 @@ static struct pci_driver lpfc_driver = { .id_table = lpfc_id_table, .probe = lpfc_pci_probe_one, .remove = lpfc_pci_remove_one, + .shutdown = lpfc_pci_remove_one, .suspend = lpfc_pci_suspend_one, .resume = lpfc_pci_resume_one, .err_handler = &lpfc_err_handler, From 627c845c0907894a1e5cd2d90ff4fc86c9e4458e Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 7 Mar 2017 04:08:34 -0500 Subject: [PATCH 0920/1911] drm/i915/gvt: change some gvt_err to gvt_dbg_cmd gvt_err should be used for dumping error message. This patch changes some gvt_err to gvt_dbg_cmd, as they are only debugging message, not errors. 
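
For context, the distinction relied on here, shown with the core printk helpers rather than the gvt wrappers themselves: error-level messages are always emitted, while debug-level messages are normally compiled out or gated by dynamic debug, which is why verbose state dumps belong at debug level. The helper name below is hypothetical.

  #include <linux/printk.h>

  /* Illustrative only: dump_example_ring() is a made-up helper. */
  static void dump_example_ring(unsigned long head, unsigned long tail)
  {
          if (tail < head)
                  pr_err("ring tail %#lx behind head %#lx\n", tail, head); /* real error, always logged */

          pr_debug("ring head %#lx tail %#lx\n", head, tail); /* tracing only, normally silent */
  }
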
Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7bb11a555b767f..f53cab604d498d 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -669,7 +669,7 @@ static inline void print_opcode(u32 cmd, int ring_id) if (d_info == NULL) return; - gvt_err("opcode=0x%x %s sub_ops:", + gvt_dbg_cmd("opcode=0x%x %s sub_ops:", cmd >> (32 - d_info->op_len), d_info->name); for (i = 0; i < d_info->nr_sub_op; i++) @@ -694,23 +694,23 @@ static void parser_exec_state_dump(struct parser_exec_state *s) int cnt = 0; int i; - gvt_err(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" + gvt_dbg_cmd(" vgpu%d RING%d: ring_start(%08lx) ring_end(%08lx)" " ring_head(%08lx) ring_tail(%08lx)\n", s->vgpu->id, s->ring_id, s->ring_start, s->ring_start + s->ring_size, s->ring_head, s->ring_tail); - gvt_err(" %s %s ip_gma(%08lx) ", + gvt_dbg_cmd(" %s %s ip_gma(%08lx) ", s->buf_type == RING_BUFFER_INSTRUCTION ? "RING_BUFFER" : "BATCH_BUFFER", s->buf_addr_type == GTT_BUFFER ? "GTT" : "PPGTT", s->ip_gma); if (s->ip_va == NULL) { - gvt_err(" ip_va(NULL)"); + gvt_dbg_cmd(" ip_va(NULL)"); return; } - gvt_err(" ip_va=%p: %08x %08x %08x %08x\n", + gvt_dbg_cmd(" ip_va=%p: %08x %08x %08x %08x\n", s->ip_va, cmd_val(s, 0), cmd_val(s, 1), cmd_val(s, 2), cmd_val(s, 3)); From 787eb485509f9d58962bd8b4dbc6a5ac6e2034fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Mar 2017 15:02:51 -0800 Subject: [PATCH 0921/1911] xfs: fix and streamline error handling in xfs_end_io There are two different cases of buffered I/O errors: - first we can have an already shutdown fs. In that case we should skip any on-disk operations and just clean up the appen transaction if present and destroy the ioend - a real I/O error. In that case we should cleanup any lingering COW blocks. This gets skipped in the current code and is fixed by this patch. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_aops.c | 59 ++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index aa8a6f0d09c3b6..61494295d92fe1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -274,54 +274,49 @@ xfs_end_io( struct xfs_ioend *ioend = container_of(work, struct xfs_ioend, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; int error = ioend->io_bio->bi_error; /* - * Set an error if the mount has shut down and proceed with end I/O - * processing so it can perform whatever cleanups are necessary. + * Just clean up the in-memory strutures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = -EIO; + goto done; + } /* - * For a CoW extent, we need to move the mapping from the CoW fork - * to the data fork. If instead an error happened, just dump the - * new blocks. + * Clean up any COW blocks on an I/O error. 
*/ - if (ioend->io_type == XFS_IO_COW) { - if (error) - goto done; - if (ioend->io_bio->bi_error) { - error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size, true); - goto done; + if (unlikely(error)) { + switch (ioend->io_type) { + case XFS_IO_COW: + xfs_reflink_cancel_cow_range(ip, offset, size, true); + break; } - error = xfs_reflink_end_cow(ip, ioend->io_offset, - ioend->io_size); - if (error) - goto done; + + goto done; } /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - * Detecting and handling completion IO errors is done individually - * for each case as different cleanup operations need to be performed - * on error. + * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_type == XFS_IO_UNWRITTEN) { - if (error) - goto done; - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - } else if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(ioend, error); - } else { - ASSERT(!xfs_ioend_is_append(ioend) || - ioend->io_type == XFS_IO_COW); + switch (ioend->io_type) { + case XFS_IO_COW: + error = xfs_reflink_end_cow(ip, offset, size); + break; + case XFS_IO_UNWRITTEN: + error = xfs_iomap_write_unwritten(ip, offset, size); + break; + default: + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); + break; } done: + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend, error); xfs_destroy_ioend(ioend, error); } From d5825712ee98d68a2c17bc89dad2c30276894cba Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 2 Mar 2017 15:06:33 -0800 Subject: [PATCH 0922/1911] xfs: Use xfs_icluster_size_fsb() to calculate inode alignment mask When block size is larger than inode cluster size, the call to XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size) returns 0. Also, mkfs.xfs would have set xfs_sb->sb_inoalignmt to 0. Hence in xfs_set_inoalignment(), xfs_mount->m_inoalign_mask gets initialized to -1 instead of 0. However, xfs_mount->m_sinoalign would get correctly intialized to 0 because for every positive value of xfs_mount->m_dalign, the condition "!(mp->m_dalign & mp->m_inoalign_mask)" would evaluate to false. Also, xfs_imap() worked fine even with xfs_mount->m_inoalign_mask having -1 as the value because blks_per_cluster variable would have the value 1 and hence we would never have a need to use xfs_mount->m_inoalign_mask to compute the inode chunk's agbno and offset within the chunk. Signed-off-by: Chandan Rajendra Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_mount.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb7528..688ebff1f66384 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -513,8 +513,7 @@ STATIC void xfs_set_inoalignment(xfs_mount_t *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; else mp->m_inoalign_mask = 0; From 08b005f1333154ae5b404ca28766e0ffb9f1c150 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 6 Mar 2017 11:58:20 -0800 Subject: [PATCH 0923/1911] xfs: remove kmem_zalloc_greedy The sole remaining caller of kmem_zalloc_greedy is bulkstat, which uses it to grab 1-4 pages for staging of inobt records. 
The infinite loop in the greedy allocation function is causing hangs[1] in generic/269, so just get rid of the greedy allocator in favor of kmem_zalloc_large. This makes bulkstat somewhat more likely to ENOMEM if there's really no pages to spare, but eliminates a source of hangs. [1] http://lkml.kernel.org/r/20170301044634.rgidgdqqiiwsmfpj%40XZHOUW.usersys.redhat.com Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- v2: remove single-page fallback --- fs/xfs/kmem.c | 18 ------------------ fs/xfs/kmem.h | 2 -- fs/xfs/xfs_itable.c | 6 ++---- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e63..70a5b55e0870a0 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -25,24 +25,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7ab..f0fc84fcaac255 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c1710..2a6d9b1558e00d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last From aa2be9b3d6d2d699e9ca7cbfc00867c80e5da213 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 3 Mar 2017 17:56:55 +1100 Subject: [PATCH 0924/1911] crypto: powerpc - Fix initialisation of crc32c context Turning on crypto self-tests on a POWER8 shows: alg: hash: Test 1 failed for crc32c-vpmsum 00000000: ff ff ff ff Comparing the code with the Intel CRC32c implementation on which ours is based shows that we are doing an init with 0, not ~0 as CRC32c requires. This probably wasn't caught because btrfs does its own weird open-coded initialisation. Initialise our internal context to ~0 on init. This makes the self-tests pass, and btrfs continues to work. 
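
For reference, a minimal userspace sketch of the CRC-32C definition the self-test checks against (not the vpmsum code itself); it shows why the context must start at ~0, since a zero seed makes leading zero bytes invisible to the checksum:

  #include <stdint.h>
  #include <stddef.h>

  /* Bitwise CRC-32C (Castagnoli), reflected polynomial 0x82F63B78:
   * seed with all ones, invert on completion.  crc32c("123456789", 9)
   * yields the standard check value 0xE3069283. */
  static uint32_t crc32c(const void *data, size_t len)
  {
          const uint8_t *p = data;
          uint32_t crc = ~0u;                     /* the init the fix restores */

          while (len--) {
                  crc ^= *p++;
                  for (int i = 0; i < 8; i++)
                          crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78 : 0);
          }
          return ~crc;                            /* final inversion */
  }
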
Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Cc: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Anton Blanchard Signed-off-by: Herbert Xu --- arch/powerpc/crypto/crc32c-vpmsum_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 9fa046d56ebadd..411994551afc13 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); - *key = 0; + *key = ~0; return 0; } From 07de4bc88ce6a4d898cad9aa4c99c1df7e87702d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Mar 2017 19:14:07 +0200 Subject: [PATCH 0925/1911] crypto: s5p-sss - Fix completing crypto request in IRQ handler In a regular interrupt handler driver was finishing the crypt/decrypt request by calling complete on crypto request. This is disallowed since converting to skcipher in commit b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") and causes a warning: WARNING: CPU: 0 PID: 0 at crypto/skcipher.c:430 skcipher_walk_first+0x13c/0x14c The interrupt is marked shared but in fact there are no other users sharing it. Thus the simplest solution seems to be to just use a threaded interrupt handler, after converting it to oneshot. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85ce8..a668286d62cb17 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; From 45c2fdde01299b02a6e3225e848598a3c1e55539 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:46 +0100 Subject: [PATCH 0926/1911] hwrng: omap - write registers after enabling the clock Commit 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") added support for the SafeXcel IP-76 variant of the IP. This modification included getting a reference and enabling a clock. Unfortunately, this was done *after* writing to the RNG_INTMASK_REG register. This generally works fine when the driver is built-in because the clock might have been left enabled by the bootloader, but fails short when the driver is built as a module: it causes a system hang because a register is being accessed while the clock is not enabled. This commit fixes that by making the register access *after* enabling the clock. This issue was found by the kernelci.org testing effort. 
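
The ordering rule the fix enforces is a general one: an IP block's interface clock must be running before any of its registers are touched. A minimal probe sketch of that pattern follows; the EXAMPLE_* names and offsets are placeholders, not the omap-rng definitions.

  #include <linux/clk.h>
  #include <linux/io.h>
  #include <linux/ioport.h>
  #include <linux/platform_device.h>

  #define EXAMPLE_INTMASK_REG     0x04    /* placeholder offset */
  #define EXAMPLE_OFLO_MASK       0x02    /* placeholder: bit 1 */

  static int example_rng_probe(struct platform_device *pdev)
  {
          struct resource *res;
          void __iomem *base;
          struct clk *clk;
          int err;

          res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
          base = devm_ioremap_resource(&pdev->dev, res);
          if (IS_ERR(base))
                  return PTR_ERR(base);

          clk = devm_clk_get(&pdev->dev, NULL);
          if (IS_ERR(clk))
                  return PTR_ERR(clk);

          err = clk_prepare_enable(clk);  /* turn the clock on first... */
          if (err)
                  return err;

          /* ...and only then write to the IP's registers. */
          writel(EXAMPLE_OFLO_MASK, base + EXAMPLE_INTMASK_REG);
          return 0;
  }
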
Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 3ad86fdf954e96..efa3747c175099 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -397,7 +397,6 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, irq, err); return err; } - omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); priv->clk = of_clk_get(pdev->dev.of_node, 0); if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER) @@ -408,6 +407,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, dev_err(&pdev->dev, "unable to enable the clk, " "err = %d\n", err); } + + omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); } return 0; } From 761c2510283066324cab7859930777ee34cbca78 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:47 +0100 Subject: [PATCH 0927/1911] hwrng: omap - use devm_clk_get() instead of of_clk_get() The omap-rng driver currently uses of_clk_get() to get a reference to the clock, but never releases that reference. This commit fixes that by using devm_clk_get() instead. Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index efa3747c175099..d2866280f13092 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -398,7 +398,7 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, return err; } - priv->clk = of_clk_get(pdev->dev.of_node, 0); + priv->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER) return -EPROBE_DEFER; if (!IS_ERR(priv->clk)) { From b985735be7afea3a5e0570ce2ea0b662c0e12e19 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:48 +0100 Subject: [PATCH 0928/1911] hwrng: omap - Do not access INTMASK_REG on EIP76 The INTMASK_REG register does not exist on EIP76. Due to this, the call: omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); ends up, through the reg_map_eip76[] array, in accessing the register at offset 0, which is the RNG_OUTPUT_0_REG. This by itself doesn't cause any problem, but clearly doesn't enable the interrupt as it was expected. On EIP76, the register that allows to enable the interrupt is RNG_CONTROL_REG. And just like RNG_INTMASK_REG, it's bit 1 of this register that allows to enable the shutdown_oflo interrupt. 
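
The underlying mechanism is a per-variant offset table indexed by a logical register id, so a register the variant lacks simply has offset 0 in the table and a blind write aliases onto whatever lives at offset 0. A sketch of that scheme with invented offsets (not the real omap-rng register maps):

  #include <linux/io.h>
  #include <linux/types.h>

  enum example_reg {
          EXAMPLE_OUTPUT_0_REG = 0,
          EXAMPLE_INTMASK_REG,
          EXAMPLE_CONTROL_REG,
          EXAMPLE_REG_MAX,
  };

  static const u16 example_map_omap4[EXAMPLE_REG_MAX] = {
          [EXAMPLE_OUTPUT_0_REG]  = 0x00,
          [EXAMPLE_INTMASK_REG]   = 0x44,         /* invented offset */
          [EXAMPLE_CONTROL_REG]   = 0x58,         /* invented offset */
  };

  static const u16 example_map_eip76[EXAMPLE_REG_MAX] = {
          [EXAMPLE_OUTPUT_0_REG]  = 0x00,
          /* no INTMASK register on this variant: entry stays 0 */
          [EXAMPLE_CONTROL_REG]   = 0x10,         /* invented offset */
  };

  static void example_write(void __iomem *base, const u16 *map,
                            enum example_reg reg, u32 val)
  {
          /* Check the table entry before using it, as the fix does; the
           * real driver falls back to the CONTROL register on EIP76
           * instead of silently writing offset 0 (the data output reg). */
          if (reg != EXAMPLE_OUTPUT_0_REG && !map[reg])
                  return;
          writel(val, base + map[reg]);
  }
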
Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index d2866280f13092..b1ad12552b566a 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -408,7 +408,18 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, "err = %d\n", err); } - omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); + /* + * On OMAP4, enabling the shutdown_oflo interrupt is + * done in the interrupt mask register. There is no + * such register on EIP76, and it's enabled by the + * same bit in the control register + */ + if (priv->pdata->regs[RNG_INTMASK_REG]) + omap_rng_write(priv, RNG_INTMASK_REG, + RNG_SHUTDOWN_OFLO_MASK); + else + omap_rng_write(priv, RNG_CONTROL_REG, + RNG_SHUTDOWN_OFLO_MASK); } return 0; } From a04e54f2c35823ca32d56afcd5cea5b783e2f51a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Nov 2016 23:06:53 -0700 Subject: [PATCH 0929/1911] target/pscsi: Fix TYPE_TAPE + TYPE_MEDIMUM_CHANGER export The following fixes a divide by zero OOPs with TYPE_TAPE due to pscsi_tape_read_blocksize() failing causing a zero sd->sector_size being propigated up via dev_attrib.hw_block_size. It also fixes another long-standing bug where TYPE_TAPE and TYPE_MEDIMUM_CHANGER where using pscsi_create_type_other(), which does not call scsi_device_get() to take the device reference. Instead, rename pscsi_create_type_rom() to pscsi_create_type_nondisk() and use it for all cases. Finally, also drop a dump_stack() in pscsi_get_blocks() for non TYPE_DISK, which in modern target-core can get invoked via target_sense_desc_format() during CHECK_CONDITION. Reported-by: Malcolm Haak Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pscsi.c | 47 ++++++++---------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index a8f8e53f2f5748..44d92f23a3f054 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, buf = kzalloc(12, GFP_KERNEL); if (!buf) - return; + goto out_free; memset(cdb, 0, MAX_COMMAND_SIZE); cdb[0] = MODE_SENSE; @@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, * If MODE_SENSE still returns zero, set the default value to 1024. */ sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]); +out_free: if (!sdev->sector_size) sdev->sector_size = 1024; -out_free: + kfree(buf); } @@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, sd->lun, sd->queue_depth); } - dev->dev_attrib.hw_block_size = sd->sector_size; + dev->dev_attrib.hw_block_size = + min_not_zero((int)sd->sector_size, 512); dev->dev_attrib.hw_max_sectors = - min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q)); + min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q)); dev->dev_attrib.hw_queue_depth = sd->queue_depth; /* @@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, /* * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE. 
*/ - if (sd->type == TYPE_TAPE) + if (sd->type == TYPE_TAPE) { pscsi_tape_read_blocksize(dev, sd); + dev->dev_attrib.hw_block_size = sd->sector_size; + } return 0; } @@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) /* * Called with struct Scsi_Host->host_lock called. */ -static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) +static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd) __releases(sh->host_lock) { struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; @@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) return 0; } -/* - * Called with struct Scsi_Host->host_lock called. - */ -static int pscsi_create_type_other(struct se_device *dev, - struct scsi_device *sd) - __releases(sh->host_lock) -{ - struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; - struct Scsi_Host *sh = sd->host; - int ret; - - spin_unlock_irq(sh->host_lock); - ret = pscsi_add_device_to_list(dev, sd); - if (ret) - return ret; - - pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n", - phv->phv_host_id, scsi_device_type(sd->type), sh->host_no, - sd->channel, sd->id, sd->lun); - return 0; -} - static int pscsi_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; @@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev) case TYPE_DISK: ret = pscsi_create_type_disk(dev, sd); break; - case TYPE_ROM: - ret = pscsi_create_type_rom(dev, sd); - break; default: - ret = pscsi_create_type_other(dev, sd); + ret = pscsi_create_type_nondisk(dev, sd); break; } @@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev) else if (pdv->pdv_lld_host) scsi_host_put(pdv->pdv_lld_host); - if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) - scsi_device_put(sd); + scsi_device_put(sd); pdv->pdv_sd = NULL; } @@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev) if (pdv->pdv_bd && pdv->pdv_bd->bd_part) return pdv->pdv_bd->bd_part->nr_sects; - dump_stack(); return 0; } From 13603685c1f12c67a7a2427f00b63f39a2b6f7c9 Mon Sep 17 00:00:00 2001 From: Max Lohrmann Date: Tue, 7 Mar 2017 22:09:56 -0800 Subject: [PATCH 0930/1911] target: Fix VERIFY_16 handling in sbc_parse_cdb As reported by Max, the Windows 2008 R2 chkdsk utility expects VERIFY_16 to be supported, and does not handle the returned CHECK_CONDITION properly, resulting in an infinite loop. The kernel will log huge amounts of this error: kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending CHECK_CONDITION. 
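
For reference, the two opcodes differ only in how the CDB encodes the LBA and sector count; a standalone sketch of that decoding, with field offsets per SBC, is shown below (it is an illustration, not the target-core parser itself):

  #include <linux/types.h>
  #include <scsi/scsi_proto.h>
  #include <asm/unaligned.h>

  /* VERIFY(10), opcode 0x2f: 32-bit LBA at bytes 2-5, 16-bit length at 7-8.
   * VERIFY(16), opcode 0x8f: 64-bit LBA at bytes 2-9, 32-bit length at 10-13. */
  static void decode_verify_cdb(const u8 *cdb, u64 *lba, u32 *sectors)
  {
          if (cdb[0] == VERIFY) {
                  *lba = get_unaligned_be32(&cdb[2]);
                  *sectors = get_unaligned_be16(&cdb[7]);
          } else {                                /* VERIFY_16 */
                  *lba = get_unaligned_be64(&cdb[2]);
                  *sectors = get_unaligned_be32(&cdb[10]);
          }
  }
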
Signed-off-by: Max Lohrmann Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 68d8aef7ab78d4..c194063f169b13 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; break; case VERIFY: + case VERIFY_16: size = 0; - sectors = transport_get_sectors_10(cdb); - cmd->t_task_lba = transport_lba_32(cdb); + if (cdb[0] == VERIFY) { + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); + } else { + sectors = transport_get_sectors_16(cdb); + cmd->t_task_lba = transport_lba_64(cdb); + } cmd->execute_cmd = sbc_emulate_noop; goto check_lba; case REZERO_UNIT: From 69eb1596b4df8ca834ba6f9a3df40f78943f6dca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 15:32:23 +0100 Subject: [PATCH 0931/1911] staging: octeon: remove unused variable A cleanup patch left one local variable without a reference: drivers/staging/octeon/ethernet-rx.c:339:28: warning: unused variable 'priv' [-Wunused-variable] This removes the declaration too. Fixes: 66812da3a689 ("staging: octeon: Use net_device_stats from struct net_device") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 7f8cf875157c60..65a2856319948e 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -336,7 +336,6 @@ static int cvm_oct_poll(struct oct_rx_group *rx_group, int budget) if (likely((port < TOTAL_NUMBER_OF_PORTS) && cvm_oct_device[port])) { struct net_device *dev = cvm_oct_device[port]; - struct octeon_ethernet *priv = netdev_priv(dev); /* * Only accept packets for devices that are From fc69910f329d61821897871e0e957eda39beb3d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Mar 2017 08:29:31 +0100 Subject: [PATCH 0932/1911] MIPS: Add missing include files After the split of linux/sched.h, several platforms in arch/mips stopped building. Add the respective additional #include statements to fix the problem I first tried adding these into asm/processor.h, but ran into circular header dependencies with that which I could not figure out. The commit I listed as causing the problem is the branch merge, as there is likely a combination of multiple patches in that branch. 
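
The failure mode is the usual one with transitive includes: code keeps building only while some intermediate header happens to pull in the definition it needs, and breaks the moment that header is slimmed down. A tiny userspace illustration of the error class, with a hypothetical struct standing in for the real kernel types:

  /* sizeof on a type that is only forward-declared fails the same way the
   * arch code did once its structs stopped arriving transitively. */
  #include <stddef.h>

  struct pt_regs_like;                            /* forward declaration only */

  /* size_t bad(void) { return sizeof(struct pt_regs_like); }
   *   --> "invalid application of 'sizeof' to incomplete type"          */

  struct pt_regs_like { unsigned long regs[32]; };  /* full definition */

  size_t good(void)
  {
          return sizeof(struct pt_regs_like);     /* fine once defined */
  }

The fix is simply to include what you use in each affected file, rather than relying on another header to do it.
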
Signed-off-by: Arnd Bergmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: ralf@linux-mips.org Fixes: 1827adb11ad2 ("Merge branch 'WIP.sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") Link: http://lkml.kernel.org/r/20170308072931.3836696-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/mips/cavium-octeon/cpu.c | 2 ++ arch/mips/cavium-octeon/crypto/octeon-crypto.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-mt.c | 1 + arch/mips/loongson64/loongson-3/cop2-ex.c | 1 + arch/mips/netlogic/common/smp.c | 1 + arch/mips/netlogic/xlp/cop2-ex.c | 3 +++ arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 2 ++ arch/mips/sgi-ip27/ip27-smp.c | 3 +++ arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-reset.c | 1 + 14 files changed, 20 insertions(+) diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index a5b427909b5cac..036d56cc459168 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 4d22365844af30..cfb4a146cf1786 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "octeon-crypto.h" diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b94b7fbafa360..3de786545ded10 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6ec..f94455f964ec00 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -12,6 +12,7 @@ #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3daa2cae50b0b9..1b070a76fcdd4c 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index e077ea3e11fb36..e398cbc3d7767d 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a035c..621d6af5f6eb8e 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 10d86d54880ab8..bddf1ef553a4f6 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c index 52bc5de420052c..21e439b3db707f 100644 --- a/arch/mips/netlogic/xlp/cop2-ex.c +++ b/arch/mips/netlogic/xlp/cop2-ex.c @@ -9,11 +9,14 @@ * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle */ +#include #include #include #include #include +#include #include +#include #include 
#include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 1f2a5bc4779e6a..75460e1e106b2c 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index d12879eb2b1fa9..83efe03d5c600f 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -12,7 +12,9 @@ #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ #include +#include +#include #include #include #include diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f5ed45e8f44256..4cd47d23d81a76 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,10 +8,13 @@ */ #include #include +#include #include #include + #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 57d8c7486fe6b7..c1f12a9cf305f4 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 8bd415c8729f97..b3b442def42383 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include From 2e6c7747730296a6d4fd700894286db1132598c4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 16 Feb 2017 12:39:01 +0000 Subject: [PATCH 0933/1911] MIPS: Force o32 fp64 support on 32bit MIPS64r6 kernels When a 32-bit kernel is configured to support MIPS64r6 (CPU_MIPS64_R6), MIPS_O32_FP64_SUPPORT won't be selected as it should be because MIPS32_O32 is disabled (o32 is already the default ABI available on 32-bit kernels). This results in userland FP breakage as CP0_Status.FR is read-only 1 since r6 (when an FPU is present) so __enable_fpu() will fail to clear FR. This causes the FPU emulator to get used which will incorrectly emulate 32-bit FPU registers. Force o32 fp64 support in this case by also selecting MIPS_O32_FP64_SUPPORT from CPU_MIPS64_R6 if 32BIT. Fixes: 4e9d324d4288 ("MIPS: Require O32 FP64 support for MIPS64 with O32 compat") Signed-off-by: James Hogan Reviewed-by: Paul Burton Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.0.x- Patchwork: https://patchwork.linux-mips.org/patch/15310/ Signed-off-by: James Hogan --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a008a9f03072de..e0bb576410bbdf 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1531,7 +1531,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM - select MIPS_O32_FP64_SUPPORT if MIPS32_O32 + select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 select HAVE_KVM help Choose this option to build a kernel for release 6 or later of the From 4b5347a24a0f2d3272032c120664b484478455de Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 23 Feb 2017 14:50:24 +0000 Subject: [PATCH 0934/1911] MIPS: End spinlocks with .insn When building for microMIPS we need to ensure that the assembler always knows that there is code at the target of a branch or jump. Recent toolchains will fail to link a microMIPS kernel when this isn't the case due to what it thinks is a branch to non-microMIPS code. 
mips-mti-linux-gnu-ld kernel/built-in.o: .spinlock.text+0x2fc: Unsupported branch between ISA modes. mips-mti-linux-gnu-ld final link failed: Bad value This is due to inline assembly labels in spinlock.h not being followed by an instruction mnemonic, either due to a .subsection pseudo-op or the end of the inline asm block. Fix this with a .insn directive after such labels. Signed-off-by: Paul Burton Signed-off-by: James Hogan Reviewed-by: Maciej W. Rozycki Cc: Ralf Baechle Cc: Peter Zijlstra Cc: Ingo Molnar Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: Patchwork: https://patchwork.linux-mips.org/patch/15325/ Signed-off-by: James Hogan --- arch/mips/include/asm/spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index f485afe5151476..a8df44d60607ba 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -127,7 +127,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " andi %[ticket], %[ticket], 0xffff \n" " bne %[ticket], %[my_ticket], 4f \n" " subu %[ticket], %[my_ticket], %[ticket] \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "4: andi %[ticket], %[ticket], 0xffff \n" " sll %[ticket], 5 \n" @@ -202,7 +202,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) " sc %[ticket], %[ticket_ptr] \n" " beqz %[ticket], 1b \n" " li %[ticket], 1 \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "3: b 2b \n" " li %[ticket], 0 \n" @@ -382,7 +382,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " .set reorder \n" __WEAK_LLSC_MB " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) : "memory"); @@ -422,7 +422,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) " lui %1, 0x8000 \n" " sc %1, %0 \n" " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) From 7c5a3d813050ee235817b0220dd8c42359a9efd8 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Sat, 25 Feb 2017 11:54:23 +0100 Subject: [PATCH 0935/1911] MIPS: ralink: Fix typos in rt3883 pinctrl There are two copy & paste errors in the definition of the 5GHz LNA and second ethernet pinmux.
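Why such copy & paste slips matter: the pinmux tables are matched by function name, so a duplicated "lna a"/"ge1" entry silently shadows the intended function. The following stand-alone C sketch (made-up struct and names, not the ralink driver code) models the four affected entries and a uniqueness check that would have flagged the duplicates:

#include <stdio.h>
#include <string.h>

struct pmx_func {
    const char *name;
    int pin_first;
    int pin_count;
};

static const struct pmx_func funcs[] = {
    { "lna a", 32, 3 },
    { "lna g", 35, 3 },   /* was wrongly "lna a" before the fix */
    { "ge1",   72, 12 },
    { "ge2",   84, 12 },  /* was wrongly "ge1" before the fix */
};

int main(void)
{
    size_t n = sizeof(funcs) / sizeof(funcs[0]);
    int dups = 0;

    for (size_t i = 0; i < n; i++)
        for (size_t j = i + 1; j < n; j++)
            if (!strcmp(funcs[i].name, funcs[j].name)) {
                printf("duplicate function name: %s\n", funcs[i].name);
                dups++;
            }

    printf("%d duplicate name(s)\n", dups);
    return dups ? 1 : 0;
}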
Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Signed-off-by: John Crispin Signed-off-by: Daniel Golle Cc: linux-mips@linux-mips.org Cc: # 3.19.x- Patchwork: https://patchwork.linux-mips.org/patch/15328/ Signed-off-by: James Hogan --- arch/mips/ralink/rt3883.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c index c4ffd43d3996ac..48ce701557a451 100644 --- a/arch/mips/ralink/rt3883.c +++ b/arch/mips/ralink/rt3883.c @@ -35,7 +35,7 @@ static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) }; -static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna a", 0, 35, 3) }; +static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) }; static struct rt2880_pmx_func pci_func[] = { FUNC("pci-dev", 0, 40, 32), FUNC("pci-host2", 1, 40, 32), @@ -43,7 +43,7 @@ static struct rt2880_pmx_func pci_func[] = { FUNC("pci-fnc", 3, 40, 32) }; static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) }; -static struct rt2880_pmx_func ge2_func[] = { FUNC("ge1", 0, 84, 12) }; +static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) }; static struct rt2880_pmx_group rt3883_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C), From 0c7e2bc87ea6c2eb6f369998f74a0278e64863e4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 00:32:03 +0000 Subject: [PATCH 0936/1911] MIPS: Include asm/ptrace.h now linux/sched.h doesn't Use of the task_pt_regs() based macros in MIPS' asm/processor.h for accessing the user context on the kernel stack need the definition of struct pt_regs from asm/ptrace.h. __own_fpu() in asm/fpu.h uses these macros but implicitly depended on linux/sched.h to include asm/ptrace.h. Since commit f780d89a0e82 ("sched/headers: Remove from ") however linux/sched.h no longer includes asm/ptrace.h, so include it explicitly from asm/fpu.h where it is needed instead. This fixes build errors such as: ./arch/mips/include/asm/fpu.h: In function '__own_fpu': ./arch/mips/include/asm/processor.h:385:31: error: invalid application of 'sizeof' to incomplete type 'struct pt_regs' THREAD_SIZE - 32 - sizeof(struct pt_regs)) ^ Fixes: f780d89a0e82 ("sched/headers: Remove from ") Signed-off-by: James Hogan Acked-by: Ingo Molnar Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15386/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6ec..1527efaf4af49e 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include From 9cb74b5e134c9f133001dd1585deef5353cd85f1 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 00:41:25 +0000 Subject: [PATCH 0937/1911] MIPS: Wire up statx system call Wire up the statx system call for MIPS, which was introduced in commit a528d35e8bfc ("statx: Add a system call to make enhanced file info available"). 
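For reference, once the syscall is wired up it can be exercised from userspace with a raw syscall(2) invocation. This is a hedged sketch, assuming a kernel that provides the syscall and headers that define __NR_statx and struct statx (linux/stat.h); on older systems the call simply fails with ENOSYS:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>          /* AT_FDCWD */
#include <sys/syscall.h>    /* __NR_statx, if the installed headers have it */
#include <linux/stat.h>     /* struct statx, STATX_BASIC_STATS */

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "/";
    struct statx stx;
    long ret;

    ret = syscall(__NR_statx, AT_FDCWD, path, 0, STATX_BASIC_STATS, &stx);
    if (ret) {
        perror("statx");    /* ENOSYS if the syscall is not wired up */
        return 1;
    }
    printf("%s: size=%llu mode=%o\n", path,
           (unsigned long long)stx.stx_size, stx.stx_mode);
    return 0;
}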
Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15387/ Signed-off-by: Ralf Baechle --- arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------ arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 3e940dbe02629a..78faf4292e907c 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -386,17 +386,18 @@ #define __NR_pkey_mprotect (__NR_Linux + 363) #define __NR_pkey_alloc (__NR_Linux + 364) #define __NR_pkey_free (__NR_Linux + 365) +#define __NR_statx (__NR_Linux + 366) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 365 +#define __NR_Linux_syscalls 366 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 365 +#define __NR_O32_Linux_syscalls 366 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -730,16 +731,17 @@ #define __NR_pkey_mprotect (__NR_Linux + 323) #define __NR_pkey_alloc (__NR_Linux + 324) #define __NR_pkey_free (__NR_Linux + 325) +#define __NR_statx (__NR_Linux + 326) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 325 +#define __NR_Linux_syscalls 326 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 325 +#define __NR_64_Linux_syscalls 326 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1077,15 +1079,16 @@ #define __NR_pkey_mprotect (__NR_Linux + 327) #define __NR_pkey_alloc (__NR_Linux + 328) #define __NR_pkey_free (__NR_Linux + 329) +#define __NR_statx (__NR_Linux + 330) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 329 +#define __NR_Linux_syscalls 330 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 329 +#define __NR_N32_Linux_syscalls 330 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index c29d397eee86cf..80ed68b2c95e41 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -600,3 +600,4 @@ EXPORT(sys_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ + PTR sys_statx diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 0687f96ee91269..49765b44aa9b3b 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -438,4 +438,5 @@ EXPORT(sys_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 5325 */ + PTR sys_statx .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 0331ba39a065b8..90bad2d1b2d3e2 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -433,4 +433,5 @@ EXPORT(sysn32_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free + PTR sys_statx /* 6330 */ .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 5a47042dd25f7a..2dd70bd104e1a0 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -588,4 +588,5 @@ EXPORT(sys32_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ + PTR sys_statx .size sys32_call_table,.-sys32_call_table From 12aff99723901bcc0e2a6a34343a4f62c371fdd9 Mon 
Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 7 Feb 2017 17:14:14 -0200 Subject: [PATCH 0938/1911] ARM: dts: imx6sx-udoo-neo: Fix reboot hang After issuing a 'reboot' command the imx6sx-udoo-neo board does not reboot as expected and it just hangs instead. In mainline kernel only LDO enabled mode is supported. Do not provide arm-supply/soc-supply nodes in the device tree, so that the board operates in LDO enabled mode and can then successfully reboot via watchdog. Fixes: 76e691fc7653b85d39 ("ARM: dts: imx6sx: Add UDOO Neo support") Signed-off-by: Fabio Estevam Tested-by: Breno Lima Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx-udoo-neo.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi index 49f466fe0b1dc2..dcfc9759143375 100644 --- a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi +++ b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi @@ -121,11 +121,6 @@ }; }; -&cpu0 { - arm-supply = <&sw1a_reg>; - soc-supply = <&sw1c_reg>; -}; - &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet1>; From 35b2719e72d375f3e32c819858165668d948d5f2 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 8 Mar 2017 13:56:37 +0200 Subject: [PATCH 0939/1911] usb: dwc3: gadget: make to increment req->remaining in all cases Sometimes, we might get a completion for a TRB which is left with HWO bit. Even in these cases, we should increment req->remaining to properly report total transferred size. I noticed this while debuggin a separate problem seen with MSC tests from USBCV. Sometimes we would erroneously report a completion for a 512-byte transfer when, in reality, we transferred 0 bytes. Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3db5eeae2bfb36..0d75158e43fe48 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2189,12 +2189,12 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, return 1; } - if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) - return 1; - count = trb->size & DWC3_TRB_SIZE_MASK; req->remaining += count; + if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) + return 1; + if (dep->direction) { if (count) { trb_status = DWC3_TRB_SIZE_TRBSTS(trb->size); From f1994a9c0930de4b2244816e62120cad08283cdc Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:03:10 +0800 Subject: [PATCH 0940/1911] ASoC: rt5665: fix getting wrong work handler container We got rt5665 private data from wrong work. It will result in kernel panic. 
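The underlying mistake is easy to reproduce: container_of() is pure pointer arithmetic, so naming the wrong member returns a pointer at the wrong offset without any compiler warning. A minimal userspace sketch (made-up types, not the rt5665 driver structures; the bogus pointer is only printed, never dereferenced):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct codec_priv {
    struct work jack_detect_work;
    struct work jd_check_work;
    struct work calibrate_work;
    int jack_type;
};

static void jd_check_handler(struct work *w)
{
    /* correct: the handler was scheduled on jd_check_work */
    struct codec_priv *ok = container_of(w, struct codec_priv, jd_check_work);
    /* wrong member, as in the bug: lands before the real structure */
    struct codec_priv *bad = container_of(w, struct codec_priv, calibrate_work);

    printf("correct container %p, bogus container %p\n", (void *)ok, (void *)bad);
}

int main(void)
{
    struct codec_priv priv = { .jack_type = 1 };

    jd_check_handler(&priv.jd_check_work);
    printf("actual priv at %p\n", (void *)&priv);
    return 0;
}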
Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 324461e985b391..fe2cf1ed8237a4 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data) static void rt5665_jd_check_handler(struct work_struct *work) { struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv, - calibrate_work.work); + jd_check_work.work); if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) { /* jack out */ From 593dd5d9fb66c0345cef5fc8caf4199bb6d4e093 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:29 +0800 Subject: [PATCH 0941/1911] ASoC: rt5665: increase LDO level Too low LDO level will cause a few functions unstable. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index fe2cf1ed8237a4..a4c07c2ee41497 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -4798,7 +4798,7 @@ static int rt5665_i2c_probe(struct i2c_client *i2c, /* Enhance performance*/ regmap_update_bits(rt5665->regmap, RT5665_PWR_ANLG_1, RT5665_HP_DRIVER_MASK | RT5665_LDO1_DVO_MASK, - RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_09); + RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_12); INIT_DELAYED_WORK(&rt5665->jack_detect_work, rt5665_jack_detect_handler); From 8f365313beb20742b68cb29a7d1ca6b27c036d81 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:31 +0800 Subject: [PATCH 0942/1911] ASoC: rt5665: Vref3 is necessary for Mono Amp Vref3 is necessary for Mono Amp. So add it to dapm routes Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index a4c07c2ee41497..7a0244e5e32104 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -3912,6 +3912,7 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = { {"Mono MIX", "MONOVOL Switch", "MONOVOL"}, {"Mono Amp", NULL, "Mono MIX"}, {"Mono Amp", NULL, "Vref2"}, + {"Mono Amp", NULL, "Vref3"}, {"Mono Amp", NULL, "CLKDET SYS"}, {"Mono Amp", NULL, "CLKDET MONO"}, {"Mono Playback", "Switch", "Mono Amp"}, From 09b50c3703cc354100fe36202ef5e52ee128b904 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:32 +0800 Subject: [PATCH 0943/1911] ASoC: rt5665: CLKDET is also a power of ASRC We need to power on CLKDET to use ASRC function. 
Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 7a0244e5e32104..61137160c11614 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -3178,6 +3178,9 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = { {"DAC Mono Right Filter", NULL, "DAC Mono R ASRC", is_using_asrc}, {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc}, {"DAC Stereo2 Filter", NULL, "DAC STO2 ASRC", is_using_asrc}, + {"I2S1 ASRC", NULL, "CLKDET"}, + {"I2S2 ASRC", NULL, "CLKDET"}, + {"I2S3 ASRC", NULL, "CLKDET"}, /*Vref*/ {"Mic Det Power", NULL, "Vref2"}, From 0b1d250afb8eb9d65afb568bac9b9f9253a82b49 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:37 +0100 Subject: [PATCH 0944/1911] USB: serial: io_ti: fix NULL-deref in interrupt callback Fix a NULL-pointer dereference in the interrupt callback should a malicious device send data containing a bad port number by adding the missing sanity check. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/io_ti.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index ceaeebaa6f9058..4561dd4cde8b87 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1674,6 +1674,12 @@ static void edge_interrupt_callback(struct urb *urb) function = TIUMP_GET_FUNC_FROM_CODE(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, info 0x%x\n", __func__, port_number, function, data[1]); + + if (port_number >= edge_serial->serial->num_ports) { + dev_err(dev, "bad port number %d\n", port_number); + goto exit; + } + port = edge_serial->serial->port[port_number]; edge_port = usb_get_serial_port_data(port); if (!edge_port) { From 30572418b445d85fcfe6c8fe84c947d2606767d8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:38 +0100 Subject: [PATCH 0945/1911] USB: serial: omninet: fix reference leaks at open This driver needlessly took another reference to the tty on open, a reference which was then never released on close. This lead to not just a leak of the tty, but also a driver reference leak that prevented the driver from being unloaded after a port had once been opened. Fixes: 4a90f09b20f4 ("tty: usb-serial krefs") Cc: stable # 2.6.28 Signed-off-by: Johan Hovold --- drivers/usb/serial/omninet.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index a180b17d24323b..76564b3bebb9bc 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -142,12 +142,6 @@ static int omninet_port_remove(struct usb_serial_port *port) static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) { - struct usb_serial *serial = port->serial; - struct usb_serial_port *wport; - - wport = serial->port[1]; - tty_port_tty_set(&wport->port, tty); - return usb_serial_generic_open(tty, port); } From 367ec1706745912702c187722065285cd4d2aee7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:39 +0100 Subject: [PATCH 0946/1911] USB: serial: omninet: drop open callback Remove the now redundant open callback and let core call the generic handler for us instead. 
Signed-off-by: Johan Hovold --- drivers/usb/serial/omninet.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 76564b3bebb9bc..dd706953b46609 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -31,7 +31,6 @@ #define BT_IGNITIONPRO_ID 0x2000 /* function prototypes */ -static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port); static void omninet_process_read_urb(struct urb *urb); static void omninet_write_bulk_callback(struct urb *urb); static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, @@ -60,7 +59,6 @@ static struct usb_serial_driver zyxel_omninet_device = { .attach = omninet_attach, .port_probe = omninet_port_probe, .port_remove = omninet_port_remove, - .open = omninet_open, .write = omninet_write, .write_room = omninet_write_room, .write_bulk_callback = omninet_write_bulk_callback, @@ -140,11 +138,6 @@ static int omninet_port_remove(struct usb_serial_port *port) return 0; } -static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port) -{ - return usb_serial_generic_open(tty, port); -} - #define OMNINET_HEADERLEN 4 #define OMNINET_BULKOUTSIZE 64 #define OMNINET_PAYLOADSIZE (OMNINET_BULKOUTSIZE - OMNINET_HEADERLEN) From 654b404f2a222f918af9b0cd18ad469d0c941a8e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:40 +0100 Subject: [PATCH 0947/1911] USB: serial: io_ti: fix information leak in completion handler Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that can be triggered by a malicious device. This avoids leaking 128 kB of memory content from after the URB transfer buffer to user space. Fixes: 8c209e6782ca ("USB: make actual_length in struct urb field u32") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable # 2.6.30 Signed-off-by: Johan Hovold --- drivers/usb/serial/io_ti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 4561dd4cde8b87..a76b95d3215787 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1761,7 +1761,7 @@ static void edge_bulk_in_callback(struct urb *urb) port_number = edge_port->port->port_number; - if (edge_port->lsr_event) { + if (urb->actual_length > 0 && edge_port->lsr_event) { edge_port->lsr_event = 0; dev_dbg(dev, "%s ===== Port %u LSR Status = %02x, Data = %02x ======\n", __func__, port_number, edge_port->lsr_mask, *data); From 8c76d7cd520ebffc1ea9ea0850d87a224a50c7f2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2017 17:36:41 +0100 Subject: [PATCH 0948/1911] USB: serial: safe_serial: fix information leak in completion handler Add missing sanity check to the bulk-in completion handler to avoid an integer underflow that could be triggered by a malicious device. This avoids leaking up to 56 bytes from after the URB transfer buffer to user space. 
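Both leaks follow the same pattern: a fixed header or trailer length is subtracted from an attacker-controlled URB length without first checking that the packet is long enough, and the wrapped-around unsigned result is then used as a copy size. A small userspace sketch of the arithmetic (hypothetical names; a 2-byte trailer is chosen purely for illustration):

#include <stdio.h>
#include <stddef.h>

#define TRAILER_LEN 2    /* e.g. a CRC at the end of each packet */

static size_t payload_len_buggy(unsigned int length)
{
    return length - TRAILER_LEN;        /* wraps around for length < 2 */
}

static size_t payload_len_fixed(unsigned int length)
{
    if (length < TRAILER_LEN)
        return 0;                       /* reject the malformed packet */
    return length - TRAILER_LEN;
}

int main(void)
{
    unsigned int urb_len = 0;           /* what a malicious device reports */

    printf("buggy: would copy %zu bytes\n", payload_len_buggy(urb_len));
    printf("fixed: copies %zu bytes\n", payload_len_fixed(urb_len));
    return 0;
}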
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/safe_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index 93c6c9b08daae5..8a069aa154eda4 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -200,6 +200,11 @@ static void safe_process_read_urb(struct urb *urb) if (!safe) goto out; + if (length < 2) { + dev_err(&port->dev, "malformed packet\n"); + return; + } + fcs = fcs_compute10(data, length, CRC10_INITFCS); if (fcs) { dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs); From 99c014a87979c9244806685f3c3fc7767f79f645 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 8 Mar 2017 10:16:17 -0500 Subject: [PATCH 0949/1911] ktest: Fix while loop in wait_for_input The run_command function was changed to use the wait_for_input function to allow having a timeout if the command to run takes too much time. There was a bug in the wait_for_input where it could end up going into an infinite loop. There's two issues here. One is that the return value of the sysread wasn't used for the write (to write a proper size), and that it should continue processing the passed in file descriptor too even if there was input. There was no check for error, if for some reason STDIN returned an error, the function would go into an infinite loop and never exit. Reported-by: Greg Kroah-Hartman Tested-by: Greg Kroah-Hartman Fixes: 6e98d1b4415f ("ktest: Add timeout to ssh command") Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 29470b5547119a..0c006a2f165d1a 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1904,11 +1904,12 @@ sub wait_for_input # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { - sysread(\*STDIN, $buf, 1000); - syswrite($fp, $buf, 1000); - next; + $nr = sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, $nr) if ($nr > 0); } + next if (vec($rout, fileno($fp), 1) != 1); + $line = ""; # try to read one char at a time From f7c6401ff84ab8ffffc281a29aa0a787f7eb346e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 8 Mar 2017 10:36:59 -0500 Subject: [PATCH 0950/1911] ktest: Make sure wait_for_input does honor the timeout The function wait_for_input takes in a timeout, and even has a default timeout. But if for some reason the STDIN descriptor keeps sending in data, the function will never time out. The timout is to wait for the data from the passed in file descriptor, not for STDIN. Adding a test in the case where there's no data from the passed in file descriptor that checks to see if the timeout passed, will ensure that it will timeout properly even if there's input in STDIN. 
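The fix above is Perl, but the pattern is general: keep an absolute deadline across select() iterations so forwarded stdin traffic cannot keep pushing the timeout out. A rough C sketch of the same idea (simplified, made-up helper name, no line-wise reading of the monitored fd):

#include <stdio.h>
#include <unistd.h>
#include <sys/select.h>
#include <time.h>

/* Wait up to 'timeout' seconds for data on 'fd', forwarding stdin meanwhile.
 * Returns 1 if 'fd' became readable, 0 on timeout or error. */
static int wait_for_fd(int fd, int timeout)
{
    time_t deadline = time(NULL) + timeout;

    for (;;) {
        time_t now = time(NULL);
        if (now >= deadline)
            return 0;                   /* honor the overall timeout */

        struct timeval tv = { .tv_sec = deadline - now };
        fd_set rin;
        FD_ZERO(&rin);
        FD_SET(fd, &rin);
        FD_SET(STDIN_FILENO, &rin);

        int nfds = (fd > STDIN_FILENO ? fd : STDIN_FILENO) + 1;
        if (select(nfds, &rin, NULL, NULL, &tv) <= 0)
            return 0;                   /* timed out or failed */

        if (FD_ISSET(STDIN_FILENO, &rin)) {
            char buf[1000];
            ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
            if (n > 0 && write(fd, buf, (size_t)n) < 0)
                perror("forward");      /* forward sized by read(), errors harmless here */
        }

        if (FD_ISSET(fd, &rin))
            return 1;                   /* caller can read from fd now */
    }
}

int main(void)
{
    int pipefd[2];

    if (pipe(pipefd))
        return 1;
    /* nothing is ever written to the pipe: demonstrates the 3 s timeout
     * even if the user keeps typing on stdin */
    printf("result: %d (expect 0 after ~3s)\n", wait_for_fd(pipefd[0], 3));
    return 0;
}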
Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c006a2f165d1a..49f7c8b9d9c4ef 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1880,6 +1880,7 @@ sub get_grub_index { sub wait_for_input { my ($fp, $time) = @_; + my $start_time; my $rin; my $rout; my $nr; @@ -1895,12 +1896,12 @@ sub wait_for_input vec($rin, fileno($fp), 1) = 1; vec($rin, fileno(\*STDIN), 1) = 1; + $start_time = time; + while (1) { $nr = select($rout=$rin, undef, undef, $time); - if ($nr <= 0) { - return undef; - } + last if ($nr <= 0); # copy data from stdin to the console if (vec($rout, fileno(\*STDIN), 1) == 1) { @@ -1908,7 +1909,11 @@ sub wait_for_input syswrite($fp, $buf, $nr) if ($nr > 0); } - next if (vec($rout, fileno($fp), 1) != 1); + # The timeout is based on time waiting for the fp data + if (vec($rout, fileno($fp), 1) != 1) { + last if (defined($time) && (time - $start_time > $time)); + next; + } $line = ""; @@ -1918,12 +1923,11 @@ sub wait_for_input last if ($ch eq "\n"); } - if (!length($line)) { - return undef; - } + last if (!length($line)); return $line; } + return undef; } sub reboot_to { From 2501c1bb054290679baad0ff7f4f07c714251f4c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Mon, 6 Mar 2017 15:20:51 -0500 Subject: [PATCH 0951/1911] i2c: riic: fix restart condition While modifying the driver to use the STOP interrupt, the completion of the intermediate transfers need to wake the driver back up in order to initiate the next transfer (restart condition). Otherwise you get never ending interrupts and only the first transfer sent. Fixes: 71ccea095ea1 ("i2c: riic: correctly finish transfers") Reported-by: Simon Horman Signed-off-by: Chris Brandt Tested-by: Simon Horman Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-riic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-riic.c b/drivers/i2c/busses/i2c-riic.c index 8f11d347b3ec48..c811af4c8d817b 100644 --- a/drivers/i2c/busses/i2c-riic.c +++ b/drivers/i2c/busses/i2c-riic.c @@ -218,8 +218,12 @@ static irqreturn_t riic_tend_isr(int irq, void *data) } if (riic->is_last || riic->err) { - riic_clear_set_bit(riic, 0, ICIER_SPIE, RIIC_ICIER); + riic_clear_set_bit(riic, ICIER_TEIE, ICIER_SPIE, RIIC_ICIER); writeb(ICCR2_SP, riic->base + RIIC_ICCR2); + } else { + /* Transfer is complete, but do not send STOP */ + riic_clear_set_bit(riic, ICIER_TEIE, 0, RIIC_ICIER); + complete(&riic->msg_done); } return IRQ_HANDLED; From 737f98cfe7de8df7433a4d846850aa8efa44bd48 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:13:59 +0800 Subject: [PATCH 0952/1911] blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue() Both q->mq_kobj and sw queues' kobjects should have been initialized once, instead of doing that each add_disk context. Also this patch removes clearing of ctx in blk_mq_init_cpu_queues() because percpu allocator fills zero to allocated variable. This patch fixes one issue[1] reported from Omar. [1] kernel wearning when doing unbind/bind on one scsi-mq device [ 19.347924] kobject (ffff8800791ea0b8): tried to init an initialized object, something is seriously wrong. 
[ 19.349781] CPU: 1 PID: 84 Comm: kworker/u8:1 Not tainted 4.10.0-rc7-00210-g53f39eeaa263 #34 [ 19.350686] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-20161122_114906-anatol 04/01/2014 [ 19.350920] Workqueue: events_unbound async_run_entry_fn [ 19.350920] Call Trace: [ 19.350920] dump_stack+0x63/0x83 [ 19.350920] kobject_init+0x77/0x90 [ 19.350920] blk_mq_register_dev+0x40/0x130 [ 19.350920] blk_register_queue+0xb6/0x190 [ 19.350920] device_add_disk+0x1ec/0x4b0 [ 19.350920] sd_probe_async+0x10d/0x1c0 [sd_mod] [ 19.350920] async_run_entry_fn+0x48/0x150 [ 19.350920] process_one_work+0x1d0/0x480 [ 19.350920] worker_thread+0x48/0x4e0 [ 19.350920] kthread+0x101/0x140 [ 19.350920] ? process_one_work+0x480/0x480 [ 19.350920] ? kthread_create_on_node+0x60/0x60 [ 19.350920] ret_from_fork+0x2c/0x40 Cc: Omar Sandoval Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 4 +--- block/blk-mq.c | 4 +++- block/blk-mq.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 295e69670c3934..124305407c8015 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -277,7 +277,7 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } -static void blk_mq_sysfs_init(struct request_queue *q) +void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; int cpu; @@ -297,8 +297,6 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) blk_mq_disable_hotplug(); - blk_mq_sysfs_init(q); - ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) goto out; diff --git a/block/blk-mq.c b/block/blk-mq.c index b2fd175e84d79a..ed4b55176cddae 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2045,7 +2045,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i); struct blk_mq_hw_ctx *hctx; - memset(__ctx, 0, sizeof(*__ctx)); __ctx->cpu = i; spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); @@ -2352,6 +2351,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_ctx) goto err_exit; + /* init q->mq_kobj and sw queues' kobjects */ + blk_mq_sysfs_init(q); + q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node); if (!q->queue_hw_ctx) diff --git a/block/blk-mq.h b/block/blk-mq.h index 088ced003c13d7..ad8bfd7473ef7e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -77,6 +77,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, /* * sysfs helpers */ +extern void blk_mq_sysfs_init(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); From 7ea5fe31c12dd8bcf4a9c5a4a7e8e23826a9a3b8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:00 +0800 Subject: [PATCH 0953/1911] blk-mq: make lifetime consitent between q/ctx and its kobject Currently from kobject view, both q->mq_kobj and ctx->kobj can be released during one cycle of blk_mq_register_dev() and blk_mq_unregister_dev(). Actually, sw queue's lifetime is same with its request queue's, which is covered by request_queue->kobj. So we don't need to call kobject_put() for the two kinds of kobject in __blk_mq_unregister_dev(), instead we do that in release handler of request queue. 
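The rule these blk-mq changes enforce is: initialize a kobject exactly once, when its containing object is allocated, and keep the register/unregister paths free of initialization so they can be repeated across unbind/bind cycles. A toy userspace model of that rule (made-up names, not the block layer API):

#include <stdio.h>
#include <stdlib.h>

struct demo_kobj {
    int initialized;
    int registered;
};

static void demo_kobj_init(struct demo_kobj *k)
{
    if (k->initialized) {
        fprintf(stderr, "tried to init an initialized object\n");
        return;
    }
    k->initialized = 1;
}

static void demo_register(struct demo_kobj *k)
{
    k->registered = 1;      /* publish only; no init here */
}

static void demo_unregister(struct demo_kobj *k)
{
    k->registered = 0;
}

int main(void)
{
    struct demo_kobj *k = calloc(1, sizeof(*k));

    if (!k)
        return 1;
    demo_kobj_init(k);              /* once, at allocation time */
    for (int i = 0; i < 3; i++) {   /* e.g. repeated unbind/bind */
        demo_register(k);
        demo_unregister(k);
    }
    demo_kobj_init(k);              /* reproduces the warning above */
    free(k);
    return 0;
}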
Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 20 +++++++++++++------- block/blk-mq.c | 7 ++++++- block/blk-mq.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 124305407c8015..77fb238af2be9b 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -242,15 +242,11 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; - int i, j; + int i; queue_for_each_hw_ctx(q, hctx, i) { blk_mq_unregister_hctx(hctx); - hctx_for_each_ctx(hctx, ctx, j) - kobject_put(&ctx->kobj); - kobject_put(&hctx->kobj); } @@ -258,8 +254,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); kobject_del(&q->mq_kobj); - kobject_put(&q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; @@ -277,6 +271,18 @@ void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) kobject_init(&hctx->kobj, &blk_mq_hw_ktype); } +void blk_mq_sysfs_deinit(struct request_queue *q) +{ + struct blk_mq_ctx *ctx; + int cpu; + + for_each_possible_cpu(cpu) { + ctx = per_cpu_ptr(q->queue_ctx, cpu); + kobject_put(&ctx->kobj); + } + kobject_put(&q->mq_kobj); +} + void blk_mq_sysfs_init(struct request_queue *q) { struct blk_mq_ctx *ctx; diff --git a/block/blk-mq.c b/block/blk-mq.c index ed4b55176cddae..b985c236f50f54 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2264,7 +2264,12 @@ void blk_mq_release(struct request_queue *q) kfree(q->queue_hw_ctx); - /* ctx kobj stays in queue_ctx */ + /* + * release .mq_kobj and sw queue's kobject now because + * both share lifetime with request queue. + */ + blk_mq_sysfs_deinit(q); + free_percpu(q->queue_ctx); } diff --git a/block/blk-mq.h b/block/blk-mq.h index ad8bfd7473ef7e..b79f9a7d8cf620 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -78,6 +78,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, * sysfs helpers */ extern void blk_mq_sysfs_init(struct request_queue *q); +extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); From 6c8b232efea1ad3d263ff8b9c16b7e8767a77488 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:01 +0800 Subject: [PATCH 0954/1911] blk-mq: make lifetime consistent between hctx and its kobject This patch removes kobject_put() over hctx in __blk_mq_unregister_dev(), and trys to keep lifetime consistent between hctx and hctx's kobject. Now blk_mq_sysfs_register() and blk_mq_sysfs_unregister() become totally symmetrical, and kobject's refcounter drops to zero just when the hctx is freed. 
Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 15 ++++++++++----- block/blk-mq.c | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 77fb238af2be9b..cb19ec16a7fc03 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -17,6 +17,14 @@ static void blk_mq_sysfs_release(struct kobject *kobj) { } +static void blk_mq_hw_sysfs_release(struct kobject *kobj) +{ + struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, + kobj); + kfree(hctx->ctxs); + kfree(hctx); +} + struct blk_mq_ctx_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_mq_ctx *, char *); @@ -200,7 +208,7 @@ static struct kobj_type blk_mq_ctx_ktype = { static struct kobj_type blk_mq_hw_ktype = { .sysfs_ops = &blk_mq_hw_sysfs_ops, .default_attrs = default_hw_ctx_attrs, - .release = blk_mq_sysfs_release, + .release = blk_mq_hw_sysfs_release, }; static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) @@ -244,12 +252,9 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); - kobject_put(&hctx->kobj); - } - blk_mq_debugfs_unregister_hctxs(q); kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); diff --git a/block/blk-mq.c b/block/blk-mq.c index b985c236f50f54..f70595e5fb86dd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2256,8 +2256,7 @@ void blk_mq_release(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) continue; - kfree(hctx->ctxs); - kfree(hctx); + kobject_put(&hctx->kobj); } q->mq_map = NULL; @@ -2336,8 +2335,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, blk_mq_exit_hctx(q, set, hctx, j); free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); - kfree(hctx->ctxs); - kfree(hctx); hctxs[j] = NULL; } From 01388df37627d2e89f0b835377c0eb39d81f671c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Feb 2017 18:14:02 +0800 Subject: [PATCH 0955/1911] blk-mq: free hctx->cpumask in release handler of hctx's kobject It is obviously that hctx->cpumask is per hctx, and both share same lifetime, so this patch moves freeing of hctx->cpumask into release handler of hctx's kobject. 
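The common thread of these patches is that everything an hctx owns is freed in the kobject release callback, i.e. exactly when the last reference is dropped, rather than in the unregister or exit paths. A userspace sketch of that ownership rule with a hand-rolled refcount (hypothetical names, loosely mirroring the release handler in the diffs; allocation error handling omitted):

#include <stdio.h>
#include <stdlib.h>

struct demo_hctx {
    int refcount;
    unsigned long *cpumask;  /* owned by the hctx */
    void **ctxs;             /* owned by the hctx */
};

static struct demo_hctx *demo_hctx_alloc(void)
{
    struct demo_hctx *h = calloc(1, sizeof(*h));

    h->refcount = 1;
    h->cpumask = calloc(1, sizeof(*h->cpumask));
    h->ctxs = calloc(4, sizeof(*h->ctxs));
    return h;
}

static void demo_hctx_release(struct demo_hctx *h)
{
    /* free what the object owns, only when the last reference is gone */
    free(h->cpumask);
    free(h->ctxs);
    free(h);
    printf("released\n");
}

static void demo_hctx_get(struct demo_hctx *h) { h->refcount++; }

static void demo_hctx_put(struct demo_hctx *h)
{
    if (--h->refcount == 0)
        demo_hctx_release(h);
}

int main(void)
{
    struct demo_hctx *h = demo_hctx_alloc();

    demo_hctx_get(h);   /* e.g. sysfs holds a reference */
    demo_hctx_put(h);   /* unregister drops its reference... */
    demo_hctx_put(h);   /* ...and the final put frees everything */
    return 0;
}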
Signed-off-by: Ming Lei Tested-by: Peter Zijlstra (Intel) Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 1 + block/blk-mq.c | 12 ------------ 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index cb19ec16a7fc03..d745ab81033afa 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -21,6 +21,7 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj) { struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); + free_cpumask_var(hctx->cpumask); kfree(hctx->ctxs); kfree(hctx); } diff --git a/block/blk-mq.c b/block/blk-mq.c index f70595e5fb86dd..159187a28d6652 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1955,16 +1955,6 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, } } -static void blk_mq_free_hw_queues(struct request_queue *q, - struct blk_mq_tag_set *set) -{ - struct blk_mq_hw_ctx *hctx; - unsigned int i; - - queue_for_each_hw_ctx(q, hctx, i) - free_cpumask_var(hctx->cpumask); -} - static int blk_mq_init_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned hctx_idx) @@ -2333,7 +2323,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); hctxs[j] = NULL; @@ -2446,7 +2435,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); - blk_mq_free_hw_queues(q, set); } /* Basically redo blk_mq_init_queue with queue frozen */ From 0bc315381fe9ed9fb91db8b0e82171b645ac008f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 6 Mar 2017 11:23:35 +0100 Subject: [PATCH 0956/1911] zram: set physical queue limits to avoid array out of bounds accesses zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using the NVMe over Fabrics loopback target which potentially sends a huge bulk of pages attached to the bio's bvec this results in a kernel panic because of array out of bounds accesses in zram_decompress_page(). Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Sergey Senozhatsky Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e27d89a36c3417..dceb5edd1e5455 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1189,6 +1189,8 @@ static int zram_add(void) blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE; + zram->disk->queue->limits.chunk_sectors = 0; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); /* * zram_bio_discard() will clear all logical blocks if logical block From b0bfdfc2bf7fa85317824c6a389fc373dfcef5bc Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 6 Mar 2017 08:41:04 -0700 Subject: [PATCH 0957/1911] block/sed: Fix opal user range check and unused variables Fixes check that the opal user is within the range, and cleans up unused method variables. 
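The range-check bug is a classic one: with '&&' the two comparisons can never both be true, so no value is ever rejected. A tiny stand-alone illustration (demo constants standing in for OPAL_USER1/OPAL_USER9):

#include <stdio.h>
#include <stdbool.h>

enum { DEMO_USER1 = 1, DEMO_USER9 = 9 };

static bool valid_user_buggy(int who)
{
    /* 'who < USER1 && who > USER9' is always false: nothing is rejected */
    if (who < DEMO_USER1 && who > DEMO_USER9)
        return false;
    return true;
}

static bool valid_user_fixed(int who)
{
    if (who < DEMO_USER1 || who > DEMO_USER9)
        return false;
    return true;
}

int main(void)
{
    int who = 42;   /* clearly out of range */

    printf("buggy check accepts %d: %s\n", who, valid_user_buggy(who) ? "yes" : "no");
    printf("fixed check accepts %d: %s\n", who, valid_user_fixed(who) ? "yes" : "no");
    return 0;
}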
Signed-off-by: Jon Derrick Reviewed-by: Scott Bauer Signed-off-by: Jens Axboe --- block/sed-opal.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 1e18dca360fc50..14035f826b5e35 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1023,7 +1023,6 @@ static int finalize_and_send(struct opal_dev *dev, cont_fn cont) static int gen_key(struct opal_dev *dev, void *data) { - const u8 *method; u8 uid[OPAL_UID_LENGTH]; int err = 0; @@ -1031,7 +1030,6 @@ static int gen_key(struct opal_dev *dev, void *data) set_comid(dev, dev->comid); memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); - method = opalmethod[OPAL_GENKEY]; kfree(dev->prev_data); dev->prev_data = NULL; @@ -1669,7 +1667,6 @@ static int add_user_to_lr(struct opal_dev *dev, void *data) static int lock_unlock_locking_range(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; - const u8 *method; struct opal_lock_unlock *lkul = data; u8 read_locked = 1, write_locked = 1; int err = 0; @@ -1677,7 +1674,6 @@ static int lock_unlock_locking_range(struct opal_dev *dev, void *data) clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -1733,14 +1729,12 @@ static int lock_unlock_locking_range_sum(struct opal_dev *dev, void *data) { u8 lr_buffer[OPAL_UID_LENGTH]; u8 read_locked = 1, write_locked = 1; - const u8 *method; struct opal_lock_unlock *lkul = data; int ret; clear_opal_cmd(dev); set_comid(dev, dev->comid); - method = opalmethod[OPAL_SET]; if (build_locking_range(lr_buffer, sizeof(lr_buffer), lkul->session.opal_key.lr) < 0) return -ERANGE; @@ -2133,7 +2127,7 @@ static int opal_add_user_to_lr(struct opal_dev *dev, pr_err("Locking state was not RO or RW\n"); return -EINVAL; } - if (lk_unlk->session.who < OPAL_USER1 && + if (lk_unlk->session.who < OPAL_USER1 || lk_unlk->session.who > OPAL_USER9) { pr_err("Authority was not within the range of users: %d\n", lk_unlk->session.who); @@ -2316,7 +2310,7 @@ static int opal_activate_user(struct opal_dev *dev, int ret; /* We can't activate Admin1 it's active as manufactured */ - if (opal_session->who < OPAL_USER1 && + if (opal_session->who < OPAL_USER1 || opal_session->who > OPAL_USER9) { pr_err("Who was not a valid user: %d\n", opal_session->who); return -EINVAL; From 02dbfa5e5583523035f05636c614a0eca77f1aab Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: [PATCH 0958/1911] i2c: add missing of_node_put in i2c_mux_del_adapters Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). 
Signed-off-by: Qi Hou Reviewed-by: Zhang Xiao Acked-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919cb..2178266bca7948 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } From 2de3ec4f1d4ba6ee380478055104eb918bd50cce Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 3 Mar 2017 10:55:03 +0900 Subject: [PATCH 0959/1911] i2c: brcmstb: Fix START and STOP conditions The BSC data buffers used to send and receive data are each 32 or 8 bytes ('xfersz') in size, depending on the SoC. The problem observed for combined message transfers was that if the length of the data transfer was a multiple of 'xfersz', a repeated START was transmitted by the BSC driver. Fix this by appropriately setting the START/STOP conditions for such transfers. Fixes: dd1aa2524bc5 ("i2c: brcmstb: Add Broadcom settop SoC i2c controller driver") Signed-off-by: Jaedon Shin Acked-by: Kamal Dasu Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-brcmstb.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 0652281662a8b3..78792b4d6437c7 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -465,6 +465,7 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, u8 *tmp_buf; int len = 0; int xfersz = brcmstb_i2c_get_xfersz(dev); + u32 cond, cond_per_msg; if (dev->is_suspended) return -EBUSY; @@ -481,10 +482,11 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, pmsg->buf ?
pmsg->buf[0] : '0', pmsg->len); if (i < (num - 1) && (msgs[i + 1].flags & I2C_M_NOSTART)) - brcmstb_set_i2c_start_stop(dev, ~(COND_START_STOP)); + cond = ~COND_START_STOP; else - brcmstb_set_i2c_start_stop(dev, - COND_RESTART | COND_NOSTOP); + cond = COND_RESTART | COND_NOSTOP; + + brcmstb_set_i2c_start_stop(dev, cond); /* Send slave address */ if (!(pmsg->flags & I2C_M_NOSTART)) { @@ -497,13 +499,24 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, } } + cond_per_msg = cond; + /* Perform data transfer */ while (len) { bytes_to_xfer = min(len, xfersz); - if (len <= xfersz && i == (num - 1)) - brcmstb_set_i2c_start_stop(dev, - ~(COND_START_STOP)); + if (len <= xfersz) { + if (i == (num - 1)) + cond_per_msg = cond_per_msg & + ~(COND_RESTART | COND_NOSTOP); + else + cond_per_msg = cond; + } else { + cond_per_msg = (cond_per_msg & ~COND_RESTART) | + COND_NOSTOP; + } + + brcmstb_set_i2c_start_stop(dev, cond_per_msg); rc = brcmstb_i2c_xfer_bsc_data(dev, tmp_buf, bytes_to_xfer, pmsg); @@ -512,6 +525,8 @@ static int brcmstb_i2c_xfer(struct i2c_adapter *adapter, len -= bytes_to_xfer; tmp_buf += bytes_to_xfer; + + cond_per_msg = COND_NOSTART | COND_NOSTOP; } } From 7b4fdf77a450ec0fdcb2f677b080ddbf2c186544 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2017 21:44:00 +0100 Subject: [PATCH 0960/1911] netfilter: don't track fragmented packets Andrey reports syzkaller splat caused by NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); in ipv4 nat. But this assertion (and the comment) are wrong, this function does see fragments when IP_NODEFRAG setsockopt is used. As conntrack doesn't track packets without complete l4 header, only the first fragment is tracked. Because applying nat to first packet but not the rest makes no sense this also turns off tracking of all fragments. Reported-by: Andrey Konovalov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++++ net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bc1486f2c06433..2e14ed11a35cfc 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b05..6f5e8d01b87693 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. 
Hence it's the user's responsibilty to From 1945250d979203ed326b5a44fd705b6b2c46eec5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 27 Feb 2017 20:25:05 +0100 Subject: [PATCH 0961/1911] i2c: m65xx: drop superfluous quirk structure All length fields in Linux I2C are u16, so a HW length limitation of 16 bit lengths is not a limitation. Remove the quirk structure. Tested-by: Jun Gao Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 4a7d9bc2142ba3..45d61714c81bd2 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -172,14 +172,6 @@ static const struct i2c_adapter_quirks mt6577_i2c_quirks = { .max_comb_2nd_msg_len = 31, }; -static const struct i2c_adapter_quirks mt8173_i2c_quirks = { - .max_num_msgs = 65535, - .max_write_len = 65535, - .max_read_len = 65535, - .max_comb_1st_msg_len = 65535, - .max_comb_2nd_msg_len = 65535, -}; - static const struct mtk_i2c_compatible mt6577_compat = { .quirks = &mt6577_i2c_quirks, .pmic_i2c = 0, @@ -199,7 +191,6 @@ static const struct mtk_i2c_compatible mt6589_compat = { }; static const struct mtk_i2c_compatible mt8173_compat = { - .quirks = &mt8173_i2c_quirks, .pmic_i2c = 0, .dcm = 1, .auto_restart = 1, From 8e05ba7f848475bdc3aa546cf88418f7e51a6671 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sat, 4 Mar 2017 18:00:02 +0800 Subject: [PATCH 0962/1911] netfilter: nf_nat_sctp: fix ICMP packet to be dropped accidently Regarding RFC 792, the first 64 bits of the original SCTP datagram's data could be contained in ICMP packet, such as: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type | Code | Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | unused | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Internet Header + 64 bits of Original Data Datagram | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ However, according to RFC 4960, SCTP datagram header is as below: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Source Port Number | Destination Port Number | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Verification Tag | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ It means only the first three fields of SCTP header can be carried in ICMP packet except for Checksum field. At present in sctp_manip_pkt(), no matter whether the packet is ICMP or not, it always calculates SCTP packet checksum. However, not only the calculation of checksum is unnecessary for ICMP, but also it causes another fatal issue that ICMP packet is dropped. The header size of SCTP is used to identify whether the writeable length of skb is bigger than skb->len through skb_make_writable() in sctp_manip_pkt(). But when it deals with ICMP packet, skb_make_writable() directly returns false as the writeable length of skb is bigger than skb->len. Subsequently ICMP is dropped. Now we correct this misbahavior. When sctp_manip_pkt() handles ICMP packet, 8 bytes rather than the whole SCTP header size is used to check if writeable length of skb is overflowed. 
Meanwhile, as it's meaningless to calculate checksum when packet is ICMP, the computation of checksum is ignored as well. Signed-off-by: Ying Xue Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto_sctp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 31d358691af096..804e8a0ab36ef5 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { sctp_sctphdr_t *hdr; + int hdrsize = 8; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) return false; hdr = (struct sctphdr *)(skb->data + hdroff); @@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } + if (hdrsize < sizeof(*hdr)) + return true; + if (skb->ip_summed != CHECKSUM_PARTIAL) { hdr->checksum = sctp_compute_cksum(skb, hdroff); skb->ip_summed = CHECKSUM_NONE; From b0c1e95ab44feaad8831f2c06a3473c974003b49 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 11:10:51 -0800 Subject: [PATCH 0963/1911] i2c: copy device properties when using i2c_register_board_info() This will allow marking device property lists as __initdata, the same as board info structures themselves. Signed-off-by: Dmitry Torokhov Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 6e5fac6a5262a0..5b8f6c3a695074 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,7 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num); * * The board info passed can safely be __initdata, but be careful of embedded * pointers (for platform_data, functions, etc) since that won't be copied. + * Device properties are deep-copied though. */ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len) { @@ -78,6 +80,14 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig devinfo->busnum = busnum; devinfo->board_info = *info; + + if (info->properties) { + devinfo->board_info.properties = + property_entries_dup(info->properties); + if (IS_ERR(devinfo->board_info.properties)) + return PTR_ERR(devinfo->board_info.properties); + } + list_add_tail(&devinfo->list, &__i2c_board_list); } From 3b0277f198ac928f323c42e180680d2f79aa980d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 7 Mar 2017 21:06:38 +0100 Subject: [PATCH 0964/1911] i2c: meson: fix wrong variable usage in meson_i2c_put_data Most likely a copy & paste error. 
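The effect of the typo was that bytes 4-7 of each chunk never reached the hardware, because both register writes used wdata0. A stand-alone sketch of the intended packing with simulated registers (made-up names; little-endian byte placement as in the driver loop shown in the diff below):

#include <stdio.h>
#include <stdint.h>

static uint32_t reg_wdata0, reg_wdata1;  /* stand-ins for REG_TOK_WDATA0/1 */

static void put_data(const uint8_t *buf, int len)
{
    uint32_t wdata0 = 0, wdata1 = 0;

    for (int i = 0; i < len && i < 4; i++)
        wdata0 |= (uint32_t)buf[i] << (i * 8);
    for (int i = 4; i < len && i < 8; i++)
        wdata1 |= (uint32_t)buf[i] << ((i - 4) * 8);

    reg_wdata0 = wdata0;
    reg_wdata1 = wdata1;    /* the fix: write wdata1, not wdata0 again */
}

int main(void)
{
    const uint8_t msg[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };

    put_data(msg, 8);
    printf("WDATA0 = 0x%08x\n", (unsigned)reg_wdata0);  /* 0x44332211 */
    printf("WDATA1 = 0x%08x\n", (unsigned)reg_wdata1);  /* 0x88776655 */
    return 0;
}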
Signed-off-by: Heiner Kallweit Acked-by: Jerome Brunet Signed-off-by: Wolfram Sang Fixes: 30021e3707a7 ("i2c: add support for Amlogic Meson I2C controller") --- drivers/i2c/busses/i2c-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 2aa61bbbd307b9..73b97c71a484ee 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -175,7 +175,7 @@ static void meson_i2c_put_data(struct meson_i2c *i2c, char *buf, int len) wdata1 |= *buf++ << ((i - 4) * 8); writel(wdata0, i2c->regs + REG_TOK_WDATA0); - writel(wdata0, i2c->regs + REG_TOK_WDATA1); + writel(wdata1, i2c->regs + REG_TOK_WDATA1); dev_dbg(i2c->dev, "%s: data %08x %08x len %d\n", __func__, wdata0, wdata1, len); From ab809fd81fde3d416f8656d8f814a0777fb9b65e Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 27 Dec 2016 22:22:40 +0800 Subject: [PATCH 0965/1911] i2c: designware: add reset interface Some platforms like hi3660 need do reset first to allow accessing registers Signed-off-by: Zhangfei Gao Reviewed-by: Andy Shevchenko Tested-by: Ramiro Oliveira Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 1 + drivers/i2c/busses/i2c-designware-platdrv.c | 28 ++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index c1db3a5a340f59..d9aaf1790e0eff 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -88,6 +88,7 @@ struct dw_i2c_dev { void __iomem *base; struct completion cmd_complete; struct clk *clk; + struct reset_control *rst; u32 (*get_clk_rate_khz) (struct dw_i2c_dev *dev); struct dw_pci_controller *controller; int cmd_err; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6ce4313231257f..79c4b4ea053969 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -199,6 +200,14 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) dev->irq = irq; platform_set_drvdata(pdev, dev); + dev->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); + if (IS_ERR(dev->rst)) { + if (PTR_ERR(dev->rst) == -EPROBE_DEFER) + return -EPROBE_DEFER; + } else { + reset_control_deassert(dev->rst); + } + if (pdata) { dev->clk_freq = pdata->i2c_scl_freq; } else { @@ -235,12 +244,13 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) { dev_err(&pdev->dev, "Only 100kHz, 400kHz, 1MHz and 3.4MHz supported"); - return -EINVAL; + r = -EINVAL; + goto exit_reset; } r = i2c_dw_eval_lock_support(dev); if (r) - return r; + goto exit_reset; dev->functionality = I2C_FUNC_10BIT_ADDR | DW_IC_DEFAULT_FUNCTIONALITY; @@ -286,10 +296,18 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) } r = i2c_dw_probe(dev); - if (r && !dev->pm_runtime_disabled) - pm_runtime_disable(&pdev->dev); + if (r) + goto exit_probe; return r; + +exit_probe: + if (!dev->pm_runtime_disabled) + pm_runtime_disable(&pdev->dev); +exit_reset: + if (!IS_ERR_OR_NULL(dev->rst)) + reset_control_assert(dev->rst); + return r; } static int dw_i2c_plat_remove(struct platform_device *pdev) @@ -306,6 +324,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); if 
(!dev->pm_runtime_disabled) pm_runtime_disable(&pdev->dev); + if (!IS_ERR_OR_NULL(dev->rst)) + reset_control_assert(dev->rst); return 0; } From 568af6de058cb2b0c5b98d98ffcf37cdc6bc38a7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Mar 2017 19:53:47 +0100 Subject: [PATCH 0966/1911] netfilter: nf_tables: set pktinfo->thoff at AH header if found Phil Sutter reports that IPv6 AH header matching is broken. From userspace, nft generates bytecode that expects to find the AH header at NFT_PAYLOAD_TRANSPORT_HEADER both for IPv4 and IPv6. However, pktinfo->thoff is set to the inner header after the AH header in IPv6, while in IPv4 pktinfo->thoff points to the AH header indeed. This behaviour is inconsistent. This patch fixes this problem by updating ipv6_find_hdr() to get the IP6_FH_F_AUTH flag so this function stops at the AH header, so both IPv4 and IPv6 pktinfo->thoff point to the AH header. This is also inconsistent when trying to match encapsulated headers: 1) A packet that looks like IPv4 + AH + TCP dport 22 will *not* match. 2) A packet that looks like IPv6 + AH + TCP dport 22 will match. Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d150b506620173..97983d1c05e4d3 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { + unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); return; @@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; @@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, if (pkt_len + sizeof(*ip6h) > skb->len) return -1; - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) return -1; From b6f8fec4448aa52a8c36a392aa1ca2ea99acd460 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:31 +0100 Subject: [PATCH 0967/1911] block: Allow bdi re-registration SCSI can call device_add_disk() several times for one request queue when a device in unbound and bound, creating new gendisk each time. This will lead to bdi being repeatedly registered and unregistered. This was not a big problem until commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()" since bdi was only registered repeatedly (bdi_register() handles repeated calls fine, only we ended up leaking reference to gendisk due to overwriting bdi->owner) but unregistered only in blk_cleanup_queue() which didn't get called repeatedly. After 165a5e22fafb we were doing correct bdi_register() - bdi_unregister() cycles however bdi_unregister() is not prepared for it. So make sure bdi_unregister() cleans up bdi in such a way that it is prepared for a possible following bdi_register() call. 
An easy way to provoke this behavior is to enable CONFIG_DEBUG_TEST_DRIVER_REMOVE and use scsi_debug driver to create a scsi disk which immediately hangs without this fix. Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- mm/backing-dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6d861d090e9fc7..6ac932210f563c 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -710,6 +710,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) */ atomic_dec(&bdi->usage_cnt); wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); + /* + * Grab back our reference so that we hold it when @bdi gets + * re-registered. + */ + atomic_inc(&bdi->usage_cnt); } /** @@ -857,6 +862,8 @@ int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner) MINOR(owner->devt)); if (rc) return rc; + /* Leaking owner reference... */ + WARN_ON(bdi->owner); bdi->owner = owner; get_device(owner); return 0; From df23de55615fa7a190a85f49a950ccecdd9102f3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:32 +0100 Subject: [PATCH 0968/1911] bdi: Fix use-after-free in wb_congested_put() bdi_writeback_congested structures get created for each blkcg and bdi regardless whether bdi is registered or not. When they are created in unregistered bdi and the request queue (and thus bdi) is then destroyed while blkg still holds reference to bdi_writeback_congested structure, this structure will be referencing freed bdi and last wb_congested_put() will try to remove the structure from already freed bdi. With commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()", SCSI started to destroy bdis without calling bdi_unregister() first (previously it was calling bdi_unregister() even for unregistered bdis) and thus the code detaching bdi_writeback_congested in cgwb_bdi_destroy() was not triggered and we started hitting this use-after-free bug. It is enough to boot a KVM instance with virtio-scsi device to trigger this behavior. Fix the problem by detaching bdi_writeback_congested structures in bdi_exit() instead of bdi_unregister(). This is also more logical as they can get attached to bdi regardless whether it ever got registered or not. Fixes: 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- mm/backing-dev.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 6ac932210f563c..c6f2a37028c205 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -683,30 +683,18 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { struct radix_tree_iter iter; - struct rb_node *rbn; void **slot; WARN_ON(test_bit(WB_registered, &bdi->wb.state)); spin_lock_irq(&cgwb_lock); - radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); - - while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { - struct bdi_writeback_congested *congested = - rb_entry(rbn, struct bdi_writeback_congested, rb_node); - - rb_erase(rbn, &bdi->cgwb_congested_tree); - congested->bdi = NULL; /* mark @congested unlinked */ - } - spin_unlock_irq(&cgwb_lock); /* - * All cgwb's and their congested states must be shutdown and - * released before returning. Drain the usage counter to wait for - * all cgwb's and cgwb_congested's ever created on @bdi. 
+ * All cgwb's must be shutdown and released before returning. Drain + * the usage counter to wait for all cgwb's ever created on @bdi. */ atomic_dec(&bdi->usage_cnt); wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt)); @@ -754,6 +742,21 @@ void wb_blkcg_offline(struct blkcg *blkcg) spin_unlock_irq(&cgwb_lock); } +static void cgwb_bdi_exit(struct backing_dev_info *bdi) +{ + struct rb_node *rbn; + + spin_lock_irq(&cgwb_lock); + while ((rbn = rb_first(&bdi->cgwb_congested_tree))) { + struct bdi_writeback_congested *congested = + rb_entry(rbn, struct bdi_writeback_congested, rb_node); + + rb_erase(rbn, &bdi->cgwb_congested_tree); + congested->bdi = NULL; /* mark @congested unlinked */ + } + spin_unlock_irq(&cgwb_lock); +} + #else /* CONFIG_CGROUP_WRITEBACK */ static int cgwb_bdi_init(struct backing_dev_info *bdi) @@ -774,7 +777,9 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) return 0; } -static void cgwb_bdi_destroy(struct backing_dev_info *bdi) +static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { } + +static void cgwb_bdi_exit(struct backing_dev_info *bdi) { wb_congested_put(bdi->wb_congested); } @@ -905,6 +910,7 @@ static void bdi_exit(struct backing_dev_info *bdi) { WARN_ON_ONCE(bdi->dev); wb_exit(&bdi->wb); + cgwb_bdi_exit(bdi); } static void release_bdi(struct kref *ref) From 90f16fddcc2802726142b8386c65ccb89f044613 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:33 +0100 Subject: [PATCH 0969/1911] block: Make del_gendisk() safer for disks without queues Commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()" added disk->queue dereference to del_gendisk(). Although del_gendisk() is not supposed to be called without disk->queue valid and blk_unregister_queue() warns in that case, this change will make it oops instead. Return to the old more robust behavior of just warning when del_gendisk() gets called for gendisk with disk->queue being NULL. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/genhd.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index b26a5ea115d00b..94f323842b5274 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -681,12 +681,16 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - /* - * Unregister bdi before releasing device numbers (as they can get - * reused and we'd get clashes in sysfs). - */ - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); + if (disk->queue) { + /* + * Unregister bdi before releasing device numbers (as they can + * get reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); + blk_unregister_queue(disk); + } else { + WARN_ON(1); + } blk_unregister_region(disk_devt(disk), disk->minors); part_stat_set_all(&disk->part0, 0); From c01228db4ba965986511a5b28c478bddd7e2726e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 17:48:34 +0100 Subject: [PATCH 0970/1911] Revert "scsi, block: fix duplicate bdi name registration crashes" This reverts commit 0dba1314d4f81115dce711292ec7981d17231064. It causes leaking of device numbers for SCSI when SCSI registers multiple gendisks for one request_queue in succession. It can be easily reproduced using Omar's script [1] on kernel with CONFIG_DEBUG_TEST_DRIVER_REMOVE. 
Furthermore the protection provided by this commit is not needed anymore as the problem it was fixing got also fixed by commit 165a5e22fafb "block: Move bdi_unregister() to del_gendisk()". [1]: http://marc.info/?l=linux-block&m=148554717109098&w=2 Signed-off-by: Jan Kara Acked-by: Dan Williams Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- block/genhd.c | 21 --------------------- drivers/scsi/sd.c | 41 ++++++++--------------------------------- include/linux/blkdev.h | 1 - include/linux/genhd.h | 8 -------- 5 files changed, 8 insertions(+), 65 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1086dac8724c99..a76895c9776d9c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,8 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - put_disk_devt(q->disk_devt); - /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } diff --git a/block/genhd.c b/block/genhd.c index 94f323842b5274..a9c516a8b37dbc 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -572,20 +572,6 @@ static void register_disk(struct device *parent, struct gendisk *disk) disk_part_iter_exit(&piter); } -void put_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt && atomic_dec_and_test(&disk_devt->count)) - disk_devt->release(disk_devt); -} -EXPORT_SYMBOL(put_disk_devt); - -void get_disk_devt(struct disk_devt *disk_devt) -{ - if (disk_devt) - atomic_inc(&disk_devt->count); -} -EXPORT_SYMBOL(get_disk_devt); - /** * device_add_disk - add partitioning information to kernel list * @parent: parent device for the disk @@ -626,13 +612,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_alloc_events(disk); - /* - * Take a reference on the devt and assign it to queue since it - * must not be reallocated while the bdi is registered - */ - disk->queue->disk_devt = disk->disk_devt; - get_disk_devt(disk->disk_devt); - /* Register BDI before referencing it from bdev */ bdi = disk->queue->backing_dev_info; bdi_register_owner(bdi, disk_to_dev(disk)); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35ccc4..d277e8620e3e39 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3075,23 +3075,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) put_device(&sdkp->dev); } -struct sd_devt { - int idx; - struct disk_devt disk_devt; -}; - -static void sd_devt_release(struct disk_devt *disk_devt) -{ - struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, - disk_devt); - - spin_lock(&sd_index_lock); - ida_remove(&sd_index_ida, sd_devt->idx); - spin_unlock(&sd_index_lock); - - kfree(sd_devt); -} - /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. 
It is called once @@ -3113,7 +3096,6 @@ static void sd_devt_release(struct disk_devt *disk_devt) static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); - struct sd_devt *sd_devt; struct scsi_disk *sdkp; struct gendisk *gd; int index; @@ -3139,13 +3121,9 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL); - if (!sd_devt) - goto out_free; - gd = alloc_disk(SD_MINORS); if (!gd) - goto out_free_devt; + goto out_free; do { if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) @@ -3161,11 +3139,6 @@ static int sd_probe(struct device *dev) goto out_put; } - atomic_set(&sd_devt->disk_devt.count, 1); - sd_devt->disk_devt.release = sd_devt_release; - sd_devt->idx = index; - gd->disk_devt = &sd_devt->disk_devt; - error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); @@ -3205,12 +3178,11 @@ static int sd_probe(struct device *dev) return 0; out_free_index: - put_disk_devt(&sd_devt->disk_devt); - sd_devt = NULL; + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, index); + spin_unlock(&sd_index_lock); out_put: put_disk(gd); - out_free_devt: - kfree(sd_devt); out_free: kfree(sdkp); out: @@ -3271,7 +3243,10 @@ static void scsi_disk_release(struct device *dev) struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - put_disk_devt(disk->disk_devt); + spin_lock(&sd_index_lock); + ida_remove(&sd_index_ida, sdkp->index); + spin_unlock(&sd_index_lock); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 796016e63c1da7..5a7da607ca045f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -435,7 +435,6 @@ struct request_queue { struct delayed_work delay_work; struct backing_dev_info *backing_dev_info; - struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a999d281a2f1e4..76f39754e7b029 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -167,13 +167,6 @@ struct blk_integrity { }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ -struct disk_devt { - atomic_t count; - void (*release)(struct disk_devt *disk_devt); -}; - -void put_disk_devt(struct disk_devt *disk_devt); -void get_disk_devt(struct disk_devt *disk_devt); struct gendisk { /* major, first_minor and minors are input parameters only, @@ -183,7 +176,6 @@ struct gendisk { int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - struct disk_devt *disk_devt; char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); From 79bd99596b7305ab08109a8bf44a6a4511dbf1cd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Mar 2017 07:38:05 +1100 Subject: [PATCH 0971/1911] blk: improve order of bio handling in generic_make_request() To avoid recursion on the kernel stack when stacked block devices are in use, generic_make_request() will, when called recursively, queue new requests for later handling. They will be handled when the make_request_fn for the current bio completes. If any bios are submitted by a make_request_fn, these will ultimately be handled seqeuntially. If the handling of one of those generates further requests, they will be added to the end of the queue. 
This strict first-in-first-out behaviour can lead to deadlocks in various ways, normally because a request might need to wait for a previous request to the same device to complete. This can happen when they share a mempool, and can happen due to interdependencies particular to the device. Both md and dm have examples where this happens. These deadlocks can be eradicated by more selective ordering of bios. Specifically by handling them in depth-first order. That is: when the handling of one bio generates one or more further bios, they are handled immediately after the parent, before any siblings of the parent. That way, when generic_make_request() calls make_request_fn for some particular device, we can be certain that all previously submitted requests for that device have been completely handled and are not waiting for anything in the queue of requests maintained in generic_make_request(). An easy way to achieve this would be to use a last-in-first-out stack instead of a queue. However this will change the order of consecutive bios submitted by a make_request_fn, which could have unexpected consequences. Instead we take a slightly more complex approach. A fresh queue is created for each call to a make_request_fn. After it completes, any bios for a different device are placed on the front of the main queue, followed by any bios for the same device, followed by all bios that were already on the queue before the make_request_fn was called. This provides the depth-first approach without reordering bios on the same level. This, by itself, is not enough to remove all deadlocks. It just makes it possible for drivers to take the extra step required themselves. To avoid deadlocks, drivers must never risk waiting for a request after submitting one to generic_make_request. This includes never allocating from a mempool twice in the one call to a make_request_fn. A common pattern in drivers is to call bio_split() in a loop, handling the first part and then looping around to possibly split the next part. Instead, a driver that finds it needs to split a bio should queue (with generic_make_request) the second part, handle the first part, and then return. The new code in generic_make_request will ensure the requests to underlying bios are processed first, then the second bio that was split off. If it splits again, the same process happens. In each case one bio will be completely handled before the next one is attempted. With this in place, it should be possible to disable the punt_bios_to_recover() recovery thread for many block devices, and eventually it may be possible to remove it completely.
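To make the ordering rule concrete, here is a small stand-alone C model of the requeue step - user-space code, not kernel code; the bio names, device numbers and list helpers are invented for illustration, and only the ordering (lower level first, then same level, then previously held bios) mirrors the patch below.

#include <stdio.h>

struct bio { const char *name; int dev; struct bio *next; };
struct bio_list { struct bio *head, *tail; };

static void bl_init(struct bio_list *l) { l->head = l->tail = NULL; }

static void bl_add(struct bio_list *l, struct bio *b)
{
	b->next = NULL;
	if (l->tail)
		l->tail->next = b;
	else
		l->head = b;
	l->tail = b;
}

static struct bio *bl_pop(struct bio_list *l)
{
	struct bio *b = l->head;

	if (b) {
		l->head = b->next;
		if (!l->head)
			l->tail = NULL;
	}
	return b;
}

/* append @src to the tail of @dst and empty @src (like bio_list_merge) */
static void bl_merge(struct bio_list *dst, struct bio_list *src)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
	bl_init(src);
}

int main(void)
{
	/* children generated while ->make_request_fn() ran for device 1 */
	struct bio lo1   = { "split-for-dev0-a", 0 };
	struct bio lo2   = { "split-for-dev0-b", 0 };
	struct bio resub = { "resubmitted-for-dev1", 1 };
	/* a bio that was already pending before this call */
	struct bio held  = { "queued-earlier", 2 };

	struct bio_list on_stack, hold, lower, same;
	struct bio *b;
	int cur_dev = 1;

	bl_init(&hold);
	bl_add(&hold, &held);

	/* ->make_request_fn() pushed its children onto a fresh list */
	bl_init(&on_stack);
	bl_add(&on_stack, &lo1);
	bl_add(&on_stack, &resub);
	bl_add(&on_stack, &lo2);

	/* the reordering from the patch: children for a lower level first,
	 * then children for the same level, then what was already pending */
	bl_init(&lower);
	bl_init(&same);
	while ((b = bl_pop(&on_stack)) != NULL)
		bl_add(b->dev != cur_dev ? &lower : &same, b);
	bl_merge(&on_stack, &lower);
	bl_merge(&on_stack, &same);
	bl_merge(&on_stack, &hold);

	while ((b = bl_pop(&on_stack)) != NULL)
		printf("handle next: %s (dev %d)\n", b->name, b->dev);
	return 0;
}

Compiled with any C compiler, this prints the two lower-level children, then the same-level child, then the previously held bio - the depth-first order the patch establishes.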
Ref: http://www.spinics.net/lists/raid/msg54680.html Tested-by: Jinpu Wang Inspired-by: Lars Ellenberg Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- block/blk-core.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a76895c9776d9c..0eeb99ef654f4a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2015,17 +2015,34 @@ blk_qc_t generic_make_request(struct bio *bio) struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { + struct bio_list hold; + struct bio_list lower, same; + + /* Create a fresh bio_list for all subordinate requests */ + hold = bio_list_on_stack; + bio_list_init(&bio_list_on_stack); ret = q->make_request_fn(q, bio); blk_queue_exit(q); - bio = bio_list_pop(current->bio_list); + /* sort new bios into those for a lower level + * and those for the same level + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + /* now assemble so we handle the lowest level first */ + bio_list_merge(&bio_list_on_stack, &lower); + bio_list_merge(&bio_list_on_stack, &same); + bio_list_merge(&bio_list_on_stack, &hold); } else { - struct bio *bio_next = bio_list_pop(current->bio_list); - bio_io_error(bio); - bio = bio_next; } + bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ From 672a2c87c83649fb0167202342ce85af9a3b4f1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Mar 2017 14:56:05 +0100 Subject: [PATCH 0972/1911] axonram: Fix gendisk handling It is invalid to call del_gendisk() when disk->queue is NULL. Fix error handling in axon_ram_probe() to avoid doing that. Also del_gendisk() does not drop a reference to gendisk allocated by alloc_disk(). That has to be done by put_disk(). Add that call where needed. Reported-by: Dan Carpenter Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e280..f523ac88315070 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ static int axon_ram_probe(struct platform_device *device) if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); From f65e6fad293b3a5793b7fa2044800506490e7a2e Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 8 Mar 2017 09:58:08 -0800 Subject: [PATCH 0973/1911] xfs: use iomap new flag for newly allocated delalloc blocks Commit fa7f138 ("xfs: clear delalloc and cache on buffered write failure") fixed one regression in the iomap error handling code and exposed another. The fundamental problem is that if a buffered write is a rewrite of preexisting delalloc blocks and the write fails, the failure handling code can punch out preexisting blocks with valid file data. 
This was reproduced directly by sub-block writes in the LTP kernel/syscalls/write/write03 test. A first 100 byte write allocates a single block in a file. A subsequent 100 byte write fails and punches out the block, including the data successfully written by the previous write. To address this problem, update the ->iomap_begin() handler to distinguish newly allocated delalloc blocks from preexisting delalloc blocks via the IOMAP_F_NEW flag. Use this flag in the ->iomap_end() handler to decide when a failed or short write should punch out delalloc blocks. This introduces the subtle requirement that ->iomap_begin() should never combine newly allocated delalloc blocks with existing blocks in the resulting iomap descriptor. This can occur when a new delalloc reservation merges with a neighboring extent that is part of the current write, for example. Therefore, drop the post-allocation extent lookup from xfs_bmapi_reserve_delalloc() and just return the record inserted into the fork. This ensures only new blocks are returned and thus that preexisting delalloc blocks are always handled as "found" blocks and not punched out on a failed rewrite. Reported-by: Xiong Zhou Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 24 ++++++++++++++---------- fs/xfs/xfs_iomap.c | 25 ++++++++++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a75..bfa59a1a2d0981 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4150,6 +4150,19 @@ xfs_bmapi_read( return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. + * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, @@ -4236,13 +4249,8 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Tag the inode if blocks were preallocated. 
Note that COW fork @@ -4254,10 +4262,6 @@ xfs_bmapi_reserve_delalloc( if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87d8..288ee5b840d738 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -630,6 +630,11 @@ xfs_file_iomap_begin_delay( goto out_unlock; } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc( struct xfs_inode *ip, loff_t offset, loff_t length, - ssize_t written) + ssize_t written, + struct iomap *iomap) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; - /* behave as if the write failed if drop writes is enabled */ - if (xfs_mp_drop_writes(mp)) + /* + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. + */ + if (xfs_mp_drop_writes(mp)) { + iomap->flags |= IOMAP_F_NEW; written = 0; + } /* * start_fsb refers to the first unused block after a short write. If @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc( end_fsb = XFS_B_TO_FSB(mp, offset + length); /* - * Trim back delalloc blocks if we didn't manage to write the whole - * range reserved. + * Trim delalloc blocks if they were allocated by this write and we + * didn't manage to write the whole range. * * We don't need to care about racing delalloc as we hold i_mutex * across the reserve/allocate/unreserve calls. If there are delalloc * blocks in the range, they are ours. */ - if (start_fsb < end_fsb) { + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), XFS_FSB_TO_B(mp, end_fsb) - 1); @@ -1131,7 +1142,7 @@ xfs_file_iomap_end( { if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written); + length, written, iomap); return 0; } From bd0f9b356d00aa241ced36fb075a07041c28d3b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2017 15:33:14 -0800 Subject: [PATCH 0974/1911] sched/headers: fix up header file dependency on The scheduler header file split and cleanups ended up exposing a few nasty header file dependencies, and in particular it showed how we in ended up depending on "signal_pending()", which now comes from . That's a very subtle and annoying dependency, which already caused a semantic merge conflict (see commit e58bc927835a "Pull overlayfs updates from Miklos Szeredi", which added that fixup in the merge commit). It turns out that we can avoid this dependency _and_ improve code generation by moving the guts of the fairly nasty helper #define __wait_event_interruptible_locked() to out-of-line code. The code that includes the signal_pending() check is all in the slow-path where we actually go to sleep waiting for the event anyway, so using a helper function is the right thing to do. Using a helper function is also what we already did for the non-locked versions, see the "__wait_event*()" macros and the "prepare_to_wait*()" set of helper functions. 
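The locking discipline those helpers preserve - test the condition with the lock held, release the lock only while actually sleeping, retake it on wakeup - is the same one pthread_cond_wait() gives user space. A small runnable analogue follows (plain pthreads, nothing kernel-specific; the producer/flag names are invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int data_ready;

static void *producer(void *arg)
{
	(void)arg;
	sleep(1);
	pthread_mutex_lock(&lock);
	data_ready = 1;              /* condition is changed under the lock */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, producer, NULL);

	pthread_mutex_lock(&lock);
	/* the condition is tested with the lock held; pthread_cond_wait()
	 * drops the lock only while sleeping and reacquires it on wakeup -
	 * the same shape as __wait_event_interruptible_locked() */
	while (!data_ready)
		pthread_cond_wait(&cond, &lock);
	printf("condition became true\n");
	pthread_mutex_unlock(&lock);

	pthread_join(tid, NULL);
	return 0;
}

pthread_cond_wait() unlocks around the sleep and relocks before returning, which is exactly the unlock/schedule()/lock sequence that do_wait_intr() and do_wait_intr_irq() now perform out of line.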
We might want to try to unify all these macro games, we have a _lot_ of subtly different wait-event loops. But this is the minimal patch to fix the annoying header dependency. Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/wait.h | 31 ++++++++++--------------------- kernel/sched/wait.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index aacb1282d19a38..db076ca7f11da0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -620,30 +620,19 @@ do { \ __ret; \ }) +extern int do_wait_intr(wait_queue_head_t *, wait_queue_t *); +extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_t *); -#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +#define __wait_event_interruptible_locked(wq, condition, exclusive, fn) \ ({ \ - int __ret = 0; \ + int __ret; \ DEFINE_WAIT(__wait); \ if (exclusive) \ __wait.flags |= WQ_FLAG_EXCLUSIVE; \ do { \ - if (likely(list_empty(&__wait.task_list))) \ - __add_wait_queue_tail(&(wq), &__wait); \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (signal_pending(current)) { \ - __ret = -ERESTARTSYS; \ + __ret = fn(&(wq), &__wait); \ + if (__ret) \ break; \ - } \ - if (irq) \ - spin_unlock_irq(&(wq).lock); \ - else \ - spin_unlock(&(wq).lock); \ - schedule(); \ - if (irq) \ - spin_lock_irq(&(wq).lock); \ - else \ - spin_lock(&(wq).lock); \ } while (!(condition)); \ __remove_wait_queue(&(wq), &__wait); \ __set_current_state(TASK_RUNNING); \ @@ -676,7 +665,7 @@ do { \ */ #define wait_event_interruptible_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr)) /** * wait_event_interruptible_locked_irq - sleep until a condition gets true @@ -703,7 +692,7 @@ do { \ */ #define wait_event_interruptible_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, do_wait_intr_irq)) /** * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true @@ -734,7 +723,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr)) /** * wait_event_interruptible_exclusive_locked_irq - sleep until a condition gets true @@ -765,7 +754,7 @@ do { \ */ #define wait_event_interruptible_exclusive_locked_irq(wq, condition) \ ((condition) \ - ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + ? 0 : __wait_event_interruptible_locked(wq, condition, 1, do_wait_intr_irq)) #define __wait_event_killable(wq, condition) \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 4d2ea6f2556838..b8c84c6dee64bd 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_event); +/* + * Note! These two wait functions are entered with the + * wait-queue lock held (and interrupts off in the _irq + * case), so there is no race with testing the wakeup + * condition in the caller before they add the wait + * entry to the wake queue. 
+ */ +int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock(&wq->lock); + schedule(); + spin_lock(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr); + +int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait) +{ + if (likely(list_empty(&wait->task_list))) + __add_wait_queue_tail(wq, wait); + + set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) + return -ERESTARTSYS; + + spin_unlock_irq(&wq->lock); + schedule(); + spin_lock_irq(&wq->lock); + return 0; +} +EXPORT_SYMBOL(do_wait_intr_irq); + /** * finish_wait - clean up after waiting in a queue * @q: waitqueue waited on From 2fcc319d2467a5f5b78f35f79fd6e22741a31b1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Mar 2017 10:38:53 -0800 Subject: [PATCH 0975/1911] xfs: try any AG when allocating the first btree block when reflinking When a reflink operation causes the bmap code to allocate a btree block we're currently doing single-AG allocations due to having ->firstblock set and then try any higher AG due a little reflink quirk we've put in when adding the reflink code. But given that we do not have a minleft reservation of any kind in this AG we can still not have any space in the same or higher AG even if the file system has enough free space. To fix this use a XFS_ALLOCTYPE_FIRST_AG allocation in this fall back path instead. [And yes, we need to redo this properly instead of piling hacks over hacks. I'm working on that, but it's not going to be a small series. In the meantime this fixes the customer reported issue] Also add a warning for failing allocations to make it easier to debug. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 10 +++++++--- fs/xfs/libxfs/xfs_bmap_btree.c | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index bfa59a1a2d0981..9bd104f3290896 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -790,13 +790,17 @@ xfs_bmap_extents_to_btree( if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; + } /* * Allocation can't fail, the space was reserved. 
*/ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a5832..fd55db47938562 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. If @@ -488,8 +488,8 @@ xfs_bmbt_alloc_block( if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; args.fsbno = cur->bc_private.b.firstblock; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } @@ -506,7 +506,7 @@ xfs_bmbt_alloc_block( goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; From 04bb94b13c02e9dbc92d622cddf88937127bd7ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Mar 2017 10:42:13 -0800 Subject: [PATCH 0976/1911] overlayfs: remove now unnecessary header file include This removes the extra include header file that was added in commit e58bc927835a "Pull overlayfs updates from Miklos Szeredi" now that it is no longer needed. There are probably other such includes that got added during the scheduler header splitup series, but this is the one that annoyed me personally and I know about. Signed-off-by: Linus Torvalds --- fs/overlayfs/util.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1953986ee6bc22..6e610a205e1556 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -12,7 +12,6 @@ #include #include #include -#include #include "overlayfs.h" #include "ovl_entry.h" From b4fb8f66f1ae2e167d06c12d018025a8d4d3ba7e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 8 Mar 2017 09:35:39 -0800 Subject: [PATCH 0977/1911] mm, page_alloc: Add missing check for memory holes Commit 13ad59df67f1 ("mm, page_alloc: avoid page_to_pfn() when merging buddies") moved the check for memory holes out of page_is_buddy() and had the callers do the check. But this wasn't done correctly in one place which caused ia64 to crash very early in boot. Update to fix that and make ia64 boot again. [ v2: Vlastimil pointed out we don't need to call page_to_pfn() since we already have the result of that in "buddy_pfn" ] Fixes: 13ad59df67f1 ("avoid page_to_pfn() when merging buddies") Cc: Mel Gorman Cc: Joonsoo Kim Cc: Michal Hocko Cc: "Kirill A. 
Shutemov" Cc: Johannes Weiner Cc: Andrew Morton Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eaa64d2ffdc553..6cbde310abed8d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -873,7 +873,8 @@ static inline void __free_one_page(struct page *page, higher_page = page + (combined_pfn - pfn); buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); higher_buddy = higher_page + (buddy_pfn - combined_pfn); - if (page_is_buddy(higher_page, higher_buddy, order + 1)) { + if (pfn_valid_within(buddy_pfn) && + page_is_buddy(higher_page, higher_buddy, order + 1)) { list_add_tail(&page->lru, &zone->free_area[order].free_list[migratetype]); goto out; From c085bd5119d5d0bdf3ef591a5563566be7dedced Mon Sep 17 00:00:00 2001 From: Jim Qu Date: Wed, 1 Mar 2017 15:53:29 +0800 Subject: [PATCH 0978/1911] drm/amd/amdgpu: fix console deadlock if late init failed Signed-off-by: Jim Qu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6abb238b25c97e..4120b351a8e5cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2094,8 +2094,11 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) + if (r) { + if (fbcon) + console_unlock(); return r; + } /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { From 67b0503db9c29b04eadfeede6bebbfe5ddad94ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Sun, 12 Feb 2017 13:02:13 -0200 Subject: [PATCH 0979/1911] [media] dvb-usb-firmware: don't do DMA on stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer allocation for the firmware data was changed in commit 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") but the same applies for the reset value. 
Fixes: 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") Cc: stable@vger.kernel.org Signed-off-by: Stefan Brüns Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dvb-usb-firmware.c | 22 +++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c index ab9866024ec798..04033efe7ad539 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c @@ -36,16 +36,18 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type) { struct hexline *hx; - u8 reset; - int ret,pos=0; + u8 *buf; + int ret, pos = 0; + u16 cpu_cs_register = cypress[type].cpu_cs_register; - hx = kmalloc(sizeof(*hx), GFP_KERNEL); - if (!hx) + buf = kmalloc(sizeof(*hx), GFP_KERNEL); + if (!buf) return -ENOMEM; + hx = (struct hexline *)buf; /* stop the CPU */ - reset = 1; - if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1) + buf[0] = 1; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) err("could not stop the USB controller CPU."); while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) { @@ -61,21 +63,21 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw } if (ret < 0) { err("firmware download failed at %d with %d",pos,ret); - kfree(hx); + kfree(buf); return ret; } if (ret == 0) { /* restart the CPU */ - reset = 0; - if (ret || usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1) != 1) { + buf[0] = 0; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) { err("could not restart the USB controller CPU."); ret = -EINVAL; } } else ret = -EIO; - kfree(hx); + kfree(buf); return ret; } From f04d108029063a8a67528a88449c412aecf4d3d8 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 20 Feb 2017 19:26:30 +0530 Subject: [PATCH 0980/1911] powerpc/perf: Fix perf_get_data_addr() for power9 DD1 Power9 DD1 do not support PMU_HAS_SIER flag and sdsync in perf_get_data_addr() defaults to MMCRA_SDSYNC which is wrong. Since power9 MMCRA does not support SDSYNC bit, patch includes PPMU_NO_SIAR flag to the check and set the sdsync with MMCRA_SAMPLE_ENABLE; Fixes: 27593d72c4ad ("powerpc/perf: Use MSR to report privilege level on P9 DD1") Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 595dd718ea8718..2ff13249f87a61 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) sdsync = POWER7P_MMCRA_SDAR_VALID; else if (ppmu->flags & PPMU_ALT_SIPR) sdsync = POWER6_MMCRA_SDSYNC; + else if (ppmu->flags & PPMU_NO_SIAR) + sdsync = MMCRA_SAMPLE_ENABLE; else sdsync = MMCRA_SDSYNC; From 78b4416aa249365dd3c1b64da4d3a232014320b0 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 20 Feb 2017 19:29:03 +0530 Subject: [PATCH 0981/1911] powerpc/perf: Handle sdar_mode for marked event in power9 MMCRA[SDAR_MODE] specifices how the SDAR should be updated in continous sampling mode. On P9 it must be set to 0b00 when MMCRA[63] is set. 
Fixes: c7c3f568beff2 ('powerpc/perf: macros for power9 format encoding') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/isa207-common.c | 43 ++++++++++++++++++++++++++----- arch/powerpc/perf/isa207-common.h | 1 + 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e79fb5fb817dbe..cd951fd231c404 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -65,12 +65,41 @@ static bool is_event_valid(u64 event) return !(event & ~valid_mask); } -static u64 mmcra_sdar_mode(u64 event) +static inline bool is_event_marked(u64 event) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) - return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + if (event & EVENT_IS_MARKED) + return true; + + return false; +} - return MMCRA_SDAR_MODE_TLB; +static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) +{ + /* + * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in + * continous sampling mode. + * + * Incase of Power8: + * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling + * mode and will be un-changed when setting MMCRA[63] (Marked events). + * + * Incase of Power9: + * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), + * or if group already have any marked events. + * Non-Marked events (for DD1): + * MMCRA[SDAR_MODE] will be set to 0b01 + * For rest + * MMCRA[SDAR_MODE] will be set from event code. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) + *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; + else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= MMCRA_SDAR_MODE_TLB; + } else + *mmcra |= MMCRA_SDAR_MODE_TLB; } static u64 thresh_cmp_val(u64 value) @@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } - if (event & EVENT_IS_MARKED) { + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } @@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra |= mmcra_sdar_mode(event[i]); + mmcra_sdar_mode(event[i], &mmcra); if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; @@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; } - if (event[i] & EVENT_IS_MARKED) { + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index cf9bd89901595c..899210f14ee432 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -246,6 +246,7 @@ #define MMCRA_THR_CMP_SHIFT 32 #define MMCRA_SDAR_MODE_SHIFT 42 #define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) +#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 /* MMCR1 Threshold Compare bit constant for power9 */ From 605df8d674ac65e044a0bf4998b28c2f350b7f9e Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 7 Mar 2017 11:39:31 +1100 Subject: [PATCH 0982/1911] selftests/powerpc: Replace stxvx and lxvx with stxvd2x/lxvd2x On POWER8 (ISA 2.07) lxvx and stxvx are 
defined to be extended mnemonics of lxvd2x and stxvd2x. For POWER9 (ISA 3.0) the HW architects in their infinite wisdom made lxvx and stxvx instructions in their own right. POWER9 aware GCC will use the POWER9 instruction for lxvx and stxvx causing these selftests to fail on POWER8. Further compounding the issue, because of the way -mvsx works it will cause the power9 instructions to be used regardless of -mcpu=power8 to GCC or -mpower8 to AS. The safest way to address the problem for now is to not use the extended mnemonic. We don't care how the CPU loads the values from memory since the tests only performs register comparisons, so using stdvd2x/lxvd2x does not impact the test. Signed-off-by: Cyril Bur Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- .../selftests/powerpc/include/vsx_asm.h | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h index d828bfb6ef2d9a..54064ced9e95b3 100644 --- a/tools/testing/selftests/powerpc/include/vsx_asm.h +++ b/tools/testing/selftests/powerpc/include/vsx_asm.h @@ -16,56 +16,56 @@ */ FUNC_START(load_vsx) li r5,0 - lxvx vs20,r5,r3 + lxvd2x vs20,r5,r3 addi r5,r5,16 - lxvx vs21,r5,r3 + lxvd2x vs21,r5,r3 addi r5,r5,16 - lxvx vs22,r5,r3 + lxvd2x vs22,r5,r3 addi r5,r5,16 - lxvx vs23,r5,r3 + lxvd2x vs23,r5,r3 addi r5,r5,16 - lxvx vs24,r5,r3 + lxvd2x vs24,r5,r3 addi r5,r5,16 - lxvx vs25,r5,r3 + lxvd2x vs25,r5,r3 addi r5,r5,16 - lxvx vs26,r5,r3 + lxvd2x vs26,r5,r3 addi r5,r5,16 - lxvx vs27,r5,r3 + lxvd2x vs27,r5,r3 addi r5,r5,16 - lxvx vs28,r5,r3 + lxvd2x vs28,r5,r3 addi r5,r5,16 - lxvx vs29,r5,r3 + lxvd2x vs29,r5,r3 addi r5,r5,16 - lxvx vs30,r5,r3 + lxvd2x vs30,r5,r3 addi r5,r5,16 - lxvx vs31,r5,r3 + lxvd2x vs31,r5,r3 blr FUNC_END(load_vsx) FUNC_START(store_vsx) li r5,0 - stxvx vs20,r5,r3 + stxvd2x vs20,r5,r3 addi r5,r5,16 - stxvx vs21,r5,r3 + stxvd2x vs21,r5,r3 addi r5,r5,16 - stxvx vs22,r5,r3 + stxvd2x vs22,r5,r3 addi r5,r5,16 - stxvx vs23,r5,r3 + stxvd2x vs23,r5,r3 addi r5,r5,16 - stxvx vs24,r5,r3 + stxvd2x vs24,r5,r3 addi r5,r5,16 - stxvx vs25,r5,r3 + stxvd2x vs25,r5,r3 addi r5,r5,16 - stxvx vs26,r5,r3 + stxvd2x vs26,r5,r3 addi r5,r5,16 - stxvx vs27,r5,r3 + stxvd2x vs27,r5,r3 addi r5,r5,16 - stxvx vs28,r5,r3 + stxvd2x vs28,r5,r3 addi r5,r5,16 - stxvx vs29,r5,r3 + stxvd2x vs29,r5,r3 addi r5,r5,16 - stxvx vs30,r5,r3 + stxvd2x vs30,r5,r3 addi r5,r5,16 - stxvx vs31,r5,r3 + stxvd2x vs31,r5,r3 blr FUNC_END(store_vsx) From b78125e00fce0a858c7827dd47ce500320d6d0f7 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 28 Feb 2017 23:49:38 +0100 Subject: [PATCH 0983/1911] net: smsc: smc911x: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smc911x.c | 51 +++++++++++++++-------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 4f19c6166182e7..36307d34f64181 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev) * Ethtool support */ static int -smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc911x_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret, status; unsigned long flags; + u32 supported; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_gset(&lp->mii, cmd); + ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - cmd->supported = SUPPORTED_10baseT_Half | + supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; else if (lp->ctl_rspeed == 100) - ethtool_cmd_speed_set(cmd, SPEED_100); - - cmd->autoneg = AUTONEG_DISABLE; - if (lp->mii.phy_id==1) - cmd->transceiver = XCVR_INTERNAL; - else - cmd->transceiver = XCVR_EXTERNAL; - cmd->port = 0; + cmd->base.speed = SPEED_100; + + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = 0; SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status); - cmd->duplex = + cmd->base.duplex = (status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ? DUPLEX_FULL : DUPLEX_HALF; + + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ret = 0; } @@ -1487,7 +1487,8 @@ smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) } static int -smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc911x_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret; @@ -1495,16 +1496,18 @@ smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_sset(&lp->mii, cmd); + ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - if (cmd->autoneg != AUTONEG_DISABLE || - cmd->speed != SPEED_10 || - (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || - (cmd->port != PORT_TP && cmd->port != PORT_AUI)) + if (cmd->base.autoneg != AUTONEG_DISABLE || + cmd->base.speed != SPEED_10 || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL) || + (cmd->base.port != PORT_TP && + cmd->base.port != PORT_AUI)) return -EINVAL; - lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; + lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; ret = 0; } @@ -1686,8 +1689,6 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev) } static const struct ethtool_ops smc911x_ethtool_ops = { - .get_settings = smc911x_ethtool_getsettings, - .set_settings = smc911x_ethtool_setsettings, .get_drvinfo = smc911x_ethtool_getdrvinfo, .get_msglevel = smc911x_ethtool_getmsglevel, .set_msglevel = smc911x_ethtool_setmsglevel, @@ -1698,6 +1699,8 @@ static const struct ethtool_ops smc911x_ethtool_ops = { .get_eeprom_len = smc911x_ethtool_geteeprom_len, 
.get_eeprom = smc911x_ethtool_geteeprom, .set_eeprom = smc911x_ethtool_seteeprom, + .get_link_ksettings = smc911x_ethtool_get_link_ksettings, + .set_link_ksettings = smc911x_ethtool_set_link_ksettings, }; /* From 7b022a1b706f7684341e285e627a74a98e730301 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 12:42:39 +0100 Subject: [PATCH 0984/1911] net: smsc: smc91x: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Tested-by: Robert Jarzmik Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 47 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 65077c77082a2f..91e9bd7159ab37 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev) * Ethtool support */ static int -smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct smc_local *lp = netdev_priv(dev); int ret; - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; - if (lp->phy_type != 0) { spin_lock_irq(&lp->lock); - ret = mii_ethtool_gset(&lp->mii, cmd); + ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); spin_unlock_irq(&lp->lock); } else { - cmd->supported = SUPPORTED_10baseT_Half | + u32 supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; else if (lp->ctl_rspeed == 100) - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; + + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = 0; + cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ? + DUPLEX_FULL : DUPLEX_HALF; - cmd->autoneg = AUTONEG_DISABLE; - cmd->transceiver = XCVR_INTERNAL; - cmd->port = 0; - cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? 
DUPLEX_FULL : DUPLEX_HALF; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); ret = 0; } @@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) } static int -smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct smc_local *lp = netdev_priv(dev); int ret; if (lp->phy_type != 0) { spin_lock_irq(&lp->lock); - ret = mii_ethtool_sset(&lp->mii, cmd); + ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); spin_unlock_irq(&lp->lock); } else { - if (cmd->autoneg != AUTONEG_DISABLE || - cmd->speed != SPEED_10 || - (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || - (cmd->port != PORT_TP && cmd->port != PORT_AUI)) + if (cmd->base.autoneg != AUTONEG_DISABLE || + cmd->base.speed != SPEED_10 || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL) || + (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI)) return -EINVAL; -// lp->port = cmd->port; - lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; +// lp->port = cmd->base.port; + lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; // if (netif_running(dev)) // smc_set_port(dev); @@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev, static const struct ethtool_ops smc_ethtool_ops = { - .get_settings = smc_ethtool_getsettings, - .set_settings = smc_ethtool_setsettings, .get_drvinfo = smc_ethtool_getdrvinfo, .get_msglevel = smc_ethtool_getmsglevel, @@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = { .get_eeprom_len = smc_ethtool_geteeprom_len, .get_eeprom = smc_ethtool_geteeprom, .set_eeprom = smc_ethtool_seteeprom, + .get_link_ksettings = smc_ethtool_get_link_ksettings, + .set_link_ksettings = smc_ethtool_set_link_ksettings, }; static const struct net_device_ops smc_netdev_ops = { From 15883a43af0bcd10b3f3173bca4a0e60518bc154 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 16:16:12 +0100 Subject: [PATCH 0985/1911] net: sun: cassini: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sun/cassini.c | 98 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 0e8e89f17dbb11..382993c1561c5c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -691,7 +691,8 @@ static void cas_mif_poll(struct cas *cp, const int enable) } /* Must be invoked under cp->lock */ -static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) +static void cas_begin_auto_negotiation(struct cas *cp, + const struct ethtool_link_ksettings *ep) { u16 ctl; #if 1 @@ -704,16 +705,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) if (!ep) goto start_aneg; lcntl = cp->link_cntl; - if (ep->autoneg == AUTONEG_ENABLE) + if (ep->base.autoneg == AUTONEG_ENABLE) { cp->link_cntl = BMCR_ANENABLE; - else { - u32 speed = ethtool_cmd_speed(ep); + } else { + u32 speed = ep->base.speed; cp->link_cntl = 0; if (speed == SPEED_100) cp->link_cntl |= BMCR_SPEED100; else if (speed == SPEED_1000) cp->link_cntl |= CAS_BMCR_SPEED1000; - if (ep->duplex == DUPLEX_FULL) + if (ep->base.duplex == DUPLEX_FULL) cp->link_cntl |= BMCR_FULLDPLX; } #if 1 @@ -4528,19 +4529,21 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info)); } -static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int cas_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct cas *cp = netdev_priv(dev); u16 bmcr; int full_duplex, speed, pause; unsigned long flags; enum link_state linkstate = link_up; + u32 supported, advertising; - cmd->advertising = 0; - cmd->supported = SUPPORTED_Autoneg; + advertising = 0; + supported = SUPPORTED_Autoneg; if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { - cmd->supported |= SUPPORTED_1000baseT_Full; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; + advertising |= ADVERTISED_1000baseT_Full; } /* Record PHY settings if HW is on. */ @@ -4548,17 +4551,15 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) bmcr = 0; linkstate = cp->lstate; if (CAS_PHY_MII(cp->phy_type)) { - cmd->port = PORT_MII; - cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? 
- XCVR_INTERNAL : XCVR_EXTERNAL; - cmd->phy_address = cp->phy_addr; - cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII | + cmd->base.port = PORT_MII; + cmd->base.phy_address = cp->phy_addr; + advertising |= ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->supported |= + supported |= (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | @@ -4574,11 +4575,10 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) } } else { - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_INTERNAL; - cmd->phy_address = 0; - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_FIBRE; + cmd->base.port = PORT_FIBRE; + cmd->base.phy_address = 0; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_FIBRE; if (cp->hw_running) { /* pcs uses the same bits as mii */ @@ -4590,21 +4590,20 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) spin_unlock_irqrestore(&cp->lock, flags); if (bmcr & BMCR_ANENABLE) { - cmd->advertising |= ADVERTISED_Autoneg; - cmd->autoneg = AUTONEG_ENABLE; - ethtool_cmd_speed_set(cmd, ((speed == 10) ? + advertising |= ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.speed = ((speed == 10) ? SPEED_10 : ((speed == 1000) ? - SPEED_1000 : SPEED_100))); - cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + SPEED_1000 : SPEED_100)); + cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->autoneg = AUTONEG_DISABLE; - ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ? + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ? SPEED_1000 : ((bmcr & BMCR_SPEED100) ? - SPEED_100 : SPEED_10))); - cmd->duplex = - (bmcr & BMCR_FULLDPLX) ? + SPEED_100 : SPEED_10)); + cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } if (linkstate != link_up) { @@ -4619,39 +4618,46 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) * settings that we configured. */ if (cp->link_cntl & BMCR_ANENABLE) { - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } else { - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; if (cp->link_cntl & BMCR_SPEED100) { - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { - ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; } - cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)? + cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } } + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int cas_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct cas *cp = netdev_priv(dev); unsigned long flags; - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; /* Verify the settings we care about. 
*/ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && + if (cmd->base.autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -4753,8 +4759,6 @@ static void cas_get_ethtool_stats(struct net_device *dev, static const struct ethtool_ops cas_ethtool_ops = { .get_drvinfo = cas_get_drvinfo, - .get_settings = cas_get_settings, - .set_settings = cas_set_settings, .nway_reset = cas_nway_reset, .get_link = cas_get_link, .get_msglevel = cas_get_msglevel, @@ -4764,6 +4768,8 @@ static const struct ethtool_ops cas_ethtool_ops = { .get_sset_count = cas_get_sset_count, .get_strings = cas_get_strings, .get_ethtool_stats = cas_get_ethtool_stats, + .get_link_ksettings = cas_get_link_ksettings, + .set_link_ksettings = cas_set_link_ksettings, }; static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 56c07e9501e875883584c00fe6977c3addb1f873 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 17:50:06 +0100 Subject: [PATCH 0986/1911] net: sun: niu: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 37 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 57978056b3366f..2dcca249eb9c73 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6813,7 +6813,8 @@ static void niu_get_drvinfo(struct net_device *dev, sizeof(info->bus_info)); } -static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int niu_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp; @@ -6821,28 +6822,30 @@ static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) lp = &np->link_config; memset(cmd, 0, sizeof(*cmd)); - cmd->phy_address = np->phy_addr; - cmd->supported = lp->supported; - cmd->advertising = lp->active_advertising; - cmd->autoneg = lp->active_autoneg; - ethtool_cmd_speed_set(cmd, lp->active_speed); - cmd->duplex = lp->active_duplex; - cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; - cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ? - XCVR_EXTERNAL : XCVR_INTERNAL; + cmd->base.phy_address = np->phy_addr; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + lp->supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + lp->active_advertising); + cmd->base.autoneg = lp->active_autoneg; + cmd->base.speed = lp->active_speed; + cmd->base.duplex = lp->active_duplex; + cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? 
PORT_FIBRE : PORT_TP; return 0; } -static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int niu_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp = &np->link_config; - lp->advertising = cmd->advertising; - lp->speed = ethtool_cmd_speed(cmd); - lp->duplex = cmd->duplex; - lp->autoneg = cmd->autoneg; + ethtool_convert_link_mode_to_legacy_u32(&lp->advertising, + cmd->link_modes.advertising); + lp->speed = cmd->base.speed; + lp->duplex = cmd->base.duplex; + lp->autoneg = cmd->base.autoneg; return niu_init_link(np); } @@ -7902,14 +7905,14 @@ static const struct ethtool_ops niu_ethtool_ops = { .nway_reset = niu_nway_reset, .get_eeprom_len = niu_get_eeprom_len, .get_eeprom = niu_get_eeprom, - .get_settings = niu_get_settings, - .set_settings = niu_set_settings, .get_strings = niu_get_strings, .get_sset_count = niu_get_sset_count, .get_ethtool_stats = niu_get_ethtool_stats, .set_phys_id = niu_set_phys_id, .get_rxnfc = niu_get_nfc, .set_rxnfc = niu_set_nfc, + .get_link_ksettings = niu_get_link_ksettings, + .set_link_ksettings = niu_set_link_ksettings, }; static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent, From 9dff2defef3ce1933a44d59ec139987e19645841 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 00:04:18 +0100 Subject: [PATCH 0987/1911] net: sun: sungem: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 98 ++++++++++++++++++------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 5c5952e782cd22..dbfca04667603a 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1250,12 +1250,17 @@ static void gem_stop_dma(struct gem *gp) // XXX dbl check what that function should do when called on PCS PHY -static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) +static void gem_begin_auto_negotiation(struct gem *gp, + const struct ethtool_link_ksettings *ep) { u32 advertise, features; int autoneg; int speed; int duplex; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + ep->link_modes.advertising); if (gp->phy_type != phy_mii_mdio0 && gp->phy_type != phy_mii_mdio1) @@ -1278,13 +1283,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) /* Setup link parameters */ if (!ep) goto start_aneg; - if (ep->autoneg == AUTONEG_ENABLE) { - advertise = ep->advertising; + if (ep->base.autoneg == AUTONEG_ENABLE) { + advertise = advertising; autoneg = 1; } else { autoneg = 0; - speed = ethtool_cmd_speed(ep); - duplex = ep->duplex; + speed = ep->base.speed; + duplex = ep->base.duplex; } start_aneg: @@ -2515,85 +2520,96 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info)); } -static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int gem_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct gem *gp = netdev_priv(dev); + u32 supported, advertising; if (gp->phy_type == phy_mii_mdio0 || gp->phy_type == 
phy_mii_mdio1) { if (gp->phy_mii.def) - cmd->supported = gp->phy_mii.def->features; + supported = gp->phy_mii.def->features; else - cmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full); /* XXX hardcoded stuff for now */ - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; - cmd->phy_address = 0; /* XXX fixed PHYAD */ + cmd->base.port = PORT_MII; + cmd->base.phy_address = 0; /* XXX fixed PHYAD */ /* Return current PHY settings */ - cmd->autoneg = gp->want_autoneg; - ethtool_cmd_speed_set(cmd, gp->phy_mii.speed); - cmd->duplex = gp->phy_mii.duplex; - cmd->advertising = gp->phy_mii.advertising; + cmd->base.autoneg = gp->want_autoneg; + cmd->base.speed = gp->phy_mii.speed; + cmd->base.duplex = gp->phy_mii.duplex; + advertising = gp->phy_mii.advertising; /* If we started with a forced mode, we don't have a default * advertise set, we need to return something sensible so * userland can re-enable autoneg properly. */ - if (cmd->advertising == 0) - cmd->advertising = cmd->supported; + if (advertising == 0) + advertising = supported; } else { // XXX PCS ? - cmd->supported = + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg); - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = cmd->port = cmd->phy_address = - cmd->transceiver = cmd->autoneg = 0; + advertising = supported; + cmd->base.speed = 0; + cmd->base.duplex = 0; + cmd->base.port = 0; + cmd->base.phy_address = 0; + cmd->base.autoneg = 0; /* serdes means usually a Fibre connector, with most fixed */ if (gp->phy_type == phy_serdes) { - cmd->port = PORT_FIBRE; - cmd->supported = (SUPPORTED_1000baseT_Half | + cmd->base.port = PORT_FIBRE; + supported = (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE | SUPPORTED_Autoneg | SUPPORTED_Pause | SUPPORTED_Asym_Pause); - cmd->advertising = cmd->supported; - cmd->transceiver = XCVR_INTERNAL; + advertising = supported; if (gp->lstate == link_up) - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->autoneg = 1; + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.autoneg = 1; } } - cmd->maxtxpkt = cmd->maxrxpkt = 0; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int gem_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct gem *gp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* Verify the settings we care about. 
*/ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE && - cmd->advertising == 0) + if (cmd->base.autoneg == AUTONEG_ENABLE && + advertising == 0) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && + if (cmd->base.autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -2666,13 +2682,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops gem_ethtool_ops = { .get_drvinfo = gem_get_drvinfo, .get_link = ethtool_op_get_link, - .get_settings = gem_get_settings, - .set_settings = gem_set_settings, .nway_reset = gem_nway_reset, .get_msglevel = gem_get_msglevel, .set_msglevel = gem_set_msglevel, .get_wol = gem_get_wol, .set_wol = gem_set_wol, + .get_link_ksettings = gem_get_link_ksettings, + .set_link_ksettings = gem_set_link_ksettings, }; static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 8103015df56d97cc3a4167d2e070ee14cc022bae Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 22:25:39 +0100 Subject: [PATCH 0988/1911] net: sun: sunhme: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunhme.c | 62 +++++++++++++++++-------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 72ff05cd3ed80c..53ff66ef53acfe 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1294,9 +1294,10 @@ static void happy_meal_init_rings(struct happy_meal *hp) } /* hp->happy_lock must be held */ -static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, - void __iomem *tregs, - struct ethtool_cmd *ep) +static void +happy_meal_begin_auto_negotiation(struct happy_meal *hp, + void __iomem *tregs, + const struct ethtool_link_ksettings *ep) { int timeout; @@ -1309,7 +1310,7 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */ hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE); - if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { + if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { /* Advertise everything we can support. */ if (hp->sw_bmsr & BMSR_10HALF) hp->sw_advertise |= (ADVERTISE_10HALF); @@ -1384,14 +1385,14 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* Disable auto-negotiation in BMCR, enable the duplex and * speed setting, init the timer state machine, and fire it off. 
*/ - if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { + if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { hp->sw_bmcr = BMCR_SPEED100; } else { - if (ethtool_cmd_speed(ep) == SPEED_100) + if (ep->base.speed == SPEED_100) hp->sw_bmcr = BMCR_SPEED100; else hp->sw_bmcr = 0; - if (ep->duplex == DUPLEX_FULL) + if (ep->base.duplex == DUPLEX_FULL) hp->sw_bmcr |= BMCR_FULLDPLX; } happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr); @@ -2434,20 +2435,21 @@ static void happy_meal_set_multicast(struct net_device *dev) } /* Ethtool support... */ -static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int hme_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct happy_meal *hp = netdev_priv(dev); u32 speed; + u32 supported; - cmd->supported = + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); /* XXX hardcoded stuff for now */ - cmd->port = PORT_TP; /* XXX no MII support */ - cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */ - cmd->phy_address = 0; /* XXX fixed PHYAD */ + cmd->base.port = PORT_TP; /* XXX no MII support */ + cmd->base.phy_address = 0; /* XXX fixed PHYAD */ /* Record PHY settings. */ spin_lock_irq(&hp->happy_lock); @@ -2456,41 +2458,45 @@ static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) spin_unlock_irq(&hp->happy_lock); if (hp->sw_bmcr & BMCR_ANENABLE) { - cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ? SPEED_100 : SPEED_10); if (speed == SPEED_100) - cmd->duplex = + cmd->base.duplex = (hp->sw_lpa & (LPA_100FULL)) ? DUPLEX_FULL : DUPLEX_HALF; else - cmd->duplex = + cmd->base.duplex = (hp->sw_lpa & (LPA_10FULL)) ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_DISABLE; speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - cmd->duplex = + cmd->base.duplex = (hp->sw_bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } - ethtool_cmd_speed_set(cmd, speed); + cmd->base.speed = speed; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + return 0; } -static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int hme_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct happy_meal *hp = netdev_priv(dev); /* Verify the settings we care about. */ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && - ((ethtool_cmd_speed(cmd) != SPEED_100 && - ethtool_cmd_speed(cmd) != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + if (cmd->base.autoneg == AUTONEG_DISABLE && + ((cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Ok, do it to it. 
*/ @@ -2537,10 +2543,10 @@ static u32 hme_get_link(struct net_device *dev) } static const struct ethtool_ops hme_ethtool_ops = { - .get_settings = hme_get_settings, - .set_settings = hme_set_settings, .get_drvinfo = hme_get_drvinfo, .get_link = hme_get_link, + .get_link_ksettings = hme_get_link_ksettings, + .set_link_ksettings = hme_set_link_ksettings, }; static int hme_version_printed; From f441df6b9ff41e1af7289b69d5ce379053f900a5 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 23:21:06 +0100 Subject: [PATCH 0989/1911] net: toshiba: ps3_genic_net: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Tested-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 51 +++++++++++--------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 72013314bba81f..fa6a06571187ed 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1206,61 +1206,68 @@ void gelic_net_get_drvinfo(struct net_device *netdev, strlcpy(info->version, DRV_VERSION, sizeof(info->version)); } -static int gelic_ether_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int gelic_ether_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct gelic_card *card = netdev_card(netdev); + u32 supported, advertising; gelic_card_get_ether_port_status(card, 0); if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX) - cmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - cmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) { case GELIC_LV1_ETHER_SPEED_10: - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; break; case GELIC_LV1_ETHER_SPEED_100: - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; break; case GELIC_LV1_ETHER_SPEED_1000: - ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; break; default: pr_info("%s: speed unknown\n", __func__); - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; break; } - cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg | + supported = SUPPORTED_TP | SUPPORTED_Autoneg | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full; - cmd->advertising = cmd->supported; + advertising = supported; if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) { - cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; } else { - cmd->autoneg = AUTONEG_DISABLE; - cmd->advertising &= ~ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_DISABLE; + advertising &= ~ADVERTISED_Autoneg; } - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int gelic_ether_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int +gelic_ether_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct gelic_card *card = netdev_card(netdev); u64 mode; int ret; - if 
(cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { mode = GELIC_LV1_ETHER_AUTO_NEG; } else { - switch (cmd->speed) { + switch (cmd->base.speed) { case SPEED_10: mode = GELIC_LV1_ETHER_SPEED_10; break; @@ -1273,9 +1280,9 @@ static int gelic_ether_set_settings(struct net_device *netdev, default: return -EINVAL; } - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) { mode |= GELIC_LV1_ETHER_FULL_DUPLEX; - else if (cmd->speed == SPEED_1000) { + } else if (cmd->base.speed == SPEED_1000) { pr_info("1000 half duplex is not supported.\n"); return -EINVAL; } @@ -1370,11 +1377,11 @@ static int gelic_net_set_wol(struct net_device *netdev, static const struct ethtool_ops gelic_ether_ethtool_ops = { .get_drvinfo = gelic_net_get_drvinfo, - .get_settings = gelic_ether_get_settings, - .set_settings = gelic_ether_set_settings, .get_link = ethtool_op_get_link, .get_wol = gelic_net_get_wol, .set_wol = gelic_net_set_wol, + .get_link_ksettings = gelic_ether_get_link_ksettings, + .set_link_ksettings = gelic_ether_set_link_ksettings, }; /** From 50ad480e4d64fde941fcac39db1ab29a83d86703 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 23:46:00 +0100 Subject: [PATCH 0990/1911] net: toshiba: spider_net: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/toshiba/spider_net_ethtool.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c index ffe519382e111a..16bd036d06820e 100644 --- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c +++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c @@ -47,19 +47,23 @@ static struct { }; static int -spider_net_ethtool_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +spider_net_ethtool_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct spider_net_card *card; card = netdev_priv(netdev); - cmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE); - cmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE); - cmd->port = PORT_FIBRE; - ethtool_cmd_speed_set(cmd, card->phy.speed); - cmd->duplex = DUPLEX_FULL; + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + + cmd->base.port = PORT_FIBRE; + cmd->base.speed = card->phy.speed; + cmd->base.duplex = DUPLEX_FULL; return 0; } @@ -166,7 +170,6 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset, } const struct ethtool_ops spider_net_ethtool_ops = { - .get_settings = spider_net_ethtool_get_settings, .get_drvinfo = spider_net_ethtool_get_drvinfo, .get_wol = spider_net_ethtool_get_wol, .get_msglevel = spider_net_ethtool_get_msglevel, @@ -177,5 +180,6 @@ const struct ethtool_ops spider_net_ethtool_ops = { .get_strings = spider_net_get_strings, .get_sset_count = spider_net_get_sset_count, .get_ethtool_stats = spider_net_get_ethtool_stats, + 
.get_link_ksettings = spider_net_ethtool_get_link_ksettings, }; From b793f081674e363f71699b0b32fc9a1890e52db2 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 6 Mar 2017 14:34:27 +0100 Subject: [PATCH 0991/1911] net: ibm: emac: fix regression caused by emac_dt_phy_probe() Julian Margetson reported a panic on his SAM460EX with Kernel 4.11-rc1: | Unable to handle kernel paging request for data at address 0x00000014 | Oops: Kernel access of bad area, sig: 11 [#1] | PREEMPT | Canyonlands | Modules linked in: | CPU: 0 PID: 1 Comm: swapper Not tainted [...] | task: ea838000 task.stack: ea836000 | NIP: c0599f5c LR: c0599dd8 CTR: 00000000 | REGS: ea837c80 TRAP: 0300 Not tainted [...] | MSR: 00029000 | CR: 24371242 XER: 20000000 | DEAR: 00000014 ESR: 00000000 | GPR00: c0599ce8 ea837d30 ea838000 c0e52dcc c0d56ffb [...] | NIP [c0599f5c] emac_probe+0xfb4/0x1304 | LR [c0599dd8] emac_probe+0xe30/0x1304 | Call Trace: | [ea837d30] [c0599ce8] emac_probe+0xd40/0x1304 (unreliable) | [ea837d80] [c0533504] platform_drv_probe+0x48/0x90 | [ea837da0] [c0531c14] driver_probe_device+0x15c/0x2c4 | [ea837dd0] [c0531e04] __driver_attach+0x88/0xb0 | ---[ end trace ... ]--- The problem is caused by emac_dt_phy_probe() returing success (0) for existing device-trees configurations that do not specify a "phy-handle" property. This caused the code to skip the existing phy probe and setup. Which led to essential phy related data-structures being uninitialized. This patch also removes the unused variable in emac_dt_phy_connect(). Fixes: a577ca6badb5261d ("net: emac: add support for device-tree based PHY discovery and setup") Reported-by: Julian Margetson Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 275c2e2349ad92..c44036d5761a4c 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) static int emac_dt_phy_connect(struct emac_instance *dev, struct device_node *phy_handle) { - int res; - dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), GFP_KERNEL); if (!dev->phy.def) @@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; struct device_node *phy_handle; - int res = 0; + int res = 1; phy_handle = of_parse_phandle(np, "phy-handle", 0); @@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev) if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { int res = emac_dt_phy_probe(dev); - mutex_unlock(&emac_phy_map_lock); - if (!res) + switch (res) { + case 1: + /* No phy-handle property configured. + * Continue with the existing phy probe + * and setup code. + */ + break; + + case 0: + mutex_unlock(&emac_phy_map_lock); goto init_phy; - dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", - res); - return res; + default: + mutex_unlock(&emac_phy_map_lock); + dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", + res); + return res; + } } if (dev->phy_address != 0xffffffff) From aac1561ad98e74f6ea43337f9b9714ab0b1f493e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Mar 2017 22:17:10 -0800 Subject: [PATCH 0992/1911] net: Revert ksettings conversions. Those were supposed to go into the net-next tree not the net tree. Oops... 
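For reference, every conversion in this series follows the same pattern regardless of driver: the scalar link parameters move from the flat struct ethtool_cmd (filled via ethtool_cmd_speed_set() and friends) into cmd->base, the legacy u32 SUPPORTED_/ADVERTISED_ masks are translated to link-mode bitmaps with ethtool_convert_legacy_u32_to_link_mode() (and back with ethtool_convert_link_mode_to_legacy_u32() on the set path), and fields with no ksettings counterpart (transceiver, maxtxpkt, maxrxpkt) are simply dropped. The sketch below is illustrative only: the "foo" driver, its private struct and its fields are hypothetical and appear in none of the patches above; only the ethtool API calls are the ones used throughout the series.

#include <linux/ethtool.h>
#include <linux/netdevice.h>

/* Hypothetical driver state, standing in for smc_local, gem, cas, etc. */
struct foo_priv {
	u32 speed;		/* SPEED_10 or SPEED_100 */
	bool full_duplex;
};

static int foo_get_link_ksettings(struct net_device *dev,
				  struct ethtool_link_ksettings *cmd)
{
	struct foo_priv *fp = netdev_priv(dev);
	u32 supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
			SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
			SUPPORTED_TP;

	/* Scalars that used to live directly in struct ethtool_cmd. */
	cmd->base.speed = fp->speed;
	cmd->base.duplex = fp->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
	cmd->base.port = PORT_TP;
	cmd->base.autoneg = AUTONEG_DISABLE;
	cmd->base.phy_address = 0;

	/* Legacy u32 masks become link-mode bitmaps. */
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						supported);
	return 0;
}

static int foo_set_link_ksettings(struct net_device *dev,
				  const struct ethtool_link_ksettings *cmd)
{
	struct foo_priv *fp = netdev_priv(dev);

	/* Validate against cmd->base instead of the old flat fields. */
	if (cmd->base.autoneg != AUTONEG_DISABLE ||
	    (cmd->base.speed != SPEED_10 && cmd->base.speed != SPEED_100) ||
	    (cmd->base.duplex != DUPLEX_HALF &&
	     cmd->base.duplex != DUPLEX_FULL))
		return -EINVAL;

	fp->speed = cmd->base.speed;
	fp->full_duplex = cmd->base.duplex == DUPLEX_FULL;
	return 0;
}

static const struct ethtool_ops foo_ethtool_ops = {
	/* .get_settings/.set_settings entries are removed ... */
	.get_link_ksettings = foo_get_link_ksettings,
	.set_link_ksettings = foo_set_link_ksettings,
};

Drivers that talk to a real MII PHY do not open-code this translation at all; they call the mii_ethtool_get_link_ksettings()/mii_ethtool_set_link_ksettings() helpers instead, as the smc911x and smc91x hunks above show.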
Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 51 +++++----- drivers/net/ethernet/sun/cassini.c | 98 +++++++++---------- drivers/net/ethernet/sun/niu.c | 37 ++++--- drivers/net/ethernet/sun/sungem.c | 98 ++++++++----------- drivers/net/ethernet/sun/sunhme.c | 62 ++++++------ drivers/net/ethernet/toshiba/ps3_gelic_net.c | 51 +++++----- .../net/ethernet/toshiba/spider_net_ethtool.c | 24 ++--- 7 files changed, 188 insertions(+), 233 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 36307d34f64181..4f19c6166182e7 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev) * Ethtool support */ static int -smc911x_ethtool_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret, status; unsigned long flags; - u32 supported; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); + cmd->maxtxpkt = 1; + cmd->maxrxpkt = 1; if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); + ret = mii_ethtool_gset(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - supported = SUPPORTED_10baseT_Half | + cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); else if (lp->ctl_rspeed == 100) - cmd->base.speed = SPEED_100; - - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.port = 0; + ethtool_cmd_speed_set(cmd, SPEED_100); + + cmd->autoneg = AUTONEG_DISABLE; + if (lp->mii.phy_id==1) + cmd->transceiver = XCVR_INTERNAL; + else + cmd->transceiver = XCVR_EXTERNAL; + cmd->port = 0; SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status); - cmd->base.duplex = + cmd->duplex = (status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ? 
DUPLEX_FULL : DUPLEX_HALF; - - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.supported, supported); - ret = 0; } @@ -1487,8 +1487,7 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev, } static int -smc911x_ethtool_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret; @@ -1496,18 +1495,16 @@ smc911x_ethtool_set_link_ksettings(struct net_device *dev, if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); + ret = mii_ethtool_sset(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - if (cmd->base.autoneg != AUTONEG_DISABLE || - cmd->base.speed != SPEED_10 || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL) || - (cmd->base.port != PORT_TP && - cmd->base.port != PORT_AUI)) + if (cmd->autoneg != AUTONEG_DISABLE || + cmd->speed != SPEED_10 || + (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || + (cmd->port != PORT_TP && cmd->port != PORT_AUI)) return -EINVAL; - lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; + lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; ret = 0; } @@ -1689,6 +1686,8 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev) } static const struct ethtool_ops smc911x_ethtool_ops = { + .get_settings = smc911x_ethtool_getsettings, + .set_settings = smc911x_ethtool_setsettings, .get_drvinfo = smc911x_ethtool_getdrvinfo, .get_msglevel = smc911x_ethtool_getmsglevel, .set_msglevel = smc911x_ethtool_setmsglevel, @@ -1699,8 +1698,6 @@ static const struct ethtool_ops smc911x_ethtool_ops = { .get_eeprom_len = smc911x_ethtool_geteeprom_len, .get_eeprom = smc911x_ethtool_geteeprom, .set_eeprom = smc911x_ethtool_seteeprom, - .get_link_ksettings = smc911x_ethtool_get_link_ksettings, - .set_link_ksettings = smc911x_ethtool_set_link_ksettings, }; /* diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 382993c1561c5c..0e8e89f17dbb11 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -691,8 +691,7 @@ static void cas_mif_poll(struct cas *cp, const int enable) } /* Must be invoked under cp->lock */ -static void cas_begin_auto_negotiation(struct cas *cp, - const struct ethtool_link_ksettings *ep) +static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) { u16 ctl; #if 1 @@ -705,16 +704,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, if (!ep) goto start_aneg; lcntl = cp->link_cntl; - if (ep->base.autoneg == AUTONEG_ENABLE) { + if (ep->autoneg == AUTONEG_ENABLE) cp->link_cntl = BMCR_ANENABLE; - } else { - u32 speed = ep->base.speed; + else { + u32 speed = ethtool_cmd_speed(ep); cp->link_cntl = 0; if (speed == SPEED_100) cp->link_cntl |= BMCR_SPEED100; else if (speed == SPEED_1000) cp->link_cntl |= CAS_BMCR_SPEED1000; - if (ep->base.duplex == DUPLEX_FULL) + if (ep->duplex == DUPLEX_FULL) cp->link_cntl |= BMCR_FULLDPLX; } #if 1 @@ -4529,21 +4528,19 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info)); } -static int cas_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct cas *cp = netdev_priv(dev); u16 bmcr; int full_duplex, speed, pause; unsigned long flags; enum 
link_state linkstate = link_up; - u32 supported, advertising; - advertising = 0; - supported = SUPPORTED_Autoneg; + cmd->advertising = 0; + cmd->supported = SUPPORTED_Autoneg; if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { - supported |= SUPPORTED_1000baseT_Full; - advertising |= ADVERTISED_1000baseT_Full; + cmd->supported |= SUPPORTED_1000baseT_Full; + cmd->advertising |= ADVERTISED_1000baseT_Full; } /* Record PHY settings if HW is on. */ @@ -4551,15 +4548,17 @@ static int cas_get_link_ksettings(struct net_device *dev, bmcr = 0; linkstate = cp->lstate; if (CAS_PHY_MII(cp->phy_type)) { - cmd->base.port = PORT_MII; - cmd->base.phy_address = cp->phy_addr; - advertising |= ADVERTISED_TP | ADVERTISED_MII | + cmd->port = PORT_MII; + cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? + XCVR_INTERNAL : XCVR_EXTERNAL; + cmd->phy_address = cp->phy_addr; + cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - supported |= + cmd->supported |= (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | @@ -4575,10 +4574,11 @@ static int cas_get_link_ksettings(struct net_device *dev, } } else { - cmd->base.port = PORT_FIBRE; - cmd->base.phy_address = 0; - supported |= SUPPORTED_FIBRE; - advertising |= ADVERTISED_FIBRE; + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_INTERNAL; + cmd->phy_address = 0; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; if (cp->hw_running) { /* pcs uses the same bits as mii */ @@ -4590,20 +4590,21 @@ static int cas_get_link_ksettings(struct net_device *dev, spin_unlock_irqrestore(&cp->lock, flags); if (bmcr & BMCR_ANENABLE) { - advertising |= ADVERTISED_Autoneg; - cmd->base.autoneg = AUTONEG_ENABLE; - cmd->base.speed = ((speed == 10) ? + cmd->advertising |= ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_ENABLE; + ethtool_cmd_speed_set(cmd, ((speed == 10) ? SPEED_10 : ((speed == 1000) ? - SPEED_1000 : SPEED_100)); - cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + SPEED_1000 : SPEED_100))); + cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ? + cmd->autoneg = AUTONEG_DISABLE; + ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ? SPEED_1000 : ((bmcr & BMCR_SPEED100) ? - SPEED_100 : SPEED_10)); - cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? + SPEED_100 : SPEED_10))); + cmd->duplex = + (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } if (linkstate != link_up) { @@ -4618,46 +4619,39 @@ static int cas_get_link_ksettings(struct net_device *dev, * settings that we configured. */ if (cp->link_cntl & BMCR_ANENABLE) { - cmd->base.speed = 0; - cmd->base.duplex = 0xff; + ethtool_cmd_speed_set(cmd, 0); + cmd->duplex = 0xff; } else { - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); if (cp->link_cntl & BMCR_SPEED100) { - cmd->base.speed = SPEED_100; + ethtool_cmd_speed_set(cmd, SPEED_100); } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { - cmd->base.speed = SPEED_1000; + ethtool_cmd_speed_set(cmd, SPEED_1000); } - cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ? + cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)? 
DUPLEX_FULL : DUPLEX_HALF; } } - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); - return 0; } -static int cas_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct cas *cp = netdev_priv(dev); unsigned long flags; - u32 speed = cmd->base.speed; + u32 speed = ethtool_cmd_speed(cmd); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && + if (cmd->autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -4759,6 +4753,8 @@ static void cas_get_ethtool_stats(struct net_device *dev, static const struct ethtool_ops cas_ethtool_ops = { .get_drvinfo = cas_get_drvinfo, + .get_settings = cas_get_settings, + .set_settings = cas_set_settings, .nway_reset = cas_nway_reset, .get_link = cas_get_link, .get_msglevel = cas_get_msglevel, @@ -4768,8 +4764,6 @@ static const struct ethtool_ops cas_ethtool_ops = { .get_sset_count = cas_get_sset_count, .get_strings = cas_get_strings, .get_ethtool_stats = cas_get_ethtool_stats, - .get_link_ksettings = cas_get_link_ksettings, - .set_link_ksettings = cas_set_link_ksettings, }; static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 2dcca249eb9c73..57978056b3366f 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6813,8 +6813,7 @@ static void niu_get_drvinfo(struct net_device *dev, sizeof(info->bus_info)); } -static int niu_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp; @@ -6822,30 +6821,28 @@ static int niu_get_link_ksettings(struct net_device *dev, lp = &np->link_config; memset(cmd, 0, sizeof(*cmd)); - cmd->base.phy_address = np->phy_addr; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - lp->supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - lp->active_advertising); - cmd->base.autoneg = lp->active_autoneg; - cmd->base.speed = lp->active_speed; - cmd->base.duplex = lp->active_duplex; - cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; + cmd->phy_address = np->phy_addr; + cmd->supported = lp->supported; + cmd->advertising = lp->active_advertising; + cmd->autoneg = lp->active_autoneg; + ethtool_cmd_speed_set(cmd, lp->active_speed); + cmd->duplex = lp->active_duplex; + cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; + cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ? 
+ XCVR_EXTERNAL : XCVR_INTERNAL; return 0; } -static int niu_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp = &np->link_config; - ethtool_convert_link_mode_to_legacy_u32(&lp->advertising, - cmd->link_modes.advertising); - lp->speed = cmd->base.speed; - lp->duplex = cmd->base.duplex; - lp->autoneg = cmd->base.autoneg; + lp->advertising = cmd->advertising; + lp->speed = ethtool_cmd_speed(cmd); + lp->duplex = cmd->duplex; + lp->autoneg = cmd->autoneg; return niu_init_link(np); } @@ -7905,14 +7902,14 @@ static const struct ethtool_ops niu_ethtool_ops = { .nway_reset = niu_nway_reset, .get_eeprom_len = niu_get_eeprom_len, .get_eeprom = niu_get_eeprom, + .get_settings = niu_get_settings, + .set_settings = niu_set_settings, .get_strings = niu_get_strings, .get_sset_count = niu_get_sset_count, .get_ethtool_stats = niu_get_ethtool_stats, .set_phys_id = niu_set_phys_id, .get_rxnfc = niu_get_nfc, .set_rxnfc = niu_set_nfc, - .get_link_ksettings = niu_get_link_ksettings, - .set_link_ksettings = niu_set_link_ksettings, }; static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent, diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index dbfca04667603a..5c5952e782cd22 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1250,17 +1250,12 @@ static void gem_stop_dma(struct gem *gp) // XXX dbl check what that function should do when called on PCS PHY -static void gem_begin_auto_negotiation(struct gem *gp, - const struct ethtool_link_ksettings *ep) +static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) { u32 advertise, features; int autoneg; int speed; int duplex; - u32 advertising; - - ethtool_convert_link_mode_to_legacy_u32(&advertising, - ep->link_modes.advertising); if (gp->phy_type != phy_mii_mdio0 && gp->phy_type != phy_mii_mdio1) @@ -1283,13 +1278,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, /* Setup link parameters */ if (!ep) goto start_aneg; - if (ep->base.autoneg == AUTONEG_ENABLE) { - advertise = advertising; + if (ep->autoneg == AUTONEG_ENABLE) { + advertise = ep->advertising; autoneg = 1; } else { autoneg = 0; - speed = ep->base.speed; - duplex = ep->base.duplex; + speed = ethtool_cmd_speed(ep); + duplex = ep->duplex; } start_aneg: @@ -2520,96 +2515,85 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info)); } -static int gem_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct gem *gp = netdev_priv(dev); - u32 supported, advertising; if (gp->phy_type == phy_mii_mdio0 || gp->phy_type == phy_mii_mdio1) { if (gp->phy_mii.def) - supported = gp->phy_mii.def->features; + cmd->supported = gp->phy_mii.def->features; else - supported = (SUPPORTED_10baseT_Half | + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full); /* XXX hardcoded stuff for now */ - cmd->base.port = PORT_MII; - cmd->base.phy_address = 0; /* XXX fixed PHYAD */ + cmd->port = PORT_MII; + cmd->transceiver = XCVR_EXTERNAL; + cmd->phy_address = 0; /* XXX fixed PHYAD */ /* Return current PHY settings */ - cmd->base.autoneg = gp->want_autoneg; - cmd->base.speed = gp->phy_mii.speed; - cmd->base.duplex = 
gp->phy_mii.duplex; - advertising = gp->phy_mii.advertising; + cmd->autoneg = gp->want_autoneg; + ethtool_cmd_speed_set(cmd, gp->phy_mii.speed); + cmd->duplex = gp->phy_mii.duplex; + cmd->advertising = gp->phy_mii.advertising; /* If we started with a forced mode, we don't have a default * advertise set, we need to return something sensible so * userland can re-enable autoneg properly. */ - if (advertising == 0) - advertising = supported; + if (cmd->advertising == 0) + cmd->advertising = cmd->supported; } else { // XXX PCS ? - supported = + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg); - advertising = supported; - cmd->base.speed = 0; - cmd->base.duplex = 0; - cmd->base.port = 0; - cmd->base.phy_address = 0; - cmd->base.autoneg = 0; + cmd->advertising = cmd->supported; + ethtool_cmd_speed_set(cmd, 0); + cmd->duplex = cmd->port = cmd->phy_address = + cmd->transceiver = cmd->autoneg = 0; /* serdes means usually a Fibre connector, with most fixed */ if (gp->phy_type == phy_serdes) { - cmd->base.port = PORT_FIBRE; - supported = (SUPPORTED_1000baseT_Half | + cmd->port = PORT_FIBRE; + cmd->supported = (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE | SUPPORTED_Autoneg | SUPPORTED_Pause | SUPPORTED_Asym_Pause); - advertising = supported; + cmd->advertising = cmd->supported; + cmd->transceiver = XCVR_INTERNAL; if (gp->lstate == link_up) - cmd->base.speed = SPEED_1000; - cmd->base.duplex = DUPLEX_FULL; - cmd->base.autoneg = 1; + ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->duplex = DUPLEX_FULL; + cmd->autoneg = 1; } } - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + cmd->maxtxpkt = cmd->maxrxpkt = 0; return 0; } -static int gem_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct gem *gp = netdev_priv(dev); - u32 speed = cmd->base.speed; - u32 advertising; - - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); + u32 speed = ethtool_cmd_speed(cmd); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_ENABLE && - advertising == 0) + if (cmd->autoneg == AUTONEG_ENABLE && + cmd->advertising == 0) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && + if (cmd->autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. 
*/ @@ -2682,13 +2666,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops gem_ethtool_ops = { .get_drvinfo = gem_get_drvinfo, .get_link = ethtool_op_get_link, + .get_settings = gem_get_settings, + .set_settings = gem_set_settings, .nway_reset = gem_nway_reset, .get_msglevel = gem_get_msglevel, .set_msglevel = gem_set_msglevel, .get_wol = gem_get_wol, .set_wol = gem_set_wol, - .get_link_ksettings = gem_get_link_ksettings, - .set_link_ksettings = gem_set_link_ksettings, }; static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 53ff66ef53acfe..72ff05cd3ed80c 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1294,10 +1294,9 @@ static void happy_meal_init_rings(struct happy_meal *hp) } /* hp->happy_lock must be held */ -static void -happy_meal_begin_auto_negotiation(struct happy_meal *hp, - void __iomem *tregs, - const struct ethtool_link_ksettings *ep) +static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, + void __iomem *tregs, + struct ethtool_cmd *ep) { int timeout; @@ -1310,7 +1309,7 @@ happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */ hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE); - if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { + if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { /* Advertise everything we can support. */ if (hp->sw_bmsr & BMSR_10HALF) hp->sw_advertise |= (ADVERTISE_10HALF); @@ -1385,14 +1384,14 @@ happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* Disable auto-negotiation in BMCR, enable the duplex and * speed setting, init the timer state machine, and fire it off. */ - if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { + if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { hp->sw_bmcr = BMCR_SPEED100; } else { - if (ep->base.speed == SPEED_100) + if (ethtool_cmd_speed(ep) == SPEED_100) hp->sw_bmcr = BMCR_SPEED100; else hp->sw_bmcr = 0; - if (ep->base.duplex == DUPLEX_FULL) + if (ep->duplex == DUPLEX_FULL) hp->sw_bmcr |= BMCR_FULLDPLX; } happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr); @@ -2435,21 +2434,20 @@ static void happy_meal_set_multicast(struct net_device *dev) } /* Ethtool support... */ -static int hme_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct happy_meal *hp = netdev_priv(dev); u32 speed; - u32 supported; - supported = + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); /* XXX hardcoded stuff for now */ - cmd->base.port = PORT_TP; /* XXX no MII support */ - cmd->base.phy_address = 0; /* XXX fixed PHYAD */ + cmd->port = PORT_TP; /* XXX no MII support */ + cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */ + cmd->phy_address = 0; /* XXX fixed PHYAD */ /* Record PHY settings. */ spin_lock_irq(&hp->happy_lock); @@ -2458,45 +2456,41 @@ static int hme_get_link_ksettings(struct net_device *dev, spin_unlock_irq(&hp->happy_lock); if (hp->sw_bmcr & BMCR_ANENABLE) { - cmd->base.autoneg = AUTONEG_ENABLE; + cmd->autoneg = AUTONEG_ENABLE; speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ? 
SPEED_100 : SPEED_10); if (speed == SPEED_100) - cmd->base.duplex = + cmd->duplex = (hp->sw_lpa & (LPA_100FULL)) ? DUPLEX_FULL : DUPLEX_HALF; else - cmd->base.duplex = + cmd->duplex = (hp->sw_lpa & (LPA_10FULL)) ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->base.autoneg = AUTONEG_DISABLE; + cmd->autoneg = AUTONEG_DISABLE; speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - cmd->base.duplex = + cmd->duplex = (hp->sw_bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } - cmd->base.speed = speed; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - + ethtool_cmd_speed_set(cmd, speed); return 0; } -static int hme_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct happy_meal *hp = netdev_priv(dev); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && - ((cmd->base.speed != SPEED_100 && - cmd->base.speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + if (cmd->autoneg == AUTONEG_DISABLE && + ((ethtool_cmd_speed(cmd) != SPEED_100 && + ethtool_cmd_speed(cmd) != SPEED_10) || + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Ok, do it to it. */ @@ -2543,10 +2537,10 @@ static u32 hme_get_link(struct net_device *dev) } static const struct ethtool_ops hme_ethtool_ops = { + .get_settings = hme_get_settings, + .set_settings = hme_set_settings, .get_drvinfo = hme_get_drvinfo, .get_link = hme_get_link, - .get_link_ksettings = hme_get_link_ksettings, - .set_link_ksettings = hme_set_link_ksettings, }; static int hme_version_printed; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index fa6a06571187ed..72013314bba81f 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1206,68 +1206,61 @@ void gelic_net_get_drvinfo(struct net_device *netdev, strlcpy(info->version, DRV_VERSION, sizeof(info->version)); } -static int gelic_ether_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) +static int gelic_ether_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct gelic_card *card = netdev_card(netdev); - u32 supported, advertising; gelic_card_get_ether_port_status(card, 0); if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX) - cmd->base.duplex = DUPLEX_FULL; + cmd->duplex = DUPLEX_FULL; else - cmd->base.duplex = DUPLEX_HALF; + cmd->duplex = DUPLEX_HALF; switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) { case GELIC_LV1_ETHER_SPEED_10: - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); break; case GELIC_LV1_ETHER_SPEED_100: - cmd->base.speed = SPEED_100; + ethtool_cmd_speed_set(cmd, SPEED_100); break; case GELIC_LV1_ETHER_SPEED_1000: - cmd->base.speed = SPEED_1000; + ethtool_cmd_speed_set(cmd, SPEED_1000); break; default: pr_info("%s: speed unknown\n", __func__); - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); break; } - supported = SUPPORTED_TP | SUPPORTED_Autoneg | + cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full; - 
advertising = supported; + cmd->advertising = cmd->supported; if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) { - cmd->base.autoneg = AUTONEG_ENABLE; + cmd->autoneg = AUTONEG_ENABLE; } else { - cmd->base.autoneg = AUTONEG_DISABLE; - advertising &= ~ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_DISABLE; + cmd->advertising &= ~ADVERTISED_Autoneg; } - cmd->base.port = PORT_TP; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + cmd->port = PORT_TP; return 0; } -static int -gelic_ether_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) +static int gelic_ether_set_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct gelic_card *card = netdev_card(netdev); u64 mode; int ret; - if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (cmd->autoneg == AUTONEG_ENABLE) { mode = GELIC_LV1_ETHER_AUTO_NEG; } else { - switch (cmd->base.speed) { + switch (cmd->speed) { case SPEED_10: mode = GELIC_LV1_ETHER_SPEED_10; break; @@ -1280,9 +1273,9 @@ gelic_ether_set_link_ksettings(struct net_device *netdev, default: return -EINVAL; } - if (cmd->base.duplex == DUPLEX_FULL) { + if (cmd->duplex == DUPLEX_FULL) mode |= GELIC_LV1_ETHER_FULL_DUPLEX; - } else if (cmd->base.speed == SPEED_1000) { + else if (cmd->speed == SPEED_1000) { pr_info("1000 half duplex is not supported.\n"); return -EINVAL; } @@ -1377,11 +1370,11 @@ static int gelic_net_set_wol(struct net_device *netdev, static const struct ethtool_ops gelic_ether_ethtool_ops = { .get_drvinfo = gelic_net_get_drvinfo, + .get_settings = gelic_ether_get_settings, + .set_settings = gelic_ether_set_settings, .get_link = ethtool_op_get_link, .get_wol = gelic_net_get_wol, .set_wol = gelic_net_set_wol, - .get_link_ksettings = gelic_ether_get_link_ksettings, - .set_link_ksettings = gelic_ether_set_link_ksettings, }; /** diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c index 16bd036d06820e..ffe519382e111a 100644 --- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c +++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c @@ -47,23 +47,19 @@ static struct { }; static int -spider_net_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) +spider_net_ethtool_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct spider_net_card *card; card = netdev_priv(netdev); - ethtool_link_ksettings_zero_link_mode(cmd, supported); - ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - - ethtool_link_ksettings_zero_link_mode(cmd, advertising); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - - cmd->base.port = PORT_FIBRE; - cmd->base.speed = card->phy.speed; - cmd->base.duplex = DUPLEX_FULL; + cmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + cmd->port = PORT_FIBRE; + ethtool_cmd_speed_set(cmd, card->phy.speed); + cmd->duplex = DUPLEX_FULL; return 0; } @@ -170,6 +166,7 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset, } const struct ethtool_ops spider_net_ethtool_ops = { + .get_settings = spider_net_ethtool_get_settings, .get_drvinfo = spider_net_ethtool_get_drvinfo, .get_wol = spider_net_ethtool_get_wol, 
.get_msglevel = spider_net_ethtool_get_msglevel, @@ -180,6 +177,5 @@ const struct ethtool_ops spider_net_ethtool_ops = { .get_strings = spider_net_get_strings, .get_sset_count = spider_net_get_sset_count, .get_ethtool_stats = spider_net_get_ethtool_stats, - .get_link_ksettings = spider_net_ethtool_get_link_ksettings, }; From 3331aa378e9bcbd0d16de9034b0c20f4050e26b4 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 6 Mar 2017 08:48:58 -0500 Subject: [PATCH 0993/1911] team: use ETH_MAX_MTU as max mtu This restores the ability to set a team device's mtu to anything higher than 1500. Similar to the reported issue with bonding, the team driver calls ether_setup(), which sets an initial max_mtu of 1500, while the underlying hardware can handle something much larger. Just set it to ETH_MAX_MTU to support all possible values, and the limitations of the underlying devices will prevent setting anything too large. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") CC: Cong Wang CC: Jiri Pirko CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 4a24b5d15f5a5d..1b52520715aec6 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2072,6 +2072,7 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = ETH_MAX_MTU; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; From f7887d40e541f74402df0684a1463c0a0bb68c68 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Mar 2017 08:53:04 -0800 Subject: [PATCH 0994/1911] vrf: Fix use-after-free in vrf_xmit KASAN detected a use-after-free: [ 269.467067] BUG: KASAN: use-after-free in vrf_xmit+0x7f1/0x827 [vrf] at addr ffff8800350a21c0 [ 269.467067] Read of size 4 by task ssh/1879 [ 269.467067] CPU: 1 PID: 1879 Comm: ssh Not tainted 4.10.0+ #249 [ 269.467067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 269.467067] Call Trace: [ 269.467067] dump_stack+0x81/0xb6 [ 269.467067] kasan_object_err+0x21/0x78 [ 269.467067] kasan_report+0x2f7/0x450 [ 269.467067] ? vrf_xmit+0x7f1/0x827 [vrf] [ 269.467067] ? ip_output+0xa4/0xdb [ 269.467067] __asan_load4+0x6b/0x6d [ 269.467067] vrf_xmit+0x7f1/0x827 [vrf] ... Which corresponds to the skb access after xmit handling. Fix by saving skb->len and using the saved value to update stats. Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 22379da6340077..fea687f35b5ac6 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { @@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; - dstats->tx_bytes += skb->len; + dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); From 713c43b315fc160dc4ff3da0020ebe09b8a65587 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 6 Mar 2017 21:22:09 +0100 Subject: [PATCH 0995/1911] mlxsw: spectrum_flower: Remove bogus warns in mlxsw_sp_flower_destroy This warnings may be hit even in case they should not - in case user puts a TC-flower rule which failed to be offloaded. So just remove them. Reported-by: Petr Machata Reported-by: Ido Schimmel Fixes: commit 7aa0f5aa9030 ("mlxsw: spectrum: Implement TC flower offload") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 22ab429253778d..ae6cccc666e461 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, ingress, MLXSW_SP_ACL_PROFILE_FLOWER); - if (WARN_ON(IS_ERR(ruleset))) + if (IS_ERR(ruleset)) return; rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); - if (!WARN_ON(!rule)) { + if (rule) { mlxsw_sp_acl_rule_del(mlxsw_sp, rule); mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); } From 7aafac11e308d37ed3c509829bb43d80c1811ac3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Feb 2017 15:43:59 +1100 Subject: [PATCH 0996/1911] powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested The IODA2 specification says that a 64 DMA address cannot use top 4 bits (3 are reserved and one is a "TVE select"); bottom page_shift bits cannot be used for multilevel table addressing either. The existing IODA2 table allocation code aligns the minimum TCE table size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages, we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs 13 bits, the maximum number of levels is 48/13 = 3 so we physically cannot address more and EEH happens on DMA accesses. This adds a check that too many levels were requested. It is still possible to have 5 levels in the case of 4K system page size. 
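To make the arithmetic above concrete: with 8-byte TCEs each table level addresses level_shift - 3 bits, and only 64 - 4 - page_shift bits are usable, which is where the "* levels + page_shift >= 60" check below comes from. The following standalone userspace sketch (illustration only, not part of the patch) reproduces the 3-level limit for 64K system pages and 4K IOMMU pages:

  #include <stdio.h>

  int main(void)
  {
      unsigned int page_shift = 12;                  /* 4K IOMMU pages */
      unsigned int level_shift = 16;                 /* one 64K system page per level */
      unsigned int bits_per_level = level_shift - 3; /* 8-byte TCEs -> 13 bits */
      unsigned int levels;

      for (levels = 1; levels <= 5; levels++) {
          unsigned int bits = bits_per_level * levels + page_shift;

          /* 60 = 64-bit address minus 3 reserved bits and the TVE select bit */
          printf("levels=%u addresses %u bits: %s\n", levels, bits,
                 bits >= 60 ? "rejected (-EINVAL)" : "ok");
      }
      return 0;
  }

With 4K system pages instead (level_shift = 12, so 9 bits per level), the same check still allows 5 * 9 + 12 = 57 bits, which matches the note above that 5 levels remain possible in that configuration.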
Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6901a06da2f90b..957a57a6c81236 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2624,6 +2624,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); From e61555c29c28a4a3b6ba6207f4a0883ee236004d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Wed, 8 Mar 2017 20:18:09 -0500 Subject: [PATCH 0997/1911] EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MTR_DRAM_WIDTH macro returns the data width. It is sometimes used as if it returned a boolean true if the width if 8. Fix the tests where MTR_DRAM_WIDTH is misused. Signed-off-by: Jérémy Lefaure Cc: linux-edac Link: http://lkml.kernel.org/r/20170309011809.8340-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Borislav Petkov --- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5400_edac.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 1670d27bcac82d..f683919981b067 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index abf6ef22e22060..37a9ba71da449b 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } From 89cf83d4e065ff9fbd2ddc674489c8058eeca758 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 12:54:41 +0000 Subject: [PATCH 0998/1911] drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl We wait upon jiffies, but report the time elapsed using a high-resolution timer. This discrepancy can lead to us timing out the wait prior to us reporting the elapsed time as complete. This restores the squelching lost in commit e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers"). 
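The window being squelched is at most one scheduler tick: the wait itself is timed in jiffies while the reported remainder comes from ktime, so -ETIME can be returned while a sub-jiffy amount of the budget still appears to be left. A rough userspace sketch of the "less than one jiffy left" test (assumed HZ value, simplified truncating stand-in for the kernel's nsecs_to_jiffies):

  #include <stdio.h>

  #define HZ 250                                  /* assumed tick rate */
  #define NSEC_PER_JIFFY (1000000000ULL / HZ)

  /* simplified stand-in for the kernel helper of the same name */
  static unsigned long nsecs_to_jiffies(unsigned long long ns)
  {
      return ns / NSEC_PER_JIFFY;
  }

  int main(void)
  {
      /* The wait already reported -ETIME, yet ~3.9ms of budget remains. */
      unsigned long long timeout_ns = 3900000;

      if (nsecs_to_jiffies(timeout_ns) == 0)
          printf("remainder (%llu ns) is below one jiffy -> report 0\n",
                 timeout_ns);
      return 0;
  }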
Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit c1d2061b28c2aa25ec39b60d9c248e6beebd7315) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d17c..c45af09555dc9a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3029,6 +3029,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); From 1d972d6021a1388021df51a58248e68372ce2b5d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:57 +0200 Subject: [PATCH 0999/1911] drm/i915/glk: Fix watermark computations for third sprite plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake has a third sprite plane (or fourth universal plane) that is independent from the cursor. Make sure that for_each_plane_id_on_crtc() is aware of that extra plane so that the watermark code takes it into account. Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly") Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 19c3164db457e0fc65d4501fd354506228576241) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a4b42d313912c..7febe6eecf722a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; From b717a0392530ae8da0da041abe5c3a6098b55660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 11:43:06 +0000 Subject: [PATCH 1000/1911] drm/i915/fbdev: Stop repeating tile configuration on stagnation If we cease making progress in finding matching outputs for a tiled configuration, stop looping over the remaining unconfigured outputs. v2: Use conn_seq (instead of pass) to only apply tile configuration on first pass. Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. 
(v2)") Signed-off-by: Chris Wilson Cc: Tomasz Lis Cc: Dave Airlie Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: # v3.19+ Reviewed-by: Tomasz Lis Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk (cherry picked from commit 754a76591b12c88f57ad8b4ca533a5c9566a1922) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 1b8ba2e7753957..2d449fb5d1d2b0 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = BIT(count) - 1; conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -387,7 +387,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -498,10 +498,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the From 8c9923707f30ff56d9fd242053594b18f38d8036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:54 +0000 Subject: [PATCH 1001/1911] drm/i915: Remove the vma from the drm_mm if binding fails As we track whether a vma has been inserted into the drm_mm using the vma->flags, if we fail to bind the vma into the GTT we do not update those bits and will attempt to reinsert the vma into the drm_mm on future passes. To prevent that, we want to unwind i915_vma_insert() if we fail in our attempt to bind. 
Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer") Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.9+ Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk (cherry picked from commit 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vma.c | 57 +++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e848120a..df20e9bc1c0f3d 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -512,10 +512,36 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. 
- */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) From 34dc8993eef63681b062871413a9484008a2a78f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: [PATCH 1002/1911] drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Cc: # v4.2+ Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 6067a27d1f0184596d51decbac1c1fdc4acb012f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0ca..65cd4c56c9dda3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. 
+ */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; From d253371c4c2f5fc2d884ef25f64decd7549aff5a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 Feb 2017 16:32:10 +0200 Subject: [PATCH 1003/1911] drm/i915/gen9: Increase PCODE request timeout to 50ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 2c7d0602c815277f7cb7c932b091288710d8aba7 Author: Imre Deak Date: Mon Dec 5 18:27:37 2016 +0200 drm/i915/gen9: Fix PCODE polling during CDCLK change notification there is still one report of the CDCLK-change request timing out on a KBL machine, see the Reference link. On that machine the maximum time the request took to succeed was 34ms, so increase the timeout to 50ms. v2: - Change timeout from 100 to 50 ms to maintain the current 50 ms limit for atomic waits in the driver. (Chris, Tvrtko) Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Signed-off-by: Imre Deak Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com (cherry picked from commit 0129936ddda26afd5d9d207c4e86b2425952579f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 65cd4c56c9dda3..940bab22d4649b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7923,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7962,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. 
*/ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: From 38230243ef316ac696956d75dc78a22e3aa789b9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2017 15:28:47 +0100 Subject: [PATCH 1004/1911] drm/i915: Move updating color management to before vblank evasion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cannot be done reliably during vblank evasasion since the color management registers are not double buffered. The original commit that moved it always during vblank evasion was wrong, so revert it to before vblank evasion again. Signed-off-by: Maarten Lankhorst Fixes: 20a34e78f0d7 ("drm/i915: Update color management during vblank evasion.") Cc: stable@vger.kernel.org # v4.7+ Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä (cherry picked from commit 567f0792a6ad11c0c2620944b8eeb777359fb85a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 01341670738fbb..9a8b6a13233d93 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14946,17 +14946,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); + if (!modeset && + (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe)) { + intel_color_set_csc(crtc->state); + intel_color_load_luts(crtc->state); + } + /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) goto out; - if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); - } - if (intel_cstate->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) From 0d9dc306e15b59bf50db87ebcb1e2248586d4733 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 13:20:31 +0000 Subject: [PATCH 1005/1911] drm/i915: Store a permanent error in obj->mm.pages Once the object has been truncated, it is unrecoverable. To facilitate detection of this state store the error in obj->mm.pages. This is required for the next patch which should be applied to v4.10 (via stable), so we also need to mark this patch for backporting. In that regard, let's consider this to be a fix/improvement too. v2: Avoid dereferencing the ERR_PTR when freeing the object. 
Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170307132031.32461-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 4e5462ee843c883790e9609cf560d88960ea4227) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c45af09555dc9a..3591e8656ff908 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2119,6 +2119,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) */ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; + obj->mm.pages = ERR_PTR(-EFAULT); } /* Try to discard unwanted pages */ @@ -2218,7 +2219,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - obj->ops->put_pages(obj, pages); + if (!IS_ERR(pages)) + obj->ops->put_pages(obj, pages); + unlock: mutex_unlock(&obj->mm.lock); } @@ -2437,7 +2440,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { err = ____i915_gem_object_get_pages(obj); if (err) goto unlock; @@ -2515,7 +2518,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; From 4e6fdafa7ac395ad47a80a0e7b4fd1e11550f862 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 12:03:38 +0000 Subject: [PATCH 1006/1911] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Before we instantiate/pin the backing store for our use, we can prepopulate the shmemfs filp efficiently using a write into the pagecache. We avoid the penalty of instantiating all the pages, important if the user is just writing to a few and never uses the object on the GPU, and using a direct write into shmemfs allows it to avoid the cost of retrieving a page (mostly the clear-before-use, but in theory we could curtail swapin) before it is overwritten. This can be extended later to provide additional specialisation for other backends (other than shmemfs). For now it provides a defense against very large write-only allocations from exhausting all of system memory. v2: Smelling fixes. 
Fixes: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex") References: https://bugs.freedesktop.org/show_bug.cgi?id=99107 Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: # v4.10+ Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170307120338.7277-2-chris@chris-wilson.co.uk (cherry picked from commit 7c55e2c5772dcf3cbacd0fa2bcfeefae416b73f7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 78 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_object.h | 3 + 2 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3591e8656ff908..10777da730394f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pwrite) + ret = obj->ops->pwrite(obj, args); + if (ret != -ENODEV) + goto err; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, @@ -2566,6 +2572,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto out_unlock; } +static int +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *arg) +{ + struct address_space *mapping = obj->base.filp->f_mapping; + char __user *user_data = u64_to_user_ptr(arg->data_ptr); + u64 remain, offset; + unsigned int pg; + + /* Before we instantiate/pin the backing store for our use, we + * can prepopulate the shmemfs filp efficiently using a write into + * the pagecache. We avoid the penalty of instantiating all the + * pages, important if the user is just writing to a few and never + * uses the object on the GPU, and using a direct write into shmemfs + * allows it to avoid the cost of retrieving a page (either swapin + * or clearing-before-use) before it is overwritten. + */ + if (READ_ONCE(obj->mm.pages)) + return -ENODEV; + + /* Before the pages are instantiated the object is treated as being + * in the CPU domain. The pages will be clflushed as required before + * use, and we can freely write into the pages directly. If userspace + * races pwrite with any other operation; corruption will ensue - + * that is userspace's prerogative! 
+ */ + + remain = arg->size; + offset = arg->offset; + pg = offset_in_page(offset); + + do { + unsigned int len, unwritten; + struct page *page; + void *data, *vaddr; + int err; + + len = PAGE_SIZE - pg; + if (len > remain) + len = remain; + + err = pagecache_write_begin(obj->base.filp, mapping, + offset, len, 0, + &page, &data); + if (err < 0) + return err; + + vaddr = kmap(page); + unwritten = copy_from_user(vaddr + pg, user_data, len); + kunmap(page); + + err = pagecache_write_end(obj->base.filp, mapping, + offset, len, len - unwritten, + page, data); + if (err < 0) + return err; + + if (unwritten) + return -EFAULT; + + remain -= len; + user_data += len; + offset += len; + pg = 0; + } while (remain); + + return 0; +} + static bool ban_context(const struct i915_gem_context *ctx) { return (i915_gem_context_is_bannable(ctx) && @@ -3987,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_gtt, .put_pages = i915_gem_object_put_pages_gtt, + + .pwrite = i915_gem_object_pwrite_gtt, }; struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index bf90b07163d126..76b80a0be79767 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops { struct sg_table *(*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + int (*pwrite)(struct drm_i915_gem_object *, + const struct drm_i915_gem_pwrite *); + int (*dmabuf_export)(struct drm_i915_gem_object *); void (*release)(struct drm_i915_gem_object *); }; From edd06b8353772dca7afcd4640dafa83b521edd55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Mar 2017 22:54:19 +0200 Subject: [PATCH 1007/1911] drm/i915: Nuke debug messages from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170307205419.19447-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit c3f8ad57a01a31397e5a0349a226a32f35ddc19c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9a8b6a13233d93..b3e0cd133b4957 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, /* drm_atomic_helper_update_legacy_modeset_state might not be called. 
*/ crtc->base.mode = crtc->base.state->mode; - DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n", - old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); - /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) struct intel_crtc_scaler_state *scaler_state = &crtc->config->scaler_state; - DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config); - if (crtc->config->pch_pfit.enabled) { int id; - if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) { - DRM_ERROR("Requesting pfit without getting a scaler first\n"); + if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - } id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos); I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size); - - DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id); } } From 5a8cf90d743f2d05433c6109f6c1b9b904b0cdb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 20:47:41 +0000 Subject: [PATCH 1008/1911] drm/i915: Drain the freed state from the tail of the next commit If we have any residual freed atomic state from earlier commits, flush the freed list after performing the current modeset. This prevents the freed list from ever-growing if userspace manages to starve the kernel threads (i.e. we are never able to run our free state worker and eventually the system may even oom). Fixes: 6f0f02dc56f1 ("drm/i915: Move atomic state free from out of fence release") Testcase: igt/kms_cursor/legacy/all-pipes-single-bo Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst (cherry picked from commit ba318c61a9719577b6f451c055f364e4116874b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3e0cd133b4957..3282b0f4b13412 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14369,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14535,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. 
*/ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -16591,18 +16611,6 @@ static void sanitize_watermarks(struct drm_device *dev) drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16623,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); From a677e7046ab5edb33d051bda60cb3be0d60a48cc Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:32 +0530 Subject: [PATCH 1009/1911] KVM: Add documentation for KVM_CAP_NR_MEMSLOTS Add documentation for KVM_CAP_NR_MEMSLOTS capability. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 069450938b795d..3c248f772ae616 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory slot. When changing an existing slot, it may be moved in the guest physical memory space, or its flags may be modified. It may not be resized. Slots may not overlap in guest physical address space. +Bits 0-15 of "slot" specifies the slot id and this value should be +less than the maximum number of user memory slots supported per VM. +The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, +if this capability is supported by the architecture. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be From 7af4df85796589e60a2dfc0f821eca0c4bbce4d2 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:33 +0530 Subject: [PATCH 1010/1911] KVM: arm/arm64: Enable KVM_CAP_NR_MEMSLOTS on arm/arm64 Return KVM_USER_MEM_SLOTS for userspace capability query on NR_MEMSLOTS. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9a2103faeb9ac..96dba7cd8be7b4 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; From 3e92f94a3b8e925a6dd7ec88a5794b2084b5fb65 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:34 +0530 Subject: [PATCH 1011/1911] KVM: arm/arm64: Remove KVM_PRIVATE_MEM_SLOTS definition that are unused arm/arm64 architecture doesnt use private memslots, hence removing KVM_PRIVATE_MEM_SLOTS macro definition. 
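As context for the KVM_CAP_NR_MEMSLOTS documentation and the arm/arm64 enablement above, the capability is queried from userspace like any other KVM extension. A minimal sketch, with error handling omitted:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int main(void)
  {
      int kvm = open("/dev/kvm", O_RDWR);
      /* Returns the number of user memory slots, or 0 if the capability
       * is not reported by this kernel/architecture. */
      int slots = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);

      printf("KVM_CAP_NR_MEMSLOTS: %d\n", slots);
      return 0;
  }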
Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 1 - arch/arm64/include/asm/kvm_host.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index cc495d799c6764..31ee468ce667de 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f21fd38943708f..6ac17ee887c93d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -31,7 +31,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 From 955a3fc6d2a1c11d6d00bce4f3816100ce0530cf Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Wed, 8 Mar 2017 11:38:35 +0530 Subject: [PATCH 1012/1911] KVM: arm64: Increase number of user memslots to 512 Having only 32 memslots is a real constraint for the maximum number of PCI devices that can be assigned to a single guest. Assuming each PCI device/virtual function having two memory BAR regions, we could assign only 15 devices/virtual functions to a guest. Hence increase KVM_USER_MEM_SLOTS to 512 as done in other archs like powerpc. Reviewed-by: Christoffer Dall Signed-off-by: Linu Cherian Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 6ac17ee887c93d..e7705e7bb07b13 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,7 +30,7 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 32 +#define KVM_USER_MEM_SLOTS 512 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 From db08e1d53034a54fe177ced70476fda73954b9e9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 21 Feb 2017 13:41:31 +1100 Subject: [PATCH 1013/1911] powerpc/powernv/ioda2: Update iommu table base on ownership change On POWERNV platform, in order to do DMA via IOMMU (i.e. 32bit DMA in our case), a device needs an iommu_table pointer set via set_iommu_table_base(). The codeflow is: - pnv_pci_ioda2_setup_dma_pe() - pnv_pci_ioda2_setup_default_config() - pnv_ioda_setup_bus_dma() [1] pnv_pci_ioda2_setup_dma_pe() creates IOMMU groups, pnv_pci_ioda2_setup_default_config() does default DMA setup, pnv_ioda_setup_bus_dma() takes a bus PE (on IODA2, all physical function PEs as bus PEs except NPU), walks through all underlying buses and devices, adds all devices to an IOMMU group and sets iommu_table. On IODA2, when VFIO is used, it takes ownership over a PE which means it removes all tables and creates new ones (with a possibility of sharing them among PEs). So when the ownership is returned from VFIO to the kernel, the iommu_table pointer written to a device at [1] is stale and needs an update. This adds an "add_to_group" parameter to pnv_ioda_setup_bus_dma() (in fact re-adds as it used to be there a while ago for different reasons) to tell the helper if a device needs to be added to an IOMMU group with an iommu_table update or just the latter. 
This calls pnv_ioda_setup_bus_dma(..., false) from pnv_ioda2_release_ownership() so when the ownership is restored, 32bit DMA can work again for a device. This does the same thing on obtaining ownership as the iommu_table point is stale at this point anyway and it is safer to have NULL there. We did not hit this earlier as all tested devices in recent years were only using 64bit DMA; the rare exception for this is MPT3 SAS adapter which uses both 32bit and 64bit DMA access and it has not been tested with VFIO much. Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 957a57a6c81236..e36738291c3205 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus) + struct pci_bus *bus, + bool add_to_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); set_dma_offset(&dev->dev, pe->tce_bypass_base); - iommu_add_device(&dev->dev); + if (add_to_group) + iommu_add_device(&dev->dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_group); } } @@ -2191,7 +2194,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_set_bypass(pe, false); pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); pnv_ioda2_table_free(tbl); } @@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) table_group); pnv_pci_ioda2_setup_default_config(pe); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2731,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); } #ifdef CONFIG_PCI_MSI From 85550f9148a852ed363a386577ad31b97b95dfb8 Mon Sep 17 00:00:00 2001 From: Jelle Martijn Kok Date: Tue, 21 Feb 2017 12:48:18 +0100 Subject: [PATCH 1014/1911] usb: ohci-at91: Do not drop unhandled USB suspend control requests In patch 2e2aa1bc7eff90ecm, USB suspend and wakeup control requests are passed to SFR_OHCIICR register. If a processor does not have such a register, this hub control request will be dropped. If no such a SFR register is available, all USB suspend control requests will now be processed using ohci_hub_control() (like before patch 2e2aa1bc7eff90ecm.) 
Tested on an Atmel AT91SAM9G20 with an on-board TI TUSB2046B hub chip If the last USB device is unplugged from the USB hub, the hub goes into sleep and will not wakeup when an USB devices is inserted. Fixes: 2e2aa1bc7eff90ec ("usb: ohci-at91: Forcibly suspend ports while USB suspend") Signed-off-by: Jelle Martijn Kok Tested-by: Wenyou Yang Cc: Wenyou Yang Cc: Alan Stern Cc: stable Acked-by: Nicolas Ferre Reviewed-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 414e3c376dbbd5..5302f988e7e670 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -350,7 +350,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); return 0; @@ -393,7 +393,7 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n"); - if (valid_port(wIndex)) { + if (valid_port(wIndex) && ohci_at91->sfr_regmap) { ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); return 0; From fd567653bdb908009b650f079bfd4b63169e2ac4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 22 Feb 2017 15:23:22 -0300 Subject: [PATCH 1015/1911] usb: phy: isp1301: Add OF device ID table The driver doesn't have a struct of_device_id table but supported devices are registered via Device Trees. This is working on the assumption that a I2C device registered via OF will always match a legacy I2C device ID and that the MODALIAS reported will always be of the form i2c:. But this could change in the future so the correct approach is to have an OF device ID table if the devices are registered via OF. Signed-off-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index db68156568e6e7..b3b33cf7ddf608 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -33,6 +33,12 @@ static const struct i2c_device_id isp1301_id[] = { }; MODULE_DEVICE_TABLE(i2c, isp1301_id); +static const struct of_device_id isp1301_of_match[] = { + {.compatible = "nxp,isp1301" }, + { }, +}; +MODULE_DEVICE_TABLE(of, isp1301_of_match); + static struct i2c_client *isp1301_i2c_client; static int __isp1301_write(struct isp1301 *isp, u8 reg, u8 value, u8 clear) @@ -130,6 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, + .of_match_table = of_match_ptr(isp1301_of_match), }, .probe = isp1301_probe, .remove = isp1301_remove, From b7321e81fc369abe353cf094d4f0dc2fe11ab95f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:03 +0100 Subject: [PATCH 1016/1911] USB: iowarrior: fix NULL-deref at probe Make sure to check for the required interrupt-in endpoint to avoid dereferencing a NULL-pointer should a malicious device lack such an endpoint. Note that a fairly recent change purported to fix this issue, but added an insufficient test on the number of endpoints only, a test which can now be removed. 
Fixes: 4ec0ef3a8212 ("USB: iowarrior: fix oops with malicious USB descriptors") Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 095778ff984de2..3ad058cbe6caaf 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -781,12 +781,6 @@ static int iowarrior_probe(struct usb_interface *interface, iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); - if (iface_desc->desc.bNumEndpoints < 1) { - dev_err(&interface->dev, "Invalid number of endpoints\n"); - retval = -EINVAL; - goto error; - } - /* set up the endpoint information */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; @@ -797,6 +791,13 @@ static int iowarrior_probe(struct usb_interface *interface, /* this one will match for the IOWarrior56 only */ dev->int_out_endpoint = endpoint; } + + if (!dev->int_in_endpoint) { + dev_err(&interface->dev, "no interrupt-in endpoint found\n"); + retval = -ENODEV; + goto error; + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From de46e56653de7b3b54baa625bd582635008b8d05 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 7 Mar 2017 16:11:04 +0100 Subject: [PATCH 1017/1911] USB: iowarrior: fix NULL-deref in write Make sure to verify that we have the required interrupt-out endpoint for IOWarrior56 devices to avoid dereferencing a NULL-pointer in write should a malicious device lack such an endpoint. Fixes: 946b960d13c1 ("USB: add driver for iowarrior devices.") Cc: stable # 2.6.21 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3ad058cbe6caaf..37c63cb39714b8 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -798,6 +798,14 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } + if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if (!dev->int_out_endpoint) { + dev_err(&interface->dev, "no interrupt-out endpoint found\n"); + retval = -ENODEV; + goto error; + } + } + /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && From d595259fbb7a7afed241b1afb2c4fe4b47de47fa Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Tue, 28 Feb 2017 00:46:58 +0100 Subject: [PATCH 1018/1911] usb-storage: Add ignore-residue quirk for Initio INIC-3619 This USB-SATA bridge chip is used in a StarTech enclosure for optical drives. 
Without the quirk MakeMKV fails during the key exchange with an installed BluRay drive: > Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED' > occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:2' Signed-off-by: Tobias Jakobi Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 16cc18369111d0..9129f6cb823074 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2071,6 +2071,20 @@ UNUSUAL_DEV( 0x1370, 0x6828, 0x0110, 0x0110, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE ), +/* + * Reported by Tobias Jakobi + * The INIC-3619 bridge is used in the StarTech SLSODDU33B + * SATA-USB enclosure for slimline optical drives. + * + * The quirk enables MakeMKV to properly exchange keys with + * an installed BD drive. + */ +UNUSUAL_DEV( 0x13fd, 0x3609, 0x0209, 0x0209, + "Initio Corporation", + "INIC-3619", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_RESIDUE ), + /* Reported by Qinglin Ye */ UNUSUAL_DEV( 0x13fe, 0x3600, 0x0100, 0x0100, "Kingston", From cfa47afe77b393e2c24a57e7e9611857a0b064f1 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:20 +0100 Subject: [PATCH 1019/1911] usb: usb251xb: remove max_{power,current}_{sp,bp} properties Remove the max_{power,current}_{sp,bp} properties of the usb251xb driver from devicetree. This is done to simplify the dt bindings as requested by Rob Herring in https://lkml.org/lkml/2017/2/15/1283. If those properties are ever needed by somebody they can be enabled again easily. Signed-off-by: Richard Leitner Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb251xb.txt | 20 ---------------- drivers/usb/misc/usb251xb.c | 24 ++++--------------- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index 0c065f77658f13..a5efd10ace9e0b 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -40,26 +40,6 @@ Optional properties : device connected. - sp-disabled-ports : Specifies the ports which will be self-power disabled - bp-disabled-ports : Specifies the ports which will be bus-power disabled - - max-sp-power : Specifies the maximum current the hub consumes from an - upstream port when operating as self-powered hub including the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 2). - - max-bp-power : Specifies the maximum current the hub consumes from an - upstream port when operating as bus-powered hub including the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 100). - - max-sp-current : Specifies the maximum current the hub consumes from an - upstream port when operating as self-powered hub EXCLUDING the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 2). 
- - max-bp-current : Specifies the maximum current the hub consumes from an - upstream port when operating as bus-powered hub EXCLUDING the power - consumption of a permanently attached peripheral if the hub is - configured as a compound device. The value is given in mA in a 0 - 500 - range (default is 100). - power-on-time : Specifies the time it takes from the time the host initiates the power-on sequence to a port until the port has adequate power. The value is given in ms in a 0 - 510 range (default is 100ms). diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 4e18600dc9b43e..3f9c3060c477e7 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -432,26 +432,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } } - hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF; - if (!of_property_read_u32(np, "max-sp-power", property_u32)) - hub->max_power_sp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS; - if (!of_property_read_u32(np, "max-bp-power", property_u32)) - hub->max_power_bp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF; - if (!of_property_read_u32(np, "max-sp-current", property_u32)) - hub->max_current_sp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - - hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS; - if (!of_property_read_u32(np, "max-bp-current", property_u32)) - hub->max_current_bp = min_t(u8, be32_to_cpu(*property_u32) / 2, - 250); - hub->power_on_time = USB251XB_DEF_POWER_ON_TIME; if (!of_property_read_u32(np, "power-on-time", property_u32)) hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2, @@ -492,6 +472,10 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, /* The following parameters are currently not exposed to devicetree, but * may be as soon as needed. */ + hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF; + hub->max_power_bp = USB251XB_DEF_MAX_POWER_BUS; + hub->max_current_sp = USB251XB_DEF_MAX_CURRENT_SELF; + hub->max_current_bp = USB251XB_DEF_MAX_CURRENT_BUS; hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE; hub->boost_up = USB251XB_DEF_BOOST_UP; hub->boost_x = USB251XB_DEF_BOOST_X; From 7f7d8ba3b2140d993887a7db7a83d85c1f8db0e8 Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:21 +0100 Subject: [PATCH 1020/1911] usb: usb251xb: dt: add unit suffix to oc-delay and power-on-time Rename oc-delay-* to oc-delay-us and make it expect a time value. Furthermore add -ms suffix to power-on-time. There changes were suggested by Rob Herring in https://lkml.org/lkml/2017/2/15/1283. Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb251xb.txt | 10 +++--- drivers/usb/misc/usb251xb.c | 35 +++++++++++-------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index a5efd10ace9e0b..91499ae028db71 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -31,7 +31,9 @@ Optional properties : (default is individual) - dynamic-power-switching : enable auto-switching from self- to bus-powered operation if the local power source is removed or unavailable - - oc-delay-{100us,4ms,8ms,16ms} : set over current timer delay (default is 8ms) + - oc-delay-us : Delay time (in microseconds) for filtering the over-current + sense inputs. 
Valid values are 100, 4000, 8000 (default) and 16000. If + an invalid value is given, the default is used instead. - compound-device : indicated the hub is part of a compound device - port-mapping-mode : enable port mapping mode - string-support : enable string descriptor support (required for manufacturer, @@ -40,9 +42,9 @@ Optional properties : device connected. - sp-disabled-ports : Specifies the ports which will be self-power disabled - bp-disabled-ports : Specifies the ports which will be bus-power disabled - - power-on-time : Specifies the time it takes from the time the host initiates - the power-on sequence to a port until the port has adequate power. The - value is given in ms in a 0 - 510 range (default is 100ms). + - power-on-time-ms : Specifies the time it takes from the time the host + initiates the power-on sequence to a port until the port has adequate + power. The value is given in ms in a 0 - 510 range (default is 100ms). Examples: usb2512b@2c { diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 3f9c3060c477e7..91f66d68bcb7b5 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -375,18 +375,24 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, if (of_get_property(np, "dynamic-power-switching", NULL)) hub->conf_data2 |= BIT(7); - if (of_get_property(np, "oc-delay-100us", NULL)) { - hub->conf_data2 &= ~BIT(5); - hub->conf_data2 &= ~BIT(4); - } else if (of_get_property(np, "oc-delay-4ms", NULL)) { - hub->conf_data2 &= ~BIT(5); - hub->conf_data2 |= BIT(4); - } else if (of_get_property(np, "oc-delay-8ms", NULL)) { - hub->conf_data2 |= BIT(5); - hub->conf_data2 &= ~BIT(4); - } else if (of_get_property(np, "oc-delay-16ms", NULL)) { - hub->conf_data2 |= BIT(5); - hub->conf_data2 |= BIT(4); + if (!of_property_read_u32(np, "oc-delay-us", property_u32)) { + if (*property_u32 == 100) { + /* 100 us*/ + hub->conf_data2 &= ~BIT(5); + hub->conf_data2 &= ~BIT(4); + } else if (*property_u32 == 4000) { + /* 4 ms */ + hub->conf_data2 &= ~BIT(5); + hub->conf_data2 |= BIT(4); + } else if (*property_u32 == 16000) { + /* 16 ms */ + hub->conf_data2 |= BIT(5); + hub->conf_data2 |= BIT(4); + } else { + /* 8 ms (DEFAULT) */ + hub->conf_data2 |= BIT(5); + hub->conf_data2 &= ~BIT(4); + } } if (of_get_property(np, "compound-device", NULL)) @@ -433,9 +439,8 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } hub->power_on_time = USB251XB_DEF_POWER_ON_TIME; - if (!of_property_read_u32(np, "power-on-time", property_u32)) - hub->power_on_time = min_t(u8, be32_to_cpu(*property_u32) / 2, - 255); + if (!of_property_read_u32(np, "power-on-time-ms", property_u32)) + hub->power_on_time = min_t(u8, *property_u32 / 2, 255); if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1)) hub->lang_id = USB251XB_DEF_LANGUAGE_ID; From fa56fe4ca4a1e4d9715f144857c98074e41bc94f Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:22 +0100 Subject: [PATCH 1021/1911] doc: dt-bindings: usb251xb: mark reg as required Mark the reg property as required and furthermore fix some typos and spellings in the documentation. 
Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/usb251xb.txt | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt index 91499ae028db71..3957d4edaa745f 100644 --- a/Documentation/devicetree/bindings/usb/usb251xb.txt +++ b/Documentation/devicetree/bindings/usb/usb251xb.txt @@ -7,18 +7,18 @@ Required properties : - compatible : Should be "microchip,usb251xb" or one of the specific types: "microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b", "microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi" - - hub-reset-gpios : Should specify the gpio for hub reset + - reset-gpios : Should specify the gpio for hub reset + - reg : I2C address on the selected bus (default is <0x2C>) Optional properties : - - reg : I2C address on the selected bus (default is <0x2C>) - skip-config : Skip Hub configuration, but only send the USB-Attach command - - vendor-id : USB Vendor ID of the hub (16 bit, default is 0x0424) - - product-id : USB Product ID of the hub (16 bit, default depends on type) - - device-id : USB Device ID of the hub (16 bit, default is 0x0bb3) - - language-id : USB Language ID (16 bit, default is 0x0000) - - manufacturer : USB Manufacturer string (max 31 characters long) - - product : USB Product string (max 31 characters long) - - serial : USB Serial string (max 31 characters long) + - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424) + - product-id : Set USB Product ID of the hub (16 bit, default depends on type) + - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3) + - language-id : Set USB Language ID (16 bit, default is 0x0000) + - manufacturer : Set USB Manufacturer string (max 31 characters long) + - product : Set USB Product string (max 31 characters long) + - serial : Set USB Serial string (max 31 characters long) - {bus,self}-powered : selects between self- and bus-powered operation (default is self-powered) - disable-hi-speed : disable USB Hi-Speed support @@ -34,7 +34,7 @@ Optional properties : - oc-delay-us : Delay time (in microseconds) for filtering the over-current sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If an invalid value is given, the default is used instead. - - compound-device : indicated the hub is part of a compound device + - compound-device : indicate the hub is part of a compound device - port-mapping-mode : enable port mapping mode - string-support : enable string descriptor support (required for manufacturer, product and serial string configuration) @@ -49,7 +49,8 @@ Optional properties : Examples: usb2512b@2c { compatible = "microchip,usb2512b"; - hub-reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; + reg = <0x2c>; + reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; }; usb2514b@2c { From 829b84db0c0c120ae5855a69cc0c94ff75fafabb Mon Sep 17 00:00:00 2001 From: Richard Leitner Date: Mon, 6 Mar 2017 09:24:23 +0100 Subject: [PATCH 1022/1911] MAINTAINERS: usb251xb: remove reference inexistent file The platform_data header file was dropped in the merged version of the USB251xB driver. Therefore remove its reference from the MAINTAINERS file. 
Signed-off-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe48481f..c776906f67a9f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8307,7 +8307,6 @@ M: Richard Leitner L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/misc/usb251xb.c -F: include/linux/platform_data/usb251xb.h F: Documentation/devicetree/bindings/usb/usb251xb.txt MICROSOFT SURFACE PRO 3 BUTTON DRIVER From 2f6821462fe3ace62df3f1b5a9463153e8288298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: [PATCH 1023/1911] USB: serial: digi_acceleport: fix OOB-event processing A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Cc: stable # v2.6.30 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ab78111e09680f..6537d3ca2797d8 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; From 9200c6f177638909dbbaded8aeeeccbd48744400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 9 Feb 2017 00:30:22 +0100 Subject: [PATCH 1024/1911] Revert "phy: Add USB3 PHY support for Broadcom NSP SoC" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d7bc1a7d41bf ("phy: Add USB3 PHY support for Broadcom NSP SoC") as we already have driver for this PHY (shared by NS and NSP). It was added in commit e5666281d9ea ("phy: bcm-ns-usb3: new driver for USB 3.0 PHY on Northstar"). Instead of adding separated driver & duplicating code we should work on improving existing (old) one. Thanks to work done by Broadcom we know there is MDIO bus we weren't aware of & we know register names which makes initialization more clear. This is very valuable info and we should work on using it in existing driver afterwards. Acked-by: Jon Mason Signed-off-by: Rafał Miłecki --- drivers/phy/Kconfig | 8 -- drivers/phy/Makefile | 1 - drivers/phy/phy-bcm-nsp-usb3.c | 177 --------------------------------- 3 files changed, 186 deletions(-) delete mode 100644 drivers/phy/phy-bcm-nsp-usb3.c diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index dc5277ad1b5a7a..c11044439fd447 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -510,12 +510,4 @@ config PHY_MESON8B_USB2 and GXBB SoCs. If unsure, say N. -config PHY_NSP_USB3 - tristate "Broadcom NorthStar plus USB3 PHY driver" - depends on OF && (ARCH_BCM_NSP || COMPILE_TEST) - select GENERIC_PHY - default ARCH_BCM_NSP - help - Enable this to support the Broadcom Northstar plus USB3 PHY. - If unsure, say N. 
endmenu diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index e7b0feb1e125a5..dd8f3b5d2918cd 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -62,4 +62,3 @@ obj-$(CONFIG_PHY_CYGNUS_PCIE) += phy-bcm-cygnus-pcie.o obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-$(CONFIG_PHY_NS2_PCIE) += phy-bcm-ns2-pcie.o obj-$(CONFIG_PHY_MESON8B_USB2) += phy-meson8b-usb2.o -obj-$(CONFIG_PHY_NSP_USB3) += phy-bcm-nsp-usb3.o diff --git a/drivers/phy/phy-bcm-nsp-usb3.c b/drivers/phy/phy-bcm-nsp-usb3.c deleted file mode 100644 index 49024eaa55459b..00000000000000 --- a/drivers/phy/phy-bcm-nsp-usb3.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2016 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NSP_USB3_RST_CTRL_OFFSET 0x3f8 - -/* mdio reg access */ -#define NSP_USB3_PHY_BASE_ADDR_REG 0x1f - -#define NSP_USB3_PHY_PLL30_BLOCK 0x8000 -#define NSP_USB3_PLL_CONTROL 0x01 -#define NSP_USB3_PLLA_CONTROL0 0x0a -#define NSP_USB3_PLLA_CONTROL1 0x0b - -#define NSP_USB3_PHY_TX_PMD_BLOCK 0x8040 -#define NSP_USB3_TX_PMD_CONTROL1 0x01 - -#define NSP_USB3_PHY_PIPE_BLOCK 0x8060 -#define NSP_USB3_LFPS_CMP 0x02 -#define NSP_USB3_LFPS_DEGLITCH 0x03 - -struct nsp_usb3_phy { - struct regmap *usb3_ctrl; - struct phy *phy; - struct mdio_device *mdiodev; -}; - -static int nsp_usb3_phy_init(struct phy *phy) -{ - struct nsp_usb3_phy *iphy = phy_get_drvdata(phy); - struct mii_bus *bus = iphy->mdiodev->bus; - int addr = iphy->mdiodev->addr; - u32 data; - int rc; - - rc = regmap_read(iphy->usb3_ctrl, 0, &data); - if (rc) - return rc; - data |= 1; - rc = regmap_write(iphy->usb3_ctrl, 0, data); - if (rc) - return rc; - - rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 1); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_PLL30_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x1000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL0, 0x6400); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0xc000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0x8000); - if (rc) - return rc; - - rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 0); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x9000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_PIPE_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_CMP, 0xf30d); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_DEGLITCH, 0x6302); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_TX_PMD_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_TX_PMD_CONTROL1, 0x1003); - - return rc; -} - -static struct phy_ops nsp_usb3_phy_ops = { - .init = nsp_usb3_phy_init, - .owner = THIS_MODULE, -}; - -static int nsp_usb3_phy_probe(struct mdio_device *mdiodev) -{ - 
struct device *dev = &mdiodev->dev; - struct phy_provider *provider; - struct nsp_usb3_phy *iphy; - - iphy = devm_kzalloc(dev, sizeof(*iphy), GFP_KERNEL); - if (!iphy) - return -ENOMEM; - iphy->mdiodev = mdiodev; - - iphy->usb3_ctrl = syscon_regmap_lookup_by_phandle(dev->of_node, - "usb3-ctrl-syscon"); - if (IS_ERR(iphy->usb3_ctrl)) - return PTR_ERR(iphy->usb3_ctrl); - - iphy->phy = devm_phy_create(dev, dev->of_node, &nsp_usb3_phy_ops); - if (IS_ERR(iphy->phy)) { - dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(iphy->phy); - } - - phy_set_drvdata(iphy->phy, iphy); - - provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - if (IS_ERR(provider)) { - dev_err(dev, "could not register PHY provider\n"); - return PTR_ERR(provider); - } - - return 0; -} - -static const struct of_device_id nsp_usb3_phy_of_match[] = { - {.compatible = "brcm,nsp-usb3-phy",}, - { /* sentinel */ } -}; - -static struct mdio_driver nsp_usb3_phy_driver = { - .mdiodrv = { - .driver = { - .name = "nsp-usb3-phy", - .of_match_table = nsp_usb3_phy_of_match, - }, - }, - .probe = nsp_usb3_phy_probe, -}; - -mdio_module_driver(nsp_usb3_phy_driver); - -MODULE_DESCRIPTION("Broadcom NSP USB3 PHY driver"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy Date: Thu, 9 Feb 2017 00:30:23 +0100 Subject: [PATCH 1025/1911] Revert "dt-bindings: phy: Add documentation for NSP USB3 PHY" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c8ca631f9480 ("dt-bindings: phy: Add documentation for NSP USB3 PHY") to match reverting commit adding the new PHY driver. Please note we revert this commit before it reached stable release. If new compatible string is needed it should be added to the existing bcm-ns-usb3-phy.txt which already describes this PHY. Acked-by: Jon Mason Acked-by: Rob Herring Signed-off-by: Rafał Miłecki --- .../bindings/phy/brcm,nsp-usb3-phy.txt | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt diff --git a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt b/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt deleted file mode 100644 index e68ae5dec9c9ef..00000000000000 --- a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt +++ /dev/null @@ -1,39 +0,0 @@ -Broadcom USB3 phy binding for northstar plus SoC -The USB3 phy is internal to the SoC and is accessed using mdio interface. - -Required mdio bus properties: -- reg: Should be 0x0 for SoC internal USB3 phy -- #address-cells: must be 1 -- #size-cells: must be 0 - -Required USB3 PHY properties: -- compatible: should be "brcm,nsp-usb3-phy" -- reg: USB3 Phy address on SoC internal MDIO bus and it should be 0x10. -- usb3-ctrl-syscon: handler of syscon node defining physical address - of usb3 control register. 
-- #phy-cells: must be 0 - -Required usb3 control properties: -- compatible: should be "brcm,nsp-usb3-ctrl" -- reg: offset and length of the control registers - -Example: - - mdio@0 { - reg = <0x0>; - #address-cells = <1>; - #size-cells = <0>; - - usb3_phy: usb-phy@10 { - compatible = "brcm,nsp-usb3-phy"; - reg = <0x10>; - usb3-ctrl-syscon = <&usb3_ctrl>; - #phy-cells = <0>; - status = "disabled"; - }; - }; - - usb3_ctrl: syscon@104408 { - compatible = "brcm,nsp-usb3-ctrl", "syscon"; - reg = <0x104408 0x3fc>; - }; From 11d94e026b962eee6e1fbc4237f2cbfbec839067 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Wed, 8 Mar 2017 17:22:42 +0900 Subject: [PATCH 1026/1911] phy: phy-exynos-pcie: fix the wrong error return When it doesn't get the blk_base's resource, it was returned the error about phy_base, not blk_base. This patch is for fixing the wrong error return about blk_base. Fixes: cf0adb8e281b ("phy: phy-exynos-pcie: Add support for Exynos PCIe PHY") Signed-off-by: Jaehoon Chung Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-exynos-pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-exynos-pcie.c b/drivers/phy/phy-exynos-pcie.c index 4f60b83641d595..60baf25d98e25e 100644 --- a/drivers/phy/phy-exynos-pcie.c +++ b/drivers/phy/phy-exynos-pcie.c @@ -254,8 +254,8 @@ static int exynos_pcie_phy_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 1); exynos_phy->blk_base = devm_ioremap_resource(dev, res); - if (IS_ERR(exynos_phy->phy_base)) - return PTR_ERR(exynos_phy->phy_base); + if (IS_ERR(exynos_phy->blk_base)) + return PTR_ERR(exynos_phy->blk_base); exynos_phy->drv_data = drv_data; From 1a09b6a7c10e22c489a8b212dd6862b1fd9674ad Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 9 Mar 2017 13:45:44 +0530 Subject: [PATCH 1027/1911] phy: qcom-usb-hs: Add depends on EXTCON We get the following compile errors if EXTCON is enabled as a module but this driver is builtin: drivers/built-in.o: In function `qcom_usb_hs_phy_power_off': phy-qcom-usb-hs.c:(.text+0x1089): undefined reference to `extcon_unregister_notifier' drivers/built-in.o: In function `qcom_usb_hs_phy_probe': phy-qcom-usb-hs.c:(.text+0x11b5): undefined reference to `extcon_get_edev_by_phandle' drivers/built-in.o: In function `qcom_usb_hs_phy_power_on': phy-qcom-usb-hs.c:(.text+0x128e): undefined reference to `extcon_get_state' phy-qcom-usb-hs.c:(.text+0x12a9): undefined reference to `extcon_register_notifier' so let's mark this as needing to follow the modular status of the extcon framework. 
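One note on the Kconfig idiom used in the fix below: "depends on EXTCON || !EXTCON" looks like a tautology, but in Kconfig's tristate logic the expression evaluates to m when EXTCON=m, which caps this driver at m as well. The driver can then only be built as a module alongside a modular extcon core, so a built-in phy-qcom-usb-hs object can never be left referencing symbols that live in the extcon module, which is exactly the set of link errors quoted above.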
Fixes: 9994a33865f4 e2427b09ba929c2b9 (phy: Add support for Qualcomm's USB HS phy") Signed-off-by: Stephen Boyd Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index c11044439fd447..005cadb7a3f8e9 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -449,6 +449,7 @@ config PHY_QCOM_UFS config PHY_QCOM_USB_HS tristate "Qualcomm USB HS PHY module" depends on USB_ULPI_BUS + depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in select GENERIC_PHY help Support for the USB high-speed ULPI compliant phy on Qualcomm From 28b62b1458685d8f68f67d9b2d511bf8fa32b746 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 8 Mar 2017 23:14:20 +0200 Subject: [PATCH 1028/1911] crypto: s5p-sss - Fix spinlock recursion on LRW(AES) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running TCRYPT with LRW compiled causes spinlock recursion: testing speed of async lrw(aes) (lrw(ecb-aes-s5p)) encryption tcrypt: test 0 (256 bit key, 16 byte blocks): 19007 operations in 1 seconds (304112 bytes) tcrypt: test 1 (256 bit key, 64 byte blocks): 15753 operations in 1 seconds (1008192 bytes) tcrypt: test 2 (256 bit key, 256 byte blocks): 14293 operations in 1 seconds (3659008 bytes) tcrypt: test 3 (256 bit key, 1024 byte blocks): 11906 operations in 1 seconds (12191744 bytes) tcrypt: test 4 (256 bit key, 8192 byte blocks): BUG: spinlock recursion on CPU#1, irq/84-10830000/89  lock: 0xeea99a68, .magic: dead4ead, .owner: irq/84-10830000/89, .owner_cpu: 1 CPU: 1 PID: 89 Comm: irq/84-10830000 Not tainted 4.11.0-rc1-00001-g897ca6d0800d #559 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x78/0x8c) [] (dump_stack) from [] (do_raw_spin_lock+0x11c/0x120) [] (do_raw_spin_lock) from [] (_raw_spin_lock_irqsave+0x20/0x28) [] (_raw_spin_lock_irqsave) from [] (s5p_aes_crypt+0x2c/0xb4) [] (s5p_aes_crypt) from [] (do_encrypt+0x78/0xb0 [lrw]) [] (do_encrypt [lrw]) from [] (encrypt_done+0x24/0x54 [lrw]) [] (encrypt_done [lrw]) from [] (s5p_aes_complete+0x60/0xcc) [] (s5p_aes_complete) from [] (s5p_aes_interrupt+0x134/0x1a0) [] (s5p_aes_interrupt) from [] (irq_thread_fn+0x1c/0x54) [] (irq_thread_fn) from [] (irq_thread+0x12c/0x1e0) [] (irq_thread) from [] (kthread+0x108/0x138) [] (kthread) from [] (ret_from_fork+0x14/0x3c) Interrupt handling routine was calling req->base.complete() under spinlock. In most cases this wasn't fatal but when combined with some of the cipher modes (like LRW) this caused recursion - starting the new encryption (s5p_aes_crypt()) while still holding the spinlock from previous round (s5p_aes_complete()). Beside that, the s5p_aes_interrupt() error handling path could execute two completions in case of error for RX and TX blocks. Rewrite the interrupt handling routine and the completion by: 1. Splitting the operations on scatterlist copies from s5p_aes_complete() into separate s5p_sg_done(). This still should be done under lock. The s5p_aes_complete() now only calls req->base.complete() and it has to be called outside of lock. 2. Moving the s5p_aes_complete() out of spinlock critical sections. In interrupt service routine s5p_aes_interrupts(), it appeared in few places, including error paths inside other functions called from ISR. This code was not so obvious to read so simplify it by putting the s5p_aes_complete() only within ISR level. 
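The ordering described above (finish the per-request bookkeeping while holding dev->lock, drop the lock, and only then call the completion callback) is the standard way to avoid lock recursion whenever a completion is allowed to submit the next request. A minimal userspace analogy of that ordering, not the driver code; finish_locked(), complete_request() and resubmit() are names invented for the sketch:

  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static void (*user_callback)(int status);

  /* Bookkeeping that genuinely needs the lock (free buffers, clear state). */
  static void finish_locked(void)
  {
  }

  static void complete_request(int status)
  {
          pthread_mutex_lock(&lock);
          finish_locked();
          pthread_mutex_unlock(&lock);

          /*
           * Call back only after the lock is dropped; the callback may
           * immediately start a new request that takes the same lock.
           */
          if (user_callback)
                  user_callback(status);
  }

  static void resubmit(int status)
  {
          pthread_mutex_lock(&lock);      /* safe: the lock is no longer held */
          pthread_mutex_unlock(&lock);
          printf("request %d done, next one queued\n", status);
  }

  int main(void)
  {
          user_callback = resubmit;
          complete_request(0);
          return 0;
  }

With a non-recursive lock, invoking resubmit() from inside the locked region would self-deadlock in the same way the LRW completion recursed into s5p_aes_crypt() above; dropping the lock before the callback makes re-entry safe.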
Reported-by: Nathan Royce Cc: # v4.10.x: 07de4bc88c crypto: s5p-sss - Fix completing Cc: # v4.10.x Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 127 +++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index a668286d62cb17..1b9da3dc799b05 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg, scatterwalk_done(&walk, out, 0); } -static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +static void s5p_sg_done(struct s5p_aes_dev *dev) { if (dev->sg_dst_cpy) { dev_dbg(dev->dev, @@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) } s5p_free_sg_cpy(dev, &dev->sg_src_cpy); s5p_free_sg_cpy(dev, &dev->sg_dst_cpy); +} - /* holding a lock outside */ +/* Calls the completion. Cannot be called with dev->lock hold. */ +static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +{ dev->req->base.complete(&dev->req->base, err); dev->busy = false; } @@ -368,51 +371,44 @@ static int s5p_set_indata(struct s5p_aes_dev *dev, struct scatterlist *sg) } /* - * Returns true if new transmitting (output) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_outdata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new transmitting (output) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_outdata()). */ -static bool s5p_aes_tx(struct s5p_aes_dev *dev) +static int s5p_aes_tx(struct s5p_aes_dev *dev) { - int err = 0; - bool ret = false; + int ret = 0; s5p_unset_outdata(dev); if (!sg_is_last(dev->sg_dst)) { - err = s5p_set_outdata(dev, sg_next(dev->sg_dst)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; - } else { - s5p_aes_complete(dev, err); - - dev->busy = true; - tasklet_schedule(&dev->tasklet); + ret = s5p_set_outdata(dev, sg_next(dev->sg_dst)); + if (!ret) + ret = 1; } return ret; } /* - * Returns true if new receiving (input) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_indata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new receiving (input) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_indata()). */ -static bool s5p_aes_rx(struct s5p_aes_dev *dev) +static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/) { - int err; - bool ret = false; + int ret = 0; s5p_unset_indata(dev); if (!sg_is_last(dev->sg_src)) { - err = s5p_set_indata(dev, sg_next(dev->sg_src)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; + ret = s5p_set_indata(dev, sg_next(dev->sg_src)); + if (!ret) + ret = 1; } return ret; @@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - bool set_dma_tx = false; - bool set_dma_rx = false; + int err_dma_tx = 0; + int err_dma_rx = 0; + bool tx_end = false; unsigned long flags; uint32_t status; + int err; spin_lock_irqsave(&dev->lock, flags); + /* + * Handle rx or tx interrupt. If there is still data (scatterlist did not + * reach end), then map next scatterlist entry. 
+ * In case of such mapping error, s5p_aes_complete() should be called. + * + * If there is no more data in tx scatter list, call s5p_aes_complete() + * and schedule new tasklet. + */ status = SSS_READ(dev, FCINTSTAT); if (status & SSS_FCINTSTAT_BRDMAINT) - set_dma_rx = s5p_aes_rx(dev); - if (status & SSS_FCINTSTAT_BTDMAINT) - set_dma_tx = s5p_aes_tx(dev); + err_dma_rx = s5p_aes_rx(dev); + + if (status & SSS_FCINTSTAT_BTDMAINT) { + if (sg_is_last(dev->sg_dst)) + tx_end = true; + err_dma_tx = s5p_aes_tx(dev); + } SSS_WRITE(dev, FCINTPEND, status); - /* - * Writing length of DMA block (either receiving or transmitting) - * will start the operation immediately, so this should be done - * at the end (even after clearing pending interrupts to not miss the - * interrupt). - */ - if (set_dma_tx) - s5p_set_dma_outdata(dev, dev->sg_dst); - if (set_dma_rx) - s5p_set_dma_indata(dev, dev->sg_src); + if (err_dma_rx < 0) { + err = err_dma_rx; + goto error; + } + if (err_dma_tx < 0) { + err = err_dma_tx; + goto error; + } + + if (tx_end) { + s5p_sg_done(dev); + + spin_unlock_irqrestore(&dev->lock, flags); + + s5p_aes_complete(dev, 0); + dev->busy = true; + tasklet_schedule(&dev->tasklet); + } else { + /* + * Writing length of DMA block (either receiving or + * transmitting) will start the operation immediately, so this + * should be done at the end (even after clearing pending + * interrupts to not miss the interrupt). + */ + if (err_dma_tx == 1) + s5p_set_dma_outdata(dev, dev->sg_dst); + if (err_dma_rx == 1) + s5p_set_dma_indata(dev, dev->sg_src); + spin_unlock_irqrestore(&dev->lock, flags); + } + + return IRQ_HANDLED; + +error: + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); return IRQ_HANDLED; } @@ -597,8 +633,9 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) s5p_unset_indata(dev); indata_error: - s5p_aes_complete(dev, err); + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); } static void s5p_tasklet_cb(unsigned long data) From d6c098a1db468b7fd4635e831f276851dfd8852c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 8 Mar 2017 15:18:54 -0800 Subject: [PATCH 1029/1911] ASoC: don't dereference NULL pcm_{new,free} Not all platform drivers have pcm_{new,free} callbacks. Seen with a "snd-soc-dummy" codec from sound/soc/rockchip/rk3399_gru_sound.c. 
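The rule being applied is the usual one for ops structures with optional hooks: a NULL callback means there is nothing to do, so the caller supplies the default (here, success) instead of dereferencing the pointer. A generic, self-contained illustration follows; struct media_ops and its callers are invented for the sketch and are not the ASoC API:

  #include <stddef.h>
  #include <stdio.h>

  struct media_ops {
          int (*pcm_new)(void *ctx);      /* optional */
          void (*pcm_free)(void *ctx);    /* optional */
  };

  static int media_new(const struct media_ops *ops, void *ctx)
  {
          /* A missing optional hook counts as success, not a crash. */
          if (ops->pcm_new)
                  return ops->pcm_new(ctx);
          return 0;
  }

  static void media_free(const struct media_ops *ops, void *ctx)
  {
          if (ops->pcm_free)
                  ops->pcm_free(ctx);
  }

  int main(void)
  {
          struct media_ops dummy = { 0 }; /* a component with no hooks set */

          printf("new: %d\n", media_new(&dummy, NULL));
          media_free(&dummy, NULL);
          return 0;
  }

Components that implement only some of the hooks then keep working without every call site having to know which hooks exist.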
Fixes: 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free") Signed-off-by: Brian Norris Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 6dca408faae334..2722bb0c557310 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3326,7 +3326,10 @@ static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_platform *platform = rtd->platform; - return platform->driver->pcm_new(rtd); + if (platform->driver->pcm_new) + return platform->driver->pcm_new(rtd); + else + return 0; } static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) @@ -3334,7 +3337,8 @@ static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) struct snd_soc_pcm_runtime *rtd = pcm->private_data; struct snd_soc_platform *platform = rtd->platform; - platform->driver->pcm_free(pcm); + if (platform->driver->pcm_free) + platform->driver->pcm_free(pcm); } /** From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 7 Mar 2017 17:51:49 +0100 Subject: [PATCH 1030/1911] KVM: nVMX: do not warn when MSR bitmap address is not backed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(), we have already checked that the MSR bitmap address is valid (4k aligned and within physical limits). SDM doesn't specify what happens if the there is no memory mapped at the valid address, but Intel CPUs treat the situation as if the bitmap was configured to trap all MSRs. KVM already does that by returning false and a correct handling doesn't need the guest-trigerrable warning that was reported by syzkaller: (The warning was originally there to catch some possible bugs in nVMX.) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline] nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561 handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312 vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526 vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline] vcpu_run arch/x86/kvm/x86.c:7044 [inline] kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205 kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570 Reported-by: Dmitry Vyukov Reviewed-by: Jim Mattson [Jim Mattson explained the bare metal behavior: "I believe this behavior would be documented in the chipset data sheet rather than the SDM, since the chipset returns all 1s for an unclaimed read."] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab338581b3ecc2..98e82ee1e69966 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); memset(msr_bitmap_l0, 0xff, PAGE_SIZE); From 9ad224744218a352964f31007a1420f2420a08a0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 9 Mar 2017 11:05:33 -0300 Subject: [PATCH 1031/1911] i2c: exynos5: Avoid transaction timeouts due TRANSFER_DONE_AUTO not set After commit 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling"), some I2C transactions are failing because the TRANSFER_DONE_AUTO field is not set in the I2C_TRANS_STATUS register so the i2c->status value is left to -EINVAL causing the i2c->msg_complete completion to never be signaled. For example, when reading the time of an I2C rtc on an Exynos5800 machine: $ cat /sys/class/rtc/rtc0/time [ 25.924594] exynos5-hsi2c 12e10000.i2c: rx timeout [ 65.028365] max77686-rtc max77802-rtc: Fail to read time reg(-22) cat: /sys/class/rtc/rtc0/time: Invalid argument The Exynos5422 manual states clearly that most I2C_TRANS_STATUS reg bits (including TRANSFER_DONE_AUTO) are cleared after the register is read. So reading has side effects and should only be done if HSI2C_INT_I2C was set. 
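The general rule behind the fix is that a read-to-clear status register must be read at most once per interrupt, into a local variable, and only on a path that will actually consume the value. A small self-contained simulation of that rule (read_trans_status(), the IRQ_*/ST_* bits and handle_irq() are made up for the illustration and do not reflect the Exynos register layout):

  #include <stdint.h>
  #include <stdio.h>

  #define IRQ_RX   0x1u           /* interrupt cause bits */
  #define IRQ_I2C  0x2u

  #define ST_DONE  0x1u           /* read-to-clear transfer status bits */
  #define ST_ERROR 0x2u

  static uint32_t trans_status_reg = ST_DONE;

  /* Model of a read-to-clear register: reading destroys the state. */
  static uint32_t read_trans_status(void)
  {
          uint32_t v = trans_status_reg;

          trans_status_reg = 0;
          return v;
  }

  static void handle_irq(uint32_t int_status)
  {
          /* Only the branch that consumes the value reads the register. */
          if (int_status & IRQ_I2C) {
                  uint32_t trans = read_trans_status();

                  if (trans & ST_ERROR)
                          printf("transfer failed\n");
                  else if (trans & ST_DONE)
                          printf("transfer complete\n");
          }
          /* An IRQ_RX-only interrupt must leave trans_status_reg alone. */
  }

  int main(void)
  {
          handle_irq(IRQ_RX);     /* does not clobber the latched status */
          handle_irq(IRQ_I2C);    /* still sees ST_DONE */
          return 0;
  }

Reading such a register unconditionally at the top of the handler clears bits during interrupts that never look at them, leaving a later consumer with a status that appears permanently unset, which matches the rx-timeout failure described above.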
Fixes: 7999eecb7e56 ("i2c: exynos5: fix arbitration lost handling") Signed-off-by: Javier Martinez Canillas Reviewed-by: Andrzej Hajda Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-exynos5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index cbd93ce0661f22..736a8247210173 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -457,7 +457,6 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id) int_status = readl(i2c->regs + HSI2C_INT_STATUS); writel(int_status, i2c->regs + HSI2C_INT_STATUS); - trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); /* handle interrupt related to the transfer status */ if (i2c->variant->hw == HSI2C_EXYNOS7) { @@ -482,11 +481,13 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id) goto stop; } + trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) { i2c->state = -EAGAIN; goto stop; } } else if (int_status & HSI2C_INT_I2C) { + trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS); if (trans_status & HSI2C_NO_DEV_ACK) { dev_dbg(i2c->dev, "No ACK from device\n"); i2c->state = -ENXIO; From 8ce0928e6867fda4d208d7bddf251bce96ab8431 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 9 Mar 2017 16:32:17 +0100 Subject: [PATCH 1032/1911] Revert "i2c: add missing of_node_put in i2c_mux_del_adapters" This reverts commit 02dbfa5e5583523035f05636c614a0eca77f1aab. I grabbed the wrong version from the list and will pull the proper one from Peter Rosin's mux tree. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-mux.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 2178266bca7948..83768e85a919cb 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,7 +429,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; - struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -439,7 +438,6 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); - of_node_put(np); kfree(priv); } } From 806dbb20efde821910b5f747befed794077a9109 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 9 Mar 2017 16:41:48 +0100 Subject: [PATCH 1033/1911] Revert "i2c: copy device properties when using i2c_register_board_info()" This reverts commit b0c1e95ab44feaad8831f2c06a3473c974003b49. It contains a flaw and the next version has more features added which makes me want to move it to the next cycle. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-boardinfo.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c index 5b8f6c3a695074..6e5fac6a5262a0 100644 --- a/drivers/i2c/i2c-boardinfo.c +++ b/drivers/i2c/i2c-boardinfo.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -56,7 +55,6 @@ EXPORT_SYMBOL_GPL(__i2c_first_dynamic_bus_num); * * The board info passed can safely be __initdata, but be careful of embedded * pointers (for platform_data, functions, etc) since that won't be copied. - * Device properties are deep-copied though. 
*/ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsigned len) { @@ -80,14 +78,6 @@ int i2c_register_board_info(int busnum, struct i2c_board_info const *info, unsig devinfo->busnum = busnum; devinfo->board_info = *info; - - if (info->properties) { - devinfo->board_info.properties = - property_entries_dup(info->properties); - if (IS_ERR(devinfo->board_info.properties)) - return PTR_ERR(devinfo->board_info.properties); - } - list_add_tail(&devinfo->list, &__i2c_board_list); } From 6022c5cadf1a43ca30f431f128daa6163909ad60 Mon Sep 17 00:00:00 2001 From: Yuantian Tang Date: Thu, 9 Mar 2017 17:13:29 +0800 Subject: [PATCH 1034/1911] ahci: qoriq: correct the sata ecc setting error Sata ecc is controlled by only 1 bit which is 24bit in big-endian in ecc register. So only setting 24bit to disable sata ecc prevents other bits from being overwritten in ecc register. Signed-off-by: Tang Yuantian Signed-off-by: Tejun Heo --- drivers/ata/ahci_qoriq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 85d833289f28f8..4c96f3ac4976d9 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS1043A: if (!qpriv->ecc_addr) return -EINVAL; - writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); + writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, + qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) @@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS1046A: if (!qpriv->ecc_addr) return -EINVAL; - writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); + writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, + qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) From 94a631d91ad341b3b4bdac72d1104d9f090e0ca9 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Mar 2017 15:39:34 +0200 Subject: [PATCH 1035/1911] usb: xhci-mtk: check hcc_params after adding primary hcd hcc_params is set in xhci_gen_setup() called from usb_add_hcd(), so checks the Maximum Primary Stream Array Size in the hcc_params register after adding primary hcd. Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 9066ec9e0c2e7a..6ac73a6b4da67c 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -678,13 +678,13 @@ static int xhci_mtk_probe(struct platform_device *pdev) goto power_off_phys; } - if (HCC_MAX_PSA(xhci->hcc_params) >= 4) - xhci->shared_hcd->can_do_streams = 1; - ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto put_usb3_hcd; + if (HCC_MAX_PSA(xhci->hcc_params) >= 4) + xhci->shared_hcd->can_do_streams = 1; + ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED); if (ret) goto dealloc_usb2_hcd; From 20e4e37e4a2f1bfd43bcc8c3e666e47665036cc3 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Mar 2017 15:39:35 +0200 Subject: [PATCH 1036/1911] usb: xhci: remove dummy extra_priv_size for size of xhci_hcd struct because hcd_priv_size is already size of xhci_hcd struct, extra_priv_size is not needed anymore for MTK and tegra drivers. 
Signed-off-by: Chunfeng Yun Tested-by: Thierry Reding Acked-by: Thierry Reding Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk.c | 1 - drivers/usb/host/xhci-tegra.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 6ac73a6b4da67c..67d5dc79b6b50e 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -382,7 +382,6 @@ static int usb_wakeup_of_property_parse(struct xhci_hcd_mtk *mtk, static int xhci_mtk_setup(struct usb_hcd *hcd); static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = { - .extra_priv_size = sizeof(struct xhci_hcd), .reset = xhci_mtk_setup, }; diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index a59fafb4b329f5..74436f8ca5382f 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1308,7 +1308,6 @@ static int tegra_xhci_setup(struct usb_hcd *hcd) } static const struct xhci_driver_overrides tegra_xhci_overrides __initconst = { - .extra_priv_size = sizeof(struct xhci_hcd), .reset = tegra_xhci_setup, }; From f95e60a7dbecd2de816bb3ad517b3d4fbc20b507 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 9 Mar 2017 15:39:36 +0200 Subject: [PATCH 1037/1911] usb: host: xhci-dbg: HCIVERSION should be a binary number According to xHCI spec, HCIVERSION containing a BCD encoding of the xHCI specification revision number, 0100h corresponds to xHCI version 1.0. Change "100" as "0x100". Cc: Lu Baolu Cc: stable Fixes: 04abb6de2825 ("xhci: Read and parse new xhci 1.1 capability register") Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c index 363d125300eacf..2b4a00fa735dfe 100644 --- a/drivers/usb/host/xhci-dbg.c +++ b/drivers/usb/host/xhci-dbg.c @@ -109,7 +109,7 @@ static void xhci_print_cap_regs(struct xhci_hcd *xhci) xhci_dbg(xhci, "RTSOFF 0x%x:\n", temp & RTSOFF_MASK); /* xhci 1.1 controllers have the HCCPARAMS2 register */ - if (hci_version > 100) { + if (hci_version > 0x100) { temp = readl(&xhci->cap_regs->hcc_params2); xhci_dbg(xhci, "HCC PARAMS2 0x%x:\n", (unsigned int) temp); xhci_dbg(xhci, " HC %s Force save context capability", From dcc7620cad5ad1326a78f4031a7bf4f0e5b42984 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 9 Mar 2017 15:39:37 +0200 Subject: [PATCH 1038/1911] usb: host: xhci-plat: Fix timeout on removal of hot pluggable xhci controllers Upstream commit 98d74f9ceaef ("xhci: fix 10 second timeout on removal of PCI hotpluggable xhci controllers") fixes a problem with hot pluggable PCI xhci controllers which can result in excessive timeouts, to the point where the system reports a deadlock. The same problem is seen with hot pluggable xhci controllers using the xhci-plat driver, such as the driver used for Type-C ports on rk3399. Similar to hot-pluggable PCI controllers, the driver for this chip removes the xhci controller from the system when the Type-C cable is disconnected. The solution for PCI devices works just as well for non-PCI devices and avoids the problem. 
Signed-off-by: Guenter Roeck Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6d33b42ffcf522..bd02a6cd8e2c08 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -286,6 +286,8 @@ static int xhci_plat_remove(struct platform_device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct clk *clk = xhci->clk; + xhci->xhc_state |= XHCI_STATE_REMOVING; + usb_remove_hcd(xhci->shared_hcd); usb_phy_shutdown(hcd->usb_phy); From 6d399783e9d4e9bd44931501948059d24ad96ff8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:26:41 -0800 Subject: [PATCH 1039/1911] md/raid10: submit bio directly to replacement disk Commit 57c67df(md/raid10: submit IO from originating thread instead of md thread) submits bio directly for normal disks but not for replacement disks. There is no point we shouldn't do this for replacement disks. Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 063c43d83b72c2..1443305613c54e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1477,11 +1477,24 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_bdev = (void*)rdev; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } From 99b3d74ec05c4a4c57766a90d65b53d78ab06404 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:31:10 -0800 Subject: [PATCH 1040/1911] md: delete dead code Nobody is using mddev_check_plugged(), so delete the dead code Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 -------- drivers/md/md.h | 6 ------ 2 files changed, 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 548d1b8014f89e..82bd1f3d2b1903 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); -void md_unplug(struct blk_plug_cb *cb, bool from_schedule) -{ - struct mddev *mddev = cb->data; - md_wakeup_thread(mddev->thread); - kfree(cb); -} -EXPORT_SYMBOL(md_unplug); - static inline struct mddev *mddev_get(struct mddev *mddev) { atomic_inc(&mddev->active); diff --git a/drivers/md/md.h b/drivers/md/md.h index b8859cbf84b618..dde8ecb760c871 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev); -extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); extern void md_kick_rdev_from_array(struct md_rdev * rdev); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); -static inline int 
mddev_check_plugged(struct mddev *mddev) -{ - return !!blk_check_plugged(md_unplug, mddev, - sizeof(struct blk_plug_cb)); -} static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { From 9c8043f337f14d1743006dfc59c03e80a42e3884 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:12 +0800 Subject: [PATCH 1041/1911] md-cluster: free md_cluster_info if node leave cluster To avoid memory leak, we need to free the cinfo which is allocated when node join cluster. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918cb..ba7edcdd09cec7 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -974,6 +974,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } From 75df023f4f2188c21181996e28234fef9351ef45 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:13 +0800 Subject: [PATCH 1042/1911] md-cluster: remove useless memset from gather_all_resync_info This memset is not needed. The lvb is already zeroed because it was recently allocated by lockres_init, which uses kzalloc(), and read_resync_info() doesn't need it to be zero anyway. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index ba7edcdd09cec7..321ecac2302780 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) bm_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (ret == -EAGAIN) { - memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE); s = read_resync_info(mddev, bm_lockres); if (s) { pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", From c94836342192b05d599d6aa3397f732f7a238689 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:23 +0800 Subject: [PATCH 1043/1911] md: move funcs from pers->resize to update_size raid1_resize and raid5_resize should also check the mddev->queue if run underneath dm-raid. And both set_capacity and revalidate_disk are used in pers->resize such as raid1, raid10 and raid5. So move them from personality file to common code. 
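In other words, the capacity/partition refresh is hoisted out of the personalities into common code and guarded by mddev->queue, since an array driven from underneath dm-raid has no md-owned queue or gendisk to update. A sketch of the hoisted logic (illustrative only; the actual change open-codes this inside update_size(), as the diff below shows):

	/* Sketch only -- the patch inlines this in update_size(). */
	static void sketch_refresh_capacity(struct mddev *mddev)
	{
		if (!mddev->queue)	/* e.g. personality run underneath dm-raid */
			return;
		set_capacity(mddev->gendisk, mddev->array_sectors);
		revalidate_disk(mddev->gendisk);
	}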
Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 ++++++-- drivers/md/raid1.c | 2 -- drivers/md/raid10.c | 4 ---- drivers/md/raid5.c | 2 -- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 82bd1f3d2b1903..bd15a18485c85c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6525,8 +6525,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) - revalidate_disk(mddev->gendisk); + if (!rv) { + if (mddev->queue) { + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } + } return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fbc2d7851b497f..10c3865e118674 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3246,8 +3246,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1443305613c54e..c4db6d1fb6a238 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3956,10 +3956,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, size); - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } if (sectors > mddev->dev_sectors && mddev->recovery_cp > oldsize) { mddev->recovery_cp = oldsize; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4fb09b3fcb4104..6bfedfcf41c17a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7605,8 +7605,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; From 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Mar 2017 07:31:28 +1100 Subject: [PATCH 1044/1911] md: don't impose the MD_SB_DISKS limit on arrays without metadata. These arrays, created with "mdadm --build" don't benefit from a limit. The default will be used, which is '0' and is interpreted as "don't impose a limit". Reported-by: ian_bruce@mail.ru Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bd15a18485c85c..cd89ad3c3a0d2c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6450,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->layout = info->layout; mddev->chunk_sectors = info->chunk_size >> 9; - mddev->max_disks = MD_SB_DISKS; - if (mddev->persistent) { - mddev->flags = 0; - mddev->sb_flags = 0; + mddev->max_disks = MD_SB_DISKS; + mddev->flags = 0; + mddev->sb_flags = 0; } set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); From 61eb2b43b99ebdc9bc6bc83d9792257b243e7cb3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 28 Feb 2017 13:00:20 -0800 Subject: [PATCH 1045/1911] md/raid1/10: fix potential deadlock Neil Brown pointed out a potential deadlock in raid 10 code with bio_split/chain. 
The raid1 code could have the same issue, but recent barrier rework makes it less likely to happen. The deadlock happens in below sequence: 1. generic_make_request(bio), this will set current->bio_list 2. raid10_make_request will split bio to bio1 and bio2 3. __make_request(bio1), wait_barrer, add underlayer disk bio to current->bio_list 4. __make_request(bio2), wait_barrer If raise_barrier happens between 3 & 4, since wait_barrier runs at 3, raise_barrier waits for IO completion from 3. And since raise_barrier sets barrier, 4 waits for raise_barrier. But IO from 3 can't be dispatched because raid10_make_request() doesn't finished yet. The solution is to adjust the IO ordering. Quotes from Neil: " It is much safer to: if (need to split) { split = bio_split(bio, ...) bio_chain(...) make_request_fn(split); generic_make_request(bio); } else make_request_fn(mddev, bio); This way we first process the initial section of the bio (in 'split') which will queue some requests to the underlying devices. These requests will be queued in generic_make_request. Then we queue the remainder of the bio, which will be added to the end of the generic_make_request queue. Then we return. generic_make_request() will pop the lower-level device requests off the queue and handle them first. Then it will process the remainder of the original bio once the first section has been fully processed. " Note, this only happens in read path. In write path, the bio is flushed to underlaying disks either by blk flush (from schedule) or offladed to raid1/10d. It's queued in current->bio_list. Cc: Coly Li Cc: stable@vger.kernel.org (v3.14+, only the raid10 part) Suggested-by: NeilBrown Reviewed-by: Jack Wang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 25 +++++++++++++++++++++++-- drivers/md/raid10.c | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 10c3865e118674..c33e96e33b8e2b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - if (bio_data_dir(split) == READ) + if (bio_data_dir(split) == READ) { raid1_read_request(mddev, split); - else + + /* + * If a bio is splitted, the first part of bio will + * pass barrier but the bio is queued in + * current->bio_list (see generic_make_request). If + * there is a raise_barrier() called here, the second + * part of bio can't pass barrier. But since the first + * part bio isn't dispatched to underlaying disks yet, + * the barrier is never released, hence raise_barrier + * will alays wait. We have a deadlock. + * Note, this only happens in read path. For write + * path, the first part of bio is dispatched in a + * schedule() call (because of blk plug) or offloaded + * to raid10d. + * Quitting from the function immediately can change + * the bio order queued in bio_list and avoid the deadlock. + */ + if (split != bio) { + generic_make_request(bio); + break; + } + } else raid1_write_request(mddev, split); } while (split != bio); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c4db6d1fb6a238..b1b1f982a722be 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1584,7 +1584,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + /* + * If a bio is splitted, the first part of bio will pass + * barrier but the bio is queued in current->bio_list (see + * generic_make_request). 
If there is a raise_barrier() called + * here, the second part of bio can't pass barrier. But since + * the first part bio isn't dispatched to underlaying disks + * yet, the barrier is never released, hence raise_barrier will + * alays wait. We have a deadlock. + * Note, this only happens in read path. For write path, the + * first part of bio is dispatched in a schedule() call + * (because of blk plug) or offloaded to raid10d. + * Quitting from the function immediately can change the bio + * order queued in bio_list and avoid the deadlock. + */ __make_request(mddev, split); + if (split != bio && bio_data_dir(bio) == READ) { + generic_make_request(bio); + break; + } } while (split != bio); /* In case raid10d snuck in to freeze_array */ From 6e347b5e05ea2ac4ac467a5a1cfaebb2c7f06f80 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 9 Mar 2017 11:27:07 -0600 Subject: [PATCH 1046/1911] PCI: iproc: Save host bridge window resource in struct iproc_pcie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The host bridge memory window resource is inserted into the iomem_resource tree and cannot be deallocated until the host bridge itself is removed. Previously, the window was on the stack, which meant the iomem_resource entry pointed into the stack and was corrupted as soon as the probe function returned, which caused memory corruption and errors like this: pcie_iproc_bcma bcma0:8: resource collision: [mem 0x40000000-0x47ffffff] conflicts with PCIe MEM space [mem 0x40000000-0x47ffffff] Move the memory window resource from the stack into struct iproc_pcie so its lifetime matches that of the host bridge. Fixes: c3245a566400 ("PCI: iproc: Request host bridge window resources") Reported-and-tested-by: Rafał Miłecki Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.8+ --- drivers/pci/host/pcie-iproc-bcma.c | 24 ++++++++++++------------ drivers/pci/host/pcie-iproc-platform.c | 19 ++++++++++--------- drivers/pci/host/pcie-iproc.h | 1 + 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c index bd4c9ec25edc22..384c27e664fec8 100644 --- a/drivers/pci/host/pcie-iproc-bcma.c +++ b/drivers/pci/host/pcie-iproc-bcma.c @@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) { struct device *dev = &bdev->dev; struct iproc_pcie *pcie; - LIST_HEAD(res); - struct resource res_mem; + LIST_HEAD(resources); int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) pcie->base_addr = bdev->addr; - res_mem.start = bdev->addr_s[0]; - res_mem.end = bdev->addr_s[0] + SZ_128M - 1; - res_mem.name = "PCIe MEM space"; - res_mem.flags = IORESOURCE_MEM; - pci_add_resource(&res, &res_mem); + pcie->mem.start = bdev->addr_s[0]; + pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1; + pcie->mem.name = "PCIe MEM space"; + pcie->mem.flags = IORESOURCE_MEM; + pci_add_resource(&resources, &pcie->mem); pcie->map_irq = iproc_pcie_bcma_map_irq; - ret = iproc_pcie_setup(pcie, &res); - if (ret) + ret = iproc_pcie_setup(pcie, &resources); + if (ret) { dev_err(dev, "PCIe controller setup failed\n"); - - pci_free_resource_list(&res); + pci_free_resource_list(&resources); + return ret; + } bcma_set_drvdata(bdev, pcie); - return ret; + return 0; } static void iproc_pcie_bcma_remove(struct bcma_device *bdev) diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c index 
f4909bb0b2ad15..8c6a327ca6cdf8 100644 --- a/drivers/pci/host/pcie-iproc-platform.c +++ b/drivers/pci/host/pcie-iproc-platform.c @@ -51,7 +51,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; struct resource reg; resource_size_t iobase = 0; - LIST_HEAD(res); + LIST_HEAD(resources); int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -96,10 +96,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) pcie->phy = NULL; } - ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); + ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources, + &iobase); if (ret) { - dev_err(dev, - "unable to get PCI host bridge resources\n"); + dev_err(dev, "unable to get PCI host bridge resources\n"); return ret; } @@ -112,14 +112,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) pcie->map_irq = of_irq_parse_and_map_pci; } - ret = iproc_pcie_setup(pcie, &res); - if (ret) + ret = iproc_pcie_setup(pcie, &resources); + if (ret) { dev_err(dev, "PCIe controller setup failed\n"); - - pci_free_resource_list(&res); + pci_free_resource_list(&resources); + return ret; + } platform_set_drvdata(pdev, pcie); - return ret; + return 0; } static int iproc_pcie_pltfm_remove(struct platform_device *pdev) diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h index 04fed8e907f12b..0bbe2ea44f3e15 100644 --- a/drivers/pci/host/pcie-iproc.h +++ b/drivers/pci/host/pcie-iproc.h @@ -90,6 +90,7 @@ struct iproc_pcie { #ifdef CONFIG_ARM struct pci_sys_data sysdata; #endif + struct resource mem; struct pci_bus *root_bus; struct phy *phy; int (*map_irq)(const struct pci_dev *, u8, u8); From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:02 +0300 Subject: [PATCH 1047/1911] x86/cpufeature: Add 5-level paging detection Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level paging. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e92..b04bb6dfed7f84 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ From 505a60e225606fbd3d2eadc31ff793d939ba66f1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:03 +0300 Subject: [PATCH 1048/1911] asm-generic: introduce 5level-fixup.h We are going to switch core MM to 5-level paging abstraction. This is preparation step which adds As with 4level-fixup.h, the new header allows quickly make all architectures compatible with 5-level paging in core MM. In long run we would like to switch architectures to properly folded p4d level by using , but it requires more changes to arch-specific code. Signed-off-by: Kirill A. 
Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/4level-fixup.h | 3 ++- include/asm-generic/5level-fixup.h | 41 ++++++++++++++++++++++++++++++ include/linux/mm.h | 3 +++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/5level-fixup.h diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 5bdab6bffd23cf..928fd66b127122 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -15,7 +15,6 @@ ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ NULL: pmd_offset(pud, address)) -#define pud_alloc(mm, pgd, address) (pgd) #define pud_offset(pgd, start) (pgd) #define pud_none(pud) 0 #define pud_bad(pud) 0 @@ -35,4 +34,6 @@ #undef pud_addr_end #define pud_addr_end(addr, end) (end) +#include + #endif diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h new file mode 100644 index 00000000000000..b5ca82dc41753f --- /dev/null +++ b/include/asm-generic/5level-fixup.h @@ -0,0 +1,41 @@ +#ifndef _5LEVEL_FIXUP_H +#define _5LEVEL_FIXUP_H + +#define __ARCH_HAS_5LEVEL_HACK +#define __PAGETABLE_P4D_FOLDED + +#define P4D_SHIFT PGDIR_SHIFT +#define P4D_SIZE PGDIR_SIZE +#define P4D_MASK PGDIR_MASK +#define PTRS_PER_P4D 1 + +#define p4d_t pgd_t + +#define pud_alloc(mm, p4d, address) \ + ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ + NULL : pud_offset(p4d, address)) + +#define p4d_alloc(mm, pgd, address) (pgd) +#define p4d_offset(pgd, start) (pgd) +#define p4d_none(p4d) 0 +#define p4d_bad(p4d) 0 +#define p4d_present(p4d) 1 +#define p4d_ERROR(p4d) do { } while (0) +#define p4d_clear(p4d) pgd_clear(p4d) +#define p4d_val(p4d) pgd_val(p4d) +#define p4d_populate(mm, p4d, pud) pgd_populate(mm, p4d, pud) +#define p4d_page(p4d) pgd_page(p4d) +#define p4d_page_vaddr(p4d) pgd_page_vaddr(p4d) + +#define __p4d(x) __pgd(x) +#define set_p4d(p4dp, p4d) set_pgd(p4dp, p4d) + +#undef p4d_free_tlb +#define p4d_free_tlb(tlb, x, addr) do { } while (0) +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, addr) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 0d65dd72c0f49e..be1fe264eb3731 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1619,11 +1619,14 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); * Remove it when 4level-fixup.h has been removed. */ #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) + +#ifndef __ARCH_HAS_5LEVEL_HACK static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? NULL: pud_offset(pgd, address); } +#endif /* !__ARCH_HAS_5LEVEL_HACK */ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { From 30ec842660bd0d056d4a7028ac5bd4a82b113d4f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:04 +0300 Subject: [PATCH 1049/1911] asm-generic: introduce __ARCH_USE_5LEVEL_HACK We are going to introduce to provide abstraction for properly (in opposite to 5level-fixup.h hack) folded p4d level. The new header will be included from pgtable-nopud.h. If an architecture uses , we cannot use 5level-fixup.h directly to quickly convert the architecture to 5-level paging as it would conflict with pgtable-nop4d.h. 
With this patch an architecture can define __ARCH_USE_5LEVEL_HACK before inclusion to use 5level-fixup.h. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nop4d-hack.h | 62 ++++++++++++++++++++++++ include/asm-generic/pgtable-nopud.h | 5 ++ 2 files changed, 67 insertions(+) create mode 100644 include/asm-generic/pgtable-nop4d-hack.h diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h new file mode 100644 index 00000000000000..752fb7511750e0 --- /dev/null +++ b/include/asm-generic/pgtable-nop4d-hack.h @@ -0,0 +1,62 @@ +#ifndef _PGTABLE_NOP4D_HACK_H +#define _PGTABLE_NOP4D_HACK_H + +#ifndef __ASSEMBLY__ +#include + +#define __PAGETABLE_PUD_FOLDED + +/* + * Having the pud type consist of a pgd gets the size right, and allows + * us to conceptually access the pgd entry that this pud is folded into + * without casting. + */ +typedef struct { pgd_t pgd; } pud_t; + +#define PUD_SHIFT PGDIR_SHIFT +#define PTRS_PER_PUD 1 +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pud is never bad, and a pud always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) + +#define pgd_populate(mm, pgd, pud) do { } while (0) +/* + * (puds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd; +} + +#define pud_val(x) (pgd_val((x).pgd)) +#define __pud(x) ((pud_t) { __pgd(x) }) + +#define pgd_page(pgd) (pud_page((pud_t){ pgd })) +#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) + +/* + * allocating and freeing a pud is trivial: the 1-entry pud is + * inside the pgd, so has no extra memory associated with it. + */ +#define pud_alloc_one(mm, address) NULL +#define pud_free(mm, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) + +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_HACK_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 810431d8351b16..5e49430a30a404 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -3,6 +3,10 @@ #ifndef __ASSEMBLY__ +#ifdef __ARCH_USE_5LEVEL_HACK +#include +#else + #define __PAGETABLE_PUD_FOLDED /* @@ -58,4 +62,5 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) #define pud_addr_end(addr, end) (end) #endif /* __ASSEMBLY__ */ +#endif /* !__ARCH_USE_5LEVEL_HACK */ #endif /* _PGTABLE_NOPUD_H */ From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:05 +0300 Subject: [PATCH 1050/1911] arch, mm: convert all architectures to use 5level-fixup.h If an architecture uses 4level-fixup.h we don't need to do anything as it includes 5level-fixup.h. If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK before inclusion of the header. It makes asm-generic code to use 5level-fixup.h. 
If an architecture has 4-level paging or folds levels on its own, include 5level-fixup.h directly. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 1 + arch/arc/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable.h | 1 + arch/arm64/include/asm/pgtable-types.h | 4 ++++ arch/avr32/include/asm/pgtable-2level.h | 1 + arch/cris/include/asm/pgtable.h | 1 + arch/frv/include/asm/pgtable.h | 1 + arch/h8300/include/asm/pgtable.h | 1 + arch/hexagon/include/asm/pgtable.h | 1 + arch/ia64/include/asm/pgtable.h | 2 ++ arch/metag/include/asm/pgtable.h | 1 + arch/microblaze/include/asm/page.h | 3 ++- arch/mips/include/asm/pgtable-32.h | 1 + arch/mips/include/asm/pgtable-64.h | 1 + arch/mn10300/include/asm/page.h | 1 + arch/nios2/include/asm/pgtable.h | 1 + arch/openrisc/include/asm/pgtable.h | 1 + arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +++ arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 3 +++ arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/score/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-2level.h | 1 + arch/sh/include/asm/pgtable-3level.h | 1 + arch/sparc/include/asm/pgtable_64.h | 1 + arch/tile/include/asm/pgtable_32.h | 1 + arch/tile/include/asm/pgtable_64.h | 1 + arch/um/include/asm/pgtable-2level.h | 1 + arch/um/include/asm/pgtable-3level.h | 1 + arch/unicore32/include/asm/pgtable.h | 1 + arch/x86/include/asm/pgtable_types.h | 4 ++++ arch/xtensa/include/asm/pgtable.h | 1 + 34 files changed, 46 insertions(+), 1 deletion(-) diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca5f..b18fcb60690822 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include +#define __ARCH_USE_5LEVEL_HACK #include static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e7c..ee22d40afef43b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec715..1c462381c225ee 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include #include #include diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503ca3..345a072b5856d4 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include +#elif CONFIG_PGTABLE_LEVELS == 4 +#include #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b955..d5b1c63993ec29 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git 
a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c7204..fa3a73004cc570 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1fa8..ab6e7e961b545c 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include #include #include #include diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821dd1..7d265d28ba5eec 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #define pgtable_cache_init() do { } while (0) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec307..24a9177fb897b6 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include #include +#define __ARCH_USE_5LEVEL_HACK #include /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc4a..6cc22c8d8923e9 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include #endif +#include #include #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecadda..0c151e5af07928 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854dff..d506bb0893f94e 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb619..6f94bed571c441 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a6760..130a2a6c153156 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include #else diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fdf6..dfe730a5ede04a 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef 
struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb426f..db4f7d17922078 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be55504..ff97374ca0693d 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 01222363881556..26ed228d4dc6b7 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c70158a..13c39b6d5d64cb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include + #ifndef __ASSEMBLY__ #include #endif + /* * Common bits between hash and Radix page table */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e0c..5134ade2e85016 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd83d..9f4de0a1035efb 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409af5..1facb584dd2962 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920eb..93e37b12e88237 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. 
*/ #ifndef __ASSEMBLY__ +#include #include #include #include diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985a0..46ff8fd678a75c 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e717..f75cf438725766 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d96482e..9b1e776eca31be 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d6b..8a598528ec1f04 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include #include #include #include diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a4227903683..5f8c615cb5e9bd 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8aa..96fe58b451188a 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. 
*/ +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe597524698c..179c0ea87a0c3b 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162fd3..c4d876dfb9acd1 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e324..a4f2bef37e7069 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d64299e..62484333673d98 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb21f..30dd5b2e4ad5af 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #include From 048456dcf2c56ad6f6248e2899dda92fb6a613f6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:06 +0300 Subject: [PATCH 1051/1911] asm-generic: introduce Like with pgtable-nopud.h for 4-level paging, this new header is base for converting an architectures to properly folded p4d_t level. Signed-off-by: Kirill A. 
Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable-nop4d.h | 56 +++++++++++++++++++++++++++++ include/asm-generic/pgtable-nopud.h | 43 +++++++++++----------- include/asm-generic/tlb.h | 14 ++++++-- 3 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 include/asm-generic/pgtable-nop4d.h diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h new file mode 100644 index 00000000000000..de364ecb8df68f --- /dev/null +++ b/include/asm-generic/pgtable-nop4d.h @@ -0,0 +1,56 @@ +#ifndef _PGTABLE_NOP4D_H +#define _PGTABLE_NOP4D_H + +#ifndef __ASSEMBLY__ + +#define __PAGETABLE_P4D_FOLDED + +typedef struct { pgd_t pgd; } p4d_t; + +#define P4D_SHIFT PGDIR_SHIFT +#define PTRS_PER_P4D 1 +#define P4D_SIZE (1UL << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE-1)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the p4d is never bad, and a p4d always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t *pgd) { } +#define p4d_ERROR(p4d) (pgd_ERROR((p4d).pgd)) + +#define pgd_populate(mm, pgd, p4d) do { } while (0) +/* + * (p4ds are folded into pgds so this doesn't get actually called, + * but the define is needed for a generic inline function.) + */ +#define set_pgd(pgdptr, pgdval) set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval }) + +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + return (p4d_t *)pgd; +} + +#define p4d_val(x) (pgd_val((x).pgd)) +#define __p4d(x) ((p4d_t) { __pgd(x) }) + +#define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) + +/* + * allocating and freeing a p4d is trivial: the 1-entry p4d is + * inside the pgd, so has no extra memory associated with it. + */ +#define p4d_alloc_one(mm, address) NULL +#define p4d_free(mm, x) do { } while (0) +#define __p4d_free_tlb(tlb, x, a) do { } while (0) + +#undef p4d_addr_end +#define p4d_addr_end(addr, end) (end) + +#endif /* __ASSEMBLY__ */ +#endif /* _PGTABLE_NOP4D_H */ diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 5e49430a30a404..c2b9b96d6268f4 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -6,53 +6,54 @@ #ifdef __ARCH_USE_5LEVEL_HACK #include #else +#include #define __PAGETABLE_PUD_FOLDED /* - * Having the pud type consist of a pgd gets the size right, and allows - * us to conceptually access the pgd entry that this pud is folded into + * Having the pud type consist of a p4d gets the size right, and allows + * us to conceptually access the p4d entry that this pud is folded into * without casting. 
*/ -typedef struct { pgd_t pgd; } pud_t; +typedef struct { p4d_t p4d; } pud_t; -#define PUD_SHIFT PGDIR_SHIFT +#define PUD_SHIFT P4D_SHIFT #define PTRS_PER_PUD 1 #define PUD_SIZE (1UL << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level + * The "p4d_xxx()" functions here are trivial for a folded two-level * setup: the pud is never bad, and a pud always exists (as it's folded - * into the pgd entry) + * into the p4d entry) */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -static inline void pgd_clear(pgd_t *pgd) { } -#define pud_ERROR(pud) (pgd_ERROR((pud).pgd)) +static inline int p4d_none(p4d_t p4d) { return 0; } +static inline int p4d_bad(p4d_t p4d) { return 0; } +static inline int p4d_present(p4d_t p4d) { return 1; } +static inline void p4d_clear(p4d_t *p4d) { } +#define pud_ERROR(pud) (p4d_ERROR((pud).p4d)) -#define pgd_populate(mm, pgd, pud) do { } while (0) +#define p4d_populate(mm, p4d, pud) do { } while (0) /* - * (puds are folded into pgds so this doesn't get actually called, + * (puds are folded into p4ds so this doesn't get actually called, * but the define is needed for a generic inline function.) */ -#define set_pgd(pgdptr, pgdval) set_pud((pud_t *)(pgdptr), (pud_t) { pgdval }) +#define set_p4d(p4dptr, p4dval) set_pud((pud_t *)(p4dptr), (pud_t) { p4dval }) -static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)pgd; + return (pud_t *)p4d; } -#define pud_val(x) (pgd_val((x).pgd)) -#define __pud(x) ((pud_t) { __pgd(x) } ) +#define pud_val(x) (p4d_val((x).p4d)) +#define __pud(x) ((pud_t) { __p4d(x) }) -#define pgd_page(pgd) (pud_page((pud_t){ pgd })) -#define pgd_page_vaddr(pgd) (pud_page_vaddr((pud_t){ pgd })) +#define p4d_page(p4d) (pud_page((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is - * inside the pgd, so has no extra memory associated with it. + * inside the p4d, so has no extra memory associated with it. */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 4329bc6ef04b7b..8afa4335e5b2bf 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -270,6 +270,12 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __pte_free_tlb(tlb, ptep, address); \ } while (0) +#define pmd_free_tlb(tlb, pmdp, address) \ + do { \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __pmd_free_tlb(tlb, pmdp, address); \ + } while (0) + #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ @@ -278,11 +284,13 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif -#define pmd_free_tlb(tlb, pmdp, address) \ +#ifndef __ARCH_HAS_5LEVEL_HACK +#define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - __pmd_free_tlb(tlb, pmdp, address); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __p4d_free_tlb(tlb, pudp, address); \ } while (0) +#endif #define tlb_migrate_finish(mm) do {} while (0) From c2febafc67734a62196c1b9dfba926412d4077ba Mon Sep 17 00:00:00 2001 From: "Kirill A. 
Shutemov" Date: Thu, 9 Mar 2017 17:24:07 +0300 Subject: [PATCH 1052/1911] mm: convert generic code to 5-level paging Convert all non-architecture-specific code to 5-level paging. It's mostly mechanical adding handling one more page table level in places where we deal with pud_t. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- drivers/misc/sgi-gru/grufault.c | 9 +- fs/userfaultfd.c | 6 +- include/asm-generic/pgtable.h | 48 +++++++- include/linux/hugetlb.h | 5 +- include/linux/kasan.h | 1 + include/linux/mm.h | 31 ++++- lib/ioremap.c | 39 +++++- mm/gup.c | 46 ++++++- mm/huge_memory.c | 7 +- mm/hugetlb.c | 29 +++-- mm/kasan/kasan_init.c | 44 ++++++- mm/memory.c | 207 ++++++++++++++++++++++++++------ mm/mlock.c | 1 + mm/mprotect.c | 26 +++- mm/mremap.c | 13 +- mm/page_vma_mapped.c | 6 +- mm/pagewalk.c | 32 ++++- mm/pgtable-generic.c | 6 + mm/rmap.c | 7 +- mm/sparse-vmemmap.c | 22 +++- mm/swapfile.c | 26 +++- mm/userfaultfd.c | 23 ++-- mm/vmalloc.c | 81 +++++++++---- 23 files changed, 595 insertions(+), 120 deletions(-) diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 6fb773dbcd0c32..93be82fc338ad8 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -219,15 +219,20 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, int write, unsigned long *paddr, int *pageshift) { pgd_t *pgdp; - pmd_t *pmdp; + p4d_t *p4dp; pud_t *pudp; + pmd_t *pmdp; pte_t pte; pgdp = pgd_offset(vma->vm_mm, vaddr); if (unlikely(pgd_none(*pgdp))) goto err; - pudp = pud_offset(pgdp, vaddr); + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); if (unlikely(pud_none(*pudp))) goto err; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579db3..02ce3944d0f555 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -267,6 +267,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -277,7 +278,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4ca23b158b3b7..1fad160f35de8e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,9 +10,9 @@ #include #include -#if 4 - defined(__PAGETABLE_PUD_FOLDED) - defined(__PAGETABLE_PMD_FOLDED) != \ - CONFIG_PGTABLE_LEVELS -#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{PUD,PMD}_FOLDED +#if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ + defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS +#error CONFIG_PGTABLE_LEVELS is not consistent with __PAGETABLE_{P4D,PUD,PMD}_FOLDED #endif /* @@ -424,6 +424,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#ifndef p4d_addr_end +#define p4d_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + P4D_SIZE) & P4D_MASK; \ + (__boundary - 1 < (end) - 1)? 
__boundary: (end); \ +}) +#endif + #ifndef pud_addr_end #define pud_addr_end(addr, end) \ ({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ @@ -444,6 +451,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) * Do the tests inline, but report and clear the bad entry in mm/memory.c. */ void pgd_clear_bad(pgd_t *); +void p4d_clear_bad(p4d_t *); void pud_clear_bad(pud_t *); void pmd_clear_bad(pmd_t *); @@ -458,6 +466,17 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) return 0; } +static inline int p4d_none_or_clear_bad(p4d_t *p4d) +{ + if (p4d_none(*p4d)) + return 1; + if (unlikely(p4d_bad(*p4d))) { + p4d_clear_bad(p4d); + return 1; + } + return 0; +} + static inline int pud_none_or_clear_bad(pud_t *pud) { if (pud_none(*pud)) @@ -844,11 +863,30 @@ static inline int pmd_protnone(pmd_t pmd) #endif /* CONFIG_MMU */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#ifndef __PAGETABLE_P4D_FOLDED +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot); +int p4d_clear_huge(p4d_t *p4d); +#else +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} +#endif /* !__PAGETABLE_P4D_FOLDED */ + int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { return 0; @@ -857,6 +895,10 @@ static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { return 0; } +static inline int p4d_clear_huge(p4d_t *p4d) +{ + return 0; +} static inline int pud_clear_huge(pud_t *pud) { return 0; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 503099d8aada53..b857fc8cc2ecae 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -122,7 +122,7 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pmd); +int pud_huge(pud_t pud); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); @@ -197,6 +197,9 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, #ifndef pgd_huge #define pgd_huge(x) 0 #endif +#ifndef p4d_huge +#define p4d_huge(x) 0 +#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index ceb3fe78a0d39d..1c823bef4c1510 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -18,6 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD]; +extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; void kasan_populate_zero_shadow(const void *shadow_start, const void *shadow_end); diff --git a/include/linux/mm.h b/include/linux/mm.h index be1fe264eb3731..5f01c88f0800da 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1560,14 +1560,24 @@ static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, return ptep; } +#ifdef __PAGETABLE_P4D_FOLDED +static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ 
+ return 0; +} +#else +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +#endif + #ifdef __PAGETABLE_PUD_FOLDED -static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, +static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } #else -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1621,10 +1631,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) #ifndef __ARCH_HAS_5LEVEL_HACK -static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, + unsigned long address) +{ + return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + NULL : p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, + unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? - NULL: pud_offset(pgd, address); + return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + NULL : pud_offset(p4d, address); } #endif /* !__ARCH_HAS_5LEVEL_HACK */ @@ -2388,7 +2406,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); -pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); +p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); +pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); diff --git a/lib/ioremap.c b/lib/ioremap.c index a3e14ce92a5684..4bb30206b9426f 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -14,6 +14,7 @@ #include #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_p4d_capable; static int __read_mostly ioremap_pud_capable; static int __read_mostly ioremap_pmd_capable; static int __read_mostly ioremap_huge_disabled; @@ -35,6 +36,11 @@ void __init ioremap_huge_init(void) } } +static inline int ioremap_p4d_enabled(void) +{ + return ioremap_p4d_capable; +} + static inline int ioremap_pud_enabled(void) { return ioremap_pud_capable; @@ -46,6 +52,7 @@ static inline int ioremap_pmd_enabled(void) } #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_p4d_enabled(void) { return 0; } static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -94,14 +101,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, +static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; phys_addr -= addr; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -120,6 +127,32 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t 
prot) +{ + p4d_t *p4d; + unsigned long next; + + phys_addr -= addr; + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (ioremap_p4d_enabled() && + ((next - addr) == P4D_SIZE) && + IS_ALIGNED(phys_addr + addr, P4D_SIZE)) { + if (p4d_set_huge(p4d, phys_addr + addr, prot)) + continue; + } + + if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -135,7 +168,7 @@ int ioremap_page_range(unsigned long addr, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/gup.c b/mm/gup.c index 9c047e951aa3d0..c74bad1bf6e8f2 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma, unsigned int *page_mask) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; spinlock_t *ptl; @@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return no_page_table(vma, flags); - - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d)) + return no_page_table(vma, flags); + BUILD_BUG_ON(p4d_huge(*p4d)); + if (unlikely(p4d_bad(*p4d))) + return no_page_table(vma, flags); + pud = pud_offset(p4d, address); if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { @@ -325,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, struct page **page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -338,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, else pgd = pgd_offset_gate(mm, address); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) @@ -1400,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&pgd, addr); + pudp = pud_offset(&p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -1428,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + p4d_t *p4dp; + + p4dp = p4d_offset(&pgd, addr); + do { + p4d_t p4d = READ_ONCE(*p4dp); + + next = p4d_addr_end(addr, end); + if (p4d_none(p4d)) + return 0; + BUILD_BUG_ON(p4d_huge(p4d)); + if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) { + if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, + P4D_SHIFT, next, write, pages, nr)) + return 0; + } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + return 0; + } while (p4dp++, addr = next, addr != end); + + return 1; +} + /* * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to * 
the regular GUP. It will only return non-negative values. @@ -1478,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, write, pages, &nr)) break; - } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) break; } while (pgdp++, addr = next, addr != end); local_irq_restore(flags); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d36b2af4d1bf4b..e4766de257090c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2048,6 +2048,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -2055,7 +2056,11 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, if (!pgd_present(*pgd)) return; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + return; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) return; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a7aa811b7d14c5..3d0aab9ee80d1f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4555,7 +4555,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); + p4d_t *p4d = p4d_offset(pgd, *addr); + pud_t *pud = pud_offset(p4d, *addr); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) @@ -4586,11 +4587,13 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_offset(pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (pud) { if (sz == PUD_SIZE) { pte = (pte_t *)pud; @@ -4610,18 +4613,22 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_huge(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } + if (!pgd_present(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return NULL; + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); return (pte_t *) pmd; } diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 31238dad85fbc6..b96a5f773d8808 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -30,6 +30,9 @@ */ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 4 +p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +#endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; #endif @@ -82,10 +85,10 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, +static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); unsigned long next; do { @@ -107,6 +110,23 @@ static void __init zero_pud_populate(pgd_t *pgd, 
unsigned long addr, } while (pud++, addr = next, addr != end); } +static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + unsigned long next; + + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p4d_populate(&init_mm, p4d, + early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + } + zero_pud_populate(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + /** * kasan_populate_zero_shadow - populate shadow memory region with * kasan_zero_page @@ -125,6 +145,7 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, next = pgd_addr_end(addr, end); if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -135,9 +156,22 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * 3,2 - level page tables where we don't have * puds,pmds, so pgd_populate(), pud_populate() * is noops. + * + * The ifndef is required to avoid build breakage. + * + * With 5level-fixup.h, pgd_populate() is not nop and + * we reference kasan_zero_p4d. It's not defined + * unless 5-level paging enabled. + * + * The ifndef can be dropped once all KASAN-enabled + * architectures will switch to pgtable-nop4d.h. */ - pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); - pud = pud_offset(pgd, addr); +#ifndef __ARCH_HAS_5LEVEL_HACK + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d)); +#endif + p4d = p4d_offset(pgd, addr); + p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud)); + pud = pud_offset(p4d, addr); pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); @@ -148,6 +182,6 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, pgd_populate(&init_mm, pgd, early_alloc(PAGE_SIZE, NUMA_NO_NODE)); } - zero_pud_populate(pgd, addr, next); + zero_p4d_populate(pgd, addr, next); } while (pgd++, addr = next, addr != end); } diff --git a/mm/memory.c b/mm/memory.c index a97a4cec2e1fcd..7f1c2163b3cea0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -445,7 +445,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static inline void free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -454,7 +454,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start; start = addr; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -462,6 +462,39 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, free_pmd_range(tlb, pud, addr, next, floor, ceiling); } while (pud++, addr = next, addr != end); + start &= P4D_MASK; + if (start < floor) + return; + if (ceiling) { + ceiling &= P4D_MASK; + if (!ceiling) + return; + } + if (end - 1 > ceiling - 1) + return; + + pud = pud_offset(p4d, start); + p4d_clear(p4d); + pud_free_tlb(tlb, pud, start); +} + +static inline void free_p4d_range(struct mmu_gather *tlb, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + p4d_t *p4d; + unsigned long next; + unsigned long start; + + start = addr; + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + free_pud_range(tlb, p4d, addr, next, floor, ceiling); 
+ } while (p4d++, addr = next, addr != end); + start &= PGDIR_MASK; if (start < floor) return; @@ -473,9 +506,9 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); + p4d = p4d_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud, start); + p4d_free_tlb(tlb, p4d, start); } /* @@ -539,7 +572,7 @@ void free_pgd_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - free_pud_range(tlb, pgd, addr, next, floor, ceiling); + free_p4d_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -658,7 +691,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, pte_t pte, struct page *page) { pgd_t *pgd = pgd_offset(vma->vm_mm, addr); - pud_t *pud = pud_offset(pgd, addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); pmd_t *pmd = pmd_offset(pud, addr); struct address_space *mapping; pgoff_t index; @@ -1023,16 +1057,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src } static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; unsigned long next; - dst_pud = pud_alloc(dst_mm, dst_pgd, addr); + dst_pud = pud_alloc(dst_mm, dst_p4d, addr); if (!dst_pud) return -ENOMEM; - src_pud = pud_offset(src_pgd, addr); + src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { @@ -1056,6 +1090,28 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src return 0; } +static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + p4d_t *src_p4d, *dst_p4d; + unsigned long next; + + dst_p4d = p4d_alloc(dst_mm, dst_pgd, addr); + if (!dst_p4d) + return -ENOMEM; + src_p4d = p4d_offset(src_pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(src_p4d)) + continue; + if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, + vma, addr, next)) + return -ENOMEM; + } while (dst_p4d++, src_p4d++, addr = next, addr != end); + return 0; +} + int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct vm_area_struct *vma) { @@ -1111,7 +1167,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, vma, addr, next))) { ret = -ENOMEM; break; @@ -1267,14 +1323,14 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, } static inline unsigned long zap_pud_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, pgd_t *pgd, + struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, struct zap_details *details) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_trans_huge(*pud) || pud_devmap(*pud)) { @@ -1295,6 +1351,25 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, return addr; } +static inline unsigned long 
zap_p4d_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct zap_details *details) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + next = zap_pud_range(tlb, vma, p4d, addr, next, details); + } while (p4d++, addr = next, addr != end); + + return addr; +} + void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, @@ -1310,7 +1385,7 @@ void unmap_page_range(struct mmu_gather *tlb, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - next = zap_pud_range(tlb, vma, pgd, addr, next, details); + next = zap_p4d_range(tlb, vma, pgd, addr, next, details); } while (pgd++, addr = next, addr != end); tlb_end_vma(tlb, vma); } @@ -1465,16 +1540,24 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes); pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl) { - pgd_t *pgd = pgd_offset(mm, addr); - pud_t *pud = pud_alloc(mm, pgd, addr); - if (pud) { - pmd_t *pmd = pmd_alloc(mm, pud, addr); - if (pmd) { - VM_BUG_ON(pmd_trans_huge(*pmd)); - return pte_alloc_map_lock(mm, pmd, addr, ptl); - } - } - return NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + VM_BUG_ON(pmd_trans_huge(*pmd)); + return pte_alloc_map_lock(mm, pmd, addr, ptl); } /* @@ -1740,7 +1823,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, return 0; } -static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, +static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { @@ -1748,7 +1831,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned long next; pfn -= addr >> PAGE_SHIFT; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1760,6 +1843,26 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } +static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + p4d_t *p4d; + unsigned long next; + + pfn -= addr >> PAGE_SHIFT; + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (remap_pud_range(mm, p4d, addr, next, + pfn + (addr >> PAGE_SHIFT), prot)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to @@ -1816,7 +1919,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, flush_cache_range(vma, addr, end); do { next = pgd_addr_end(addr, end); - err = remap_pud_range(mm, pgd, addr, next, + err = remap_p4d_range(mm, pgd, addr, next, pfn + (addr >> PAGE_SHIFT), prot); if (err) break; @@ -1932,7 +2035,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, return err; } -static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, +static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, pte_fn_t fn, void *data) { @@ -1940,7 +2043,7 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, unsigned 
long next; int err; - pud = pud_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -1952,6 +2055,26 @@ static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, return err; } +static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + p4d_t *p4d; + unsigned long next; + int err; + + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + err = apply_to_pud_range(mm, p4d, addr, next, fn, data); + if (err) + break; + } while (p4d++, addr = next, addr != end); + return err; +} + /* * Scan a region of virtual memory, filling in page tables as necessary * and calling a provided function on each leaf page table. @@ -1970,7 +2093,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_pud_range(mm, pgd, addr, next, fn, data); + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); if (err) break; } while (pgd++, addr = next, addr != end); @@ -3653,11 +3776,15 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; + p4d_t *p4d; int ret; pgd = pgd_offset(mm, address); + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return VM_FAULT_OOM; - vmf.pud = pud_alloc(mm, pgd, address); + vmf.pud = pud_alloc(mm, p4d, address); if (!vmf.pud) return VM_FAULT_OOM; if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { @@ -3784,7 +3911,7 @@ EXPORT_SYMBOL_GPL(handle_mm_fault); * Allocate page upper directory. * We've already handled the fast-path in-line. */ -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { pud_t *new = pud_alloc_one(mm, address); if (!new) @@ -3793,10 +3920,17 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) smp_wmb(); /* See comment in __pte_alloc */ spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) /* Another has populated it */ +#ifndef __ARCH_HAS_5LEVEL_HACK + if (p4d_present(*p4d)) /* Another has populated it */ + pud_free(mm, new); + else + p4d_populate(mm, p4d, new); +#else + if (pgd_present(*p4d)) /* Another has populated it */ pud_free(mm, new); else - pgd_populate(mm, pgd, new); + pgd_populate(mm, p4d, new); +#endif /* __ARCH_HAS_5LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; } @@ -3839,6 +3973,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *ptep; @@ -3847,7 +3982,11 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) + goto out; + + pud = pud_offset(p4d, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) goto out; diff --git a/mm/mlock.c b/mm/mlock.c index 1050511f8b2bdb..945edac4681014 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -380,6 +380,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, pte = get_locked_pte(vma->vm_mm, start, &ptl); /* Make sure we do not cross the page table boundary */ end = pgd_addr_end(start, end); + end = p4d_addr_end(start, end); end = pud_addr_end(start, end); end = pmd_addr_end(start, end); diff --git 
a/mm/mprotect.c b/mm/mprotect.c index 848e946b08e58e..8edd0d576254d4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -193,14 +193,14 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } static inline unsigned long change_pud_range(struct vm_area_struct *vma, - pgd_t *pgd, unsigned long addr, unsigned long end, + p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) { pud_t *pud; unsigned long next; unsigned long pages = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -212,6 +212,26 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, return pages; } +static inline unsigned long change_p4d_range(struct vm_area_struct *vma, + pgd_t *pgd, unsigned long addr, unsigned long end, + pgprot_t newprot, int dirty_accountable, int prot_numa) +{ + p4d_t *p4d; + unsigned long next; + unsigned long pages = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + pages += change_pud_range(vma, p4d, addr, next, newprot, + dirty_accountable, prot_numa); + } while (p4d++, addr = next, addr != end); + + return pages; +} + static unsigned long change_protection_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -230,7 +250,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - pages += change_pud_range(vma, pgd, addr, next, newprot, + pages += change_p4d_range(vma, pgd, addr, next, newprot, dirty_accountable, prot_numa); } while (pgd++, addr = next, addr != end); diff --git a/mm/mremap.c b/mm/mremap.c index 8233b0105c8258..cd8a1b199ef949 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -32,6 +32,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -39,7 +40,11 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) if (pgd_none_or_clear_bad(pgd)) return NULL; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none_or_clear_bad(p4d)) + return NULL; + + pud = pud_offset(p4d, addr); if (pud_none_or_clear_bad(pud)) return NULL; @@ -54,11 +59,15 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); if (!pud) return NULL; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a23001a22c1518..c4c9def8ffea47 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -104,6 +104,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; /* The only possible pmd mapping has been handled on last iteration */ @@ -133,7 +134,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) return false; - pud = pud_offset(pgd, pvmw->address); + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) + return false; + pud = pud_offset(p4d, pvmw->address); if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); 
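All of the mm call sites in this patch are converted with the same mechanical pattern: a p4d_offset()/p4d_alloc() step is inserted between the pgd and pud levels, and every helper that used to take a pgd_t * now takes a p4d_t *. As a minimal sketch (an illustrative helper, not part of the patch; the function name is hypothetical), the resulting descent from pgd down to pmd looks like this, mirroring helpers such as __follow_pte_pmd() and mm_find_pmd() elsewhere in the patch:

	/* Sketch only: walk the five-level tree down to the pmd for one address. */
	static pmd_t *find_pmd_sketch(struct mm_struct *mm, unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);
		p4d_t *p4d;
		pud_t *pud;

		if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
			return NULL;
		/* New level: folds back onto the pgd entry on <5-level configurations. */
		p4d = p4d_offset(pgd, addr);
		if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d)))
			return NULL;
		/* pud_offset() now takes a p4d_t *, not a pgd_t *. */
		pud = pud_offset(p4d, addr);
		if (pud_none(*pud) || unlikely(pud_bad(*pud)))
			return NULL;
		return pmd_offset(pud, addr);
	}

On architectures that keep four (or fewer) levels, p4d_offset() is expected to reduce to a wrapper around the pgd entry, so the extra step adds no runtime cost there.
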
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 03761577ae86e4..60f7856e508fb9 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -69,14 +69,14 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, return err; } -static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, +static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, struct mm_walk *walk) { pud_t *pud; unsigned long next; int err = 0; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { again: next = pud_addr_end(addr, end); @@ -113,6 +113,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, return err; } +static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + p4d_t *p4d; + unsigned long next; + int err = 0; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; + continue; + } + if (walk->pmd_entry || walk->pte_entry) + err = walk_pud_range(p4d, addr, next, walk); + if (err) + break; + } while (p4d++, addr = next, addr != end); + + return err; +} + static int walk_pgd_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -131,7 +157,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end, continue; } if (walk->pmd_entry || walk->pte_entry) - err = walk_pud_range(pgd, addr, next, walk); + err = walk_p4d_range(pgd, addr, next, walk); if (err) break; } while (pgd++, addr = next, addr != end); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 4ed5908c65b0f1..c99d9512a45b8a 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -22,6 +22,12 @@ void pgd_clear_bad(pgd_t *pgd) pgd_clear(pgd); } +void p4d_clear_bad(p4d_t *p4d) +{ + p4d_ERROR(*p4d); + p4d_clear(p4d); +} + void pud_clear_bad(pud_t *pud) { pud_ERROR(*pud); diff --git a/mm/rmap.c b/mm/rmap.c index 2da487d6cea83b..2984403a24247b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -684,6 +684,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd = NULL; pmd_t pmde; @@ -692,7 +693,11 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 574c67b663fe8a..a56c3989f77312 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -196,9 +196,9 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) return pmd; } -pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) +pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); if (pud_none(*pud)) { void *p = vmemmap_alloc_block(PAGE_SIZE, node); if (!p) @@ -208,6 +208,18 @@ pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node) return pud; } +p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, node); + if (!p) + return NULL; + p4d_populate(&init_mm, p4d, p); + } 
+ return p4d; +} + pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); @@ -225,6 +237,7 @@ int __meminit vmemmap_populate_basepages(unsigned long start, { unsigned long addr = start; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -233,7 +246,10 @@ int __meminit vmemmap_populate_basepages(unsigned long start, pgd = vmemmap_pgd_populate(addr, node); if (!pgd) return -ENOMEM; - pud = vmemmap_pud_populate(pgd, addr, node); + p4d = vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = vmemmap_pud_populate(p4d, addr, node); if (!pud) return -ENOMEM; pmd = vmemmap_pmd_populate(pud, addr, node); diff --git a/mm/swapfile.c b/mm/swapfile.c index 521ef9b6064fea..178130880b9085 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1517,7 +1517,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, return 0; } -static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, swp_entry_t entry, struct page *page) { @@ -1525,7 +1525,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long next; int ret; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) @@ -1537,6 +1537,26 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd, return 0; } +static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + swp_entry_t entry, struct page *page) +{ + p4d_t *p4d; + unsigned long next; + int ret; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_none_or_clear_bad(p4d)) + continue; + ret = unuse_pud_range(vma, p4d, addr, next, entry, page); + if (ret) + return ret; + } while (p4d++, addr = next, addr != end); + return 0; +} + static int unuse_vma(struct vm_area_struct *vma, swp_entry_t entry, struct page *page) { @@ -1560,7 +1580,7 @@ static int unuse_vma(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_pud_range(vma, pgd, addr, next, entry, page); + ret = unuse_p4d_range(vma, pgd, addr, next, entry, page); if (ret) return ret; } while (pgd++, addr = next, addr != end); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 479e631d43c2f6..8bcb501bce60b8 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -128,19 +128,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm, static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; - pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (pud) - /* - * Note that we didn't run this because the pmd was - * missing, the *pmd may be already established and in - * turn it may also be a trans_huge_pmd. - */ - pmd = pmd_alloc(mm, pud, address); - return pmd; + p4d = p4d_alloc(mm, pgd, address); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, address); + if (!pud) + return NULL; + /* + * Note that we didn't run this because the pmd was + * missing, the *pmd may be already established and in + * turn it may also be a trans_huge_pmd. 
+ */ + return pmd_alloc(mm, pud, address); } #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b4024d688f3869..0dd80222b20bbd 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -86,12 +86,12 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end) } while (pmd++, addr = next, addr != end); } -static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) +static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end) { pud_t *pud; unsigned long next; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); if (pud_clear_huge(pud)) @@ -102,6 +102,22 @@ static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (pud++, addr = next, addr != end); } +static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + if (p4d_clear_huge(p4d)) + continue; + if (p4d_none_or_clear_bad(p4d)) + continue; + vunmap_pud_range(p4d, addr, next); + } while (p4d++, addr = next, addr != end); +} + static void vunmap_page_range(unsigned long addr, unsigned long end) { pgd_t *pgd; @@ -113,7 +129,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - vunmap_pud_range(pgd, addr, next); + vunmap_p4d_range(pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -160,13 +176,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, return 0; } -static int vmap_pud_range(pgd_t *pgd, unsigned long addr, +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pud_t *pud; unsigned long next; - pud = pud_alloc(&init_mm, pgd, addr); + pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { @@ -177,6 +193,23 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, return 0; } +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, + unsigned long end, pgprot_t prot, struct page **pages, int *nr) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) + return -ENOMEM; + } while (p4d++, addr = next, addr != end); + return 0; +} + /* * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and * will have pfns corresponding to the "pages" array. 
@@ -196,7 +229,7 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); + err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); if (err) return err; } while (pgd++, addr = next, addr != end); @@ -237,6 +270,10 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; /* * XXX we might need to change this if we add VIRTUAL_BUG_ON for @@ -244,21 +281,23 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); - if (!pgd_none(*pgd)) { - pud_t *pud = pud_offset(pgd, addr); - if (!pud_none(*pud)) { - pmd_t *pmd = pmd_offset(pud, addr); - if (!pmd_none(*pmd)) { - pte_t *ptep, pte; - - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); - } - } - } + if (pgd_none(*pgd)) + return NULL; + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return NULL; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return NULL; + + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); return page; } EXPORT_SYMBOL(vmalloc_to_page); From 90eceff1a375f6ffa78caf8654e787c0a8a591ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:08 +0300 Subject: [PATCH 1053/1911] mm: introduce __p4d_alloc() For full 5-level paging we need a helper to allocate p4d page table. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- mm/memory.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 7f1c2163b3cea0..235ba51b2fbf07 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,29 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifndef __PAGETABLE_P4D_FOLDED +/* + * Allocate p4d page table. + * We've already handled the fast-path in-line. + */ +int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + p4d_t *new = p4d_alloc_one(mm, address); + if (!new) + return -ENOMEM; + + smp_wmb(); /* See comment in __pte_alloc */ + + spin_lock(&mm->page_table_lock); + if (pgd_present(*pgd)) /* Another has populated it */ + p4d_free(mm, new); + else + pgd_populate(mm, pgd, new); + spin_unlock(&mm->page_table_lock); + return 0; +} +#endif /* __PAGETABLE_P4D_FOLDED */ + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. From 5427290d64c752b97d6a2af7506771c7d10eb750 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Mar 2017 17:22:43 +0800 Subject: [PATCH 1054/1911] SUNRPC/backchanel: set XPT_CONG_CTRL flag for bc xprt The xprt for backchannel is created separately, not in TCP/UDP code. It needs the XPT_CONG_CTRL flag set on it too--otherwise requests on the NFSv4.1 backchannel are rjected in svc_process_common(): 1191 if (versp->vs_need_cong_ctrl && 1192 !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) 1193 goto err_bad_vers; Fixes: 5283b03ee5 ("nfs/nfsd/sunrpc: enforce transport...") Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8931e33b65412d..2b720fa35c4ff7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); serv->sv_bc_xprt = xprt; From 5be083cedc087b2d457ef2e9c2d5698c94d3d5e4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Mar 2017 15:57:31 -0800 Subject: [PATCH 1055/1911] net: ipv6: Remove redundant RTA_OIF in multipath routes Dinesh reported that RTA_MULTIPATH nexthops are 8-bytes larger with IPv6 than IPv4. The recent refactoring for multipath support in netlink messages does discriminate between non-multipath which needs the OIF and multipath which adds a rtnexthop struct for each hop making the RTA_OIF attribute redundant. Resolve by adding a flag to the info function to skip the oif for multipath. Fixes: beb1afac518d ("net: ipv6: Add support to dump multipath routes via RTA_MULTIPATH attribute") Reported-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 229bfcc451ef50..35c58b669ebdfd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3299,7 +3299,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + nla_total_size(4) /* RTA_OIF */ + lwtunnel_get_encap_size(rt->dst.lwtstate); nexthop_len *= rt->rt6i_nsiblings; @@ -3323,7 +3322,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) } static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, - unsigned int *flags) + unsigned int *flags, bool skip_oif) { if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { *flags |= RTNH_F_LINKDOWN; @@ -3336,7 +3335,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } - if (rt->dst.dev && + /* not needed for multipath encoding b/c it has a rtnexthop struct */ + if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; @@ -3350,6 +3350,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, return -EMSGSIZE; } +/* add multipath next hop */ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) { struct rtnexthop *rtnh; @@ -3362,7 +3363,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) rtnh->rtnh_hops = 0; rtnh->rtnh_ifindex = rt->dst.dev ? 
rt->dst.dev->ifindex : 0; - if (rt6_nexthop_info(skb, rt, &flags) < 0) + if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; @@ -3515,7 +3516,7 @@ static int rt6_fill_node(struct net *net, nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0) + if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) goto nla_put_failure; } From 67e303e0c7683957eb4e530453705a43a6d4f966 Mon Sep 17 00:00:00 2001 From: VSR Burru Date: Mon, 6 Mar 2017 18:45:59 -0800 Subject: [PATCH 1056/1911] liquidio: improve UDP TX performance Improve UDP TX performance by: * reducing the ring size from 2K to 512 * replacing the numerous streaming DMA allocations for info buffers and gather lists with one large consistent DMA allocation per ring BQL is not effective here. We reduced the ring size because there is heavy overhead with dma_map_single every so often. With iommu=on, dma_map_single in PF Tx data path was taking longer time (~700usec) for every ~250 packets. Debugged intel_iommu code, and found that PF driver is utilizing too many static IO virtual address mapping entries (for gather list entries and info buffers): about 100K entries for two PF's each using 8 rings. Also, finding an empty entry (in rbtree of device domain's iova mapping in kernel) during Tx path becomes a bottleneck every so often; the loop to find the empty entry goes through over 40K iterations; this is too costly and was the major overhead. Overhead is low when this loop quits quickly. Netperf benchmark numbers before and after patch: PF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 0.52 | 0.93 | +78.9 | | 1 | 1024 | 1.62 | 2.84 | +75.3 | | | 1518 | 2.44 | 4.21 | +72.5 | +--------+--------+------------+------------+---------+ | | 360 | 0.45 | 1.59 | +253.3 | | 4 | 1024 | 1.34 | 5.48 | +308.9 | | | 1518 | 2.27 | 8.31 | +266.1 | +--------+--------+------------+------------+---------+ | | 360 | 0.40 | 1.61 | +302.5 | | 8 | 1024 | 1.64 | 4.24 | +158.5 | | | 1518 | 2.87 | 6.52 | +127.2 | +--------+--------+------------+------------+---------+ VF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 1.28 | 1.49 | +16.4 | | 1 | 1024 | 4.44 | 4.39 | -1.1 | | | 1518 | 6.08 | 6.51 | +7.1 | +--------+--------+------------+------------+---------+ | | 360 | 2.35 | 2.35 | 0.0 | | 4 | 1024 | 6.41 | 8.07 | +25.9 | | | 1518 | 9.56 | 9.54 | -0.2 | +--------+--------+------------+------------+---------+ | | 360 | 3.41 | 3.65 | +7.0 | | 8 | 1024 | 9.35 | 9.34 | -0.1 | | | 1518 | 9.56 | 9.57 | +0.1 | +--------+--------+------------+------------+---------+ Signed-off-by: VSR Burru Signed-off-by: Felix Manlunas Signed-off-by: Derek Chickles Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 110 +++++++++--------- .../ethernet/cavium/liquidio/lio_vf_main.c | 104 +++++++++-------- .../ethernet/cavium/liquidio/octeon_config.h | 6 +- .../ethernet/cavium/liquidio/octeon_droq.c | 17 +-- .../ethernet/cavium/liquidio/octeon_droq.h | 4 +- .../ethernet/cavium/liquidio/octeon_main.h | 42 ------- .../ethernet/cavium/liquidio/octeon_network.h | 43 ++++--- 7 files changed, 144 insertions(+), 182 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index be9c0e3f5ade7d..92f46b1375c325 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -152,7 +152,7 @@ struct octnic_gather { */ struct octeon_sg_entry *sg; - u64 sg_dma_ptr; + dma_addr_t sg_dma_ptr; }; struct handshake { @@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio) struct octnic_gather *g; int i; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + if (!lio->glist) return; @@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) { - if (g->sg) { - dma_unmap_single(&lio->oct_dev-> - pci_dev->dev, - g->sg_dma_ptr, - g->sg_size, - DMA_TO_DEVICE); - kfree((void *)((unsigned long)g->sg - - g->adjust)); - } + if (g) kfree(g); - } } while (g); + + if (lio->glists_virt_base && lio->glists_virt_base[i]) { + lio_dma_free(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + lio->glists_virt_base[i], + lio->glists_dma_base[i]); + } } - kfree((void *)lio->glist); - kfree((void *)lio->glist_lock); + kfree(lio->glists_virt_base); + lio->glists_virt_base = NULL; + + kfree(lio->glists_dma_base); + lio->glists_dma_base = NULL; + + kfree(lio->glist); + lio->glist = NULL; } /** @@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL); if (!lio->glist_lock) - return 1; + return -ENOMEM; lio->glist = kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL); if (!lio->glist) { - kfree((void *)lio->glist_lock); - return 1; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + return -ENOMEM; + } + + lio->glist_entry_size = + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); + + /* allocate memory to store virtual and dma base address of + * per glist consistent memory + */ + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), + GFP_KERNEL); + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), + GFP_KERNEL); + + if (!lio->glists_virt_base || !lio->glists_dma_base) { + delete_glists(lio); + return -ENOMEM; } for (i = 0; i < num_iqs; i++) { @@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) INIT_LIST_HEAD(&lio->glist[i]); + lio->glists_virt_base[i] = + lio_dma_alloc(oct, + lio->glist_entry_size * lio->tx_qsize, + &lio->glists_dma_base[i]); + + if (!lio->glists_virt_base[i]) { + delete_glists(lio); + return -ENOMEM; + } + for (j = 0; j < lio->tx_qsize; j++) { g = kzalloc_node(sizeof(*g), GFP_KERNEL, numa_node); @@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) if (!g) break; - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * - OCT_SG_ENTRY_SIZE); + g->sg = lio->glists_virt_base[i] + + (j * lio->glist_entry_size); - g->sg = kmalloc_node(g->sg_size + 8, - GFP_KERNEL, numa_node); - if (!g->sg) - g->sg = 
kmalloc(g->sg_size + 8, GFP_KERNEL); - if (!g->sg) { - kfree(g); - break; - } - - /* The gather component should be aligned on 64-bit - * boundary - */ - if (((unsigned long)g->sg) & 7) { - g->adjust = 8 - (((unsigned long)g->sg) & 7); - g->sg = (struct octeon_sg_entry *) - ((unsigned long)g->sg + g->adjust); - } - g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev, - g->sg, g->sg_size, - DMA_TO_DEVICE); - if (dma_mapping_error(&oct->pci_dev->dev, - g->sg_dma_ptr)) { - kfree((void *)((unsigned long)g->sg - - g->adjust)); - kfree(g); - break; - } + g->sg_dma_ptr = lio->glists_dma_base[i] + + (j * lio->glist_entry_size); list_add_tail(&g->list, &lio->glist[i]); } if (j != lio->tx_qsize) { delete_glists(lio); - return 1; + return -ENOMEM; } } @@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf) i++; } - dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); @@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) i++; } - dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr, - g->sg_size, DMA_TO_DEVICE); dptr = g->sg_dma_ptr; if (OCTEON_CN23XX_PF(oct)) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 9d5e03502c76cb..7b83be4ce1fe0c 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -108,6 +108,8 @@ struct octnic_gather { * received from the IP layer. 
*/ struct octeon_sg_entry *sg; + + dma_addr_t sg_dma_ptr; }; struct octeon_device_priv { @@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio) struct octnic_gather *g; int i; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + if (!lio->glist) return; @@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) { - if (g->sg) - kfree((void *)((unsigned long)g->sg - - g->adjust)); + if (g) kfree(g); - } } while (g); + + if (lio->glists_virt_base && lio->glists_virt_base[i]) { + lio_dma_free(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + lio->glists_virt_base[i], + lio->glists_dma_base[i]); + } } + kfree(lio->glists_virt_base); + lio->glists_virt_base = NULL; + + kfree(lio->glists_dma_base); + lio->glists_dma_base = NULL; + kfree(lio->glist); - kfree(lio->glist_lock); + lio->glist = NULL; } /** @@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs) lio->glist_lock = kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL); if (!lio->glist_lock) - return 1; + return -ENOMEM; lio->glist = kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL); if (!lio->glist) { kfree(lio->glist_lock); - return 1; + lio->glist_lock = NULL; + return -ENOMEM; + } + + lio->glist_entry_size = + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); + + /* allocate memory to store virtual and dma base address of + * per glist consistent memory + */ + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), + GFP_KERNEL); + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), + GFP_KERNEL); + + if (!lio->glists_virt_base || !lio->glists_dma_base) { + delete_glists(lio); + return -ENOMEM; } for (i = 0; i < num_iqs; i++) { @@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs) INIT_LIST_HEAD(&lio->glist[i]); + lio->glists_virt_base[i] = + lio_dma_alloc(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + &lio->glists_dma_base[i]); + + if (!lio->glists_virt_base[i]) { + delete_glists(lio); + return -ENOMEM; + } + for (j = 0; j < lio->tx_qsize; j++) { g = kzalloc(sizeof(*g), GFP_KERNEL); if (!g) break; - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * - OCT_SG_ENTRY_SIZE); + g->sg = lio->glists_virt_base[i] + + (j * lio->glist_entry_size); - g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL); - if (!g->sg) { - kfree(g); - break; - } + g->sg_dma_ptr = lio->glists_dma_base[i] + + (j * lio->glist_entry_size); - /* The gather component should be aligned on 64-bit - * boundary - */ - if (((unsigned long)g->sg) & 7) { - g->adjust = 8 - (((unsigned long)g->sg) & 7); - g->sg = (struct octeon_sg_entry *) - ((unsigned long)g->sg + g->adjust); - } list_add_tail(&g->list, &lio->glist[i]); } if (j != lio->tx_qsize) { delete_glists(lio); - return 1; + return -ENOMEM; } } @@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf) i++; } - dma_unmap_single(&lio->oct_dev->pci_dev->dev, - finfo->dptr, g->sg_size, - DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - dma_unmap_single(&lio->oct_dev->pci_dev->dev, - finfo->dptr, g->sg_size, - DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) i++; } - dptr = dma_map_single(&oct->pci_dev->dev, - g->sg, g->sg_size, - DMA_TO_DEVICE); - if (dma_mapping_error(&oct->pci_dev->dev, dptr)) { - 
dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n", - __func__); - dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0], - skb->len - skb->data_len, - DMA_TO_DEVICE); - for (j = 1; j <= frags; j++) { - frag = &skb_shinfo(skb)->frags[j - 1]; - dma_unmap_page(&oct->pci_dev->dev, - g->sg[j >> 2].ptr[j & 3], - frag->size, DMA_TO_DEVICE); - } - return NETDEV_TX_BUSY; - } + dptr = g->sg_dma_ptr; ndata.cmd.cmd3.dptr = dptr; finfo->dptr = dptr; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index b3dc2e9651a8e2..d29ebc531151f0 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -71,17 +71,17 @@ #define CN23XX_MAX_RINGS_PER_VF 8 #define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF -#define CN23XX_MAX_IQ_DESCRIPTORS 2048 +#define CN23XX_MAX_IQ_DESCRIPTORS 512 #define CN23XX_DB_MIN 1 #define CN23XX_DB_MAX 8 #define CN23XX_DB_TIMEOUT 1 #define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF -#define CN23XX_MAX_OQ_DESCRIPTORS 2048 +#define CN23XX_MAX_OQ_DESCRIPTORS 512 #define CN23XX_OQ_BUF_SIZE 1536 #define CN23XX_OQ_PKTSPER_INTR 128 /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/ -#define CN23XX_OQ_REFIL_THRESHOLD 128 +#define CN23XX_OQ_REFIL_THRESHOLD 16 #define CN23XX_OQ_INTR_PKT 64 #define CN23XX_OQ_INTR_TIME 100 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 0be87d119a979e..79f809479af6e7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct, recv_buffer_destroy(droq->recv_buf_list[i].buffer, pg_info); - if (droq->desc_ring && droq->desc_ring[i].info_ptr) - lio_unmap_ring_info(oct->pci_dev, - (u64)droq-> - desc_ring[i].info_ptr, - OCT_DROQ_INFO_SIZE); droq->recv_buf_list[i].buffer = NULL; } @@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no) vfree(droq->recv_buf_list); if (droq->info_base_addr) - cnnic_free_aligned_dma(oct->pci_dev, droq->info_list, - droq->info_alloc_size, - droq->info_base_addr, - droq->info_list_dma); + lio_free_info_buffer(oct, droq); if (droq->desc_ring) lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE), @@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct, dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no, droq->max_count); - droq->info_list = - cnnic_numa_alloc_aligned_dma((droq->max_count * - OCT_DROQ_INFO_SIZE), - &droq->info_alloc_size, - &droq->info_base_addr, - numa_node); + droq->info_list = lio_alloc_info_buffer(oct, droq); if (!droq->info_list) { dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n"); lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE), diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index e62074090681d3..6982c0af5eccb7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -325,10 +325,10 @@ struct octeon_droq { size_t desc_ring_dma; /** Info ptr list are allocated at this virtual address. */ - size_t info_base_addr; + void *info_base_addr; /** DMA mapped address of the info list */ - size_t info_list_dma; + dma_addr_t info_list_dma; /** Allocated size of info list. 
*/ u32 info_alloc_size; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index aa36e9ae767655..bed9ef17bc26b4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -140,48 +140,6 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct, return 1; } -static inline void * -cnnic_numa_alloc_aligned_dma(u32 size, - u32 *alloc_size, - size_t *orig_ptr, - int numa_node) -{ - int retries = 0; - void *ptr = NULL; - -#define OCTEON_MAX_ALLOC_RETRIES 1 - do { - struct page *page = NULL; - - page = alloc_pages_node(numa_node, - GFP_KERNEL, - get_order(size)); - if (!page) - page = alloc_pages(GFP_KERNEL, - get_order(size)); - ptr = (void *)page_address(page); - if ((unsigned long)ptr & 0x07) { - __free_pages(page, get_order(size)); - ptr = NULL; - /* Increment the size required if the first - * attempt failed. - */ - if (!retries) - size += 7; - } - retries++; - } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr); - - *alloc_size = size; - *orig_ptr = (unsigned long)ptr; - if ((unsigned long)ptr & 0x07) - ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL)); - return ptr; -} - -#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ - free_pages(orig_ptr, get_order(size)) - static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 6bb89419006eb5..eef2a1e8a7e3f9 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -62,6 +62,9 @@ struct lio { /** Array of gather component linked lists */ struct list_head *glist; + void **glists_virt_base; + dma_addr_t *glists_dma_base; + u32 glist_entry_size; /** Pointer to the NIC properties for the Octeon device this network * interface is associated with. 
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer) #define lio_dma_free(oct, size, virt_addr, dma_addr) \ dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr) +static inline void * +lio_alloc_info_buffer(struct octeon_device *oct, + struct octeon_droq *droq) +{ + void *virt_ptr; + + virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE), + &droq->info_list_dma); + if (virt_ptr) { + droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE; + droq->info_base_addr = virt_ptr; + } + + return virt_ptr; +} + +static inline void lio_free_info_buffer(struct octeon_device *oct, + struct octeon_droq *droq) +{ + lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr, + droq->info_list_dma); +} + static inline void *get_rbd(struct sk_buff *skb) { @@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb) static inline u64 lio_map_ring_info(struct octeon_droq *droq, u32 i) { - dma_addr_t dma_addr; - struct octeon_device *oct = droq->oct_dev; - - dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i], - OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE); - - WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr)); - - return (u64)dma_addr; -} - -static inline void -lio_unmap_ring_info(struct pci_dev *pci_dev, - u64 info_ptr, u32 size) -{ - dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE); + return droq->info_list_dma + (i * sizeof(struct octeon_droq_info)); } static inline u64 From 3b12f73a5c2977153f28a224392fd4729b50d1dc Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 7 Mar 2017 02:48:36 -0500 Subject: [PATCH 1057/1911] rds: ib: add error handle In the function rds_ib_setup_qp, the error handle is missing. When some error occurs, it is possible that memory leak occurs. As such, error handle is added. Cc: Joe Jin Reviewed-by: Junxiao Bi Reviewed-by: Guanglei Li Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ce3775abc6e7a1..1c38d2c7caa8e9 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_send_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_scq_vector); rdsdebug("ib_create_cq send failed: %d\n", ret); - goto out; + goto rds_ibdev_out; } ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev); @@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_recv_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_rcq_vector); rdsdebug("ib_create_cq recv failed: %d\n", ret); - goto out; + goto send_cq_out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); - goto out; + goto recv_cq_out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); - goto out; + goto recv_cq_out; } /* XXX negotiate max send/recv with remote? 
*/ @@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); - goto out; + goto recv_cq_out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, @@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); - goto out; + goto qp_out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, @@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); - goto out; + goto send_hdrs_dma_out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), @@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); - goto out; + goto recv_hdrs_dma_out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), @@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); - goto out; + goto ack_dma_out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), @@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); - goto out; + goto sends_out; } rds_ib_recv_init_ack(ic); @@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); -out: + return ret; + +sends_out: + vfree(ic->i_sends); +ack_dma_out: + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); +recv_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, ic->i_recv_hdrs_dma); +send_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, ic->i_send_hdrs_dma); +qp_out: + rdma_destroy_qp(ic->i_cm_id); +recv_cq_out: + if (!ib_destroy_cq(ic->i_recv_cq)) + ic->i_recv_cq = NULL; +send_cq_out: + if (!ib_destroy_cq(ic->i_send_cq)) + ic->i_send_cq = NULL; +rds_ibdev_out: + rds_ib_remove_conn(rds_ibdev, conn); rds_ib_dev_put(rds_ibdev); + return ret; } From 83abb7d7c91f4ac20e47c3089a10bb93b2ea8994 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:08 +0530 Subject: [PATCH 1058/1911] net: thunderx: Fix IOMMU translation faults ACPI support has been added to ARM IOMMU driver in 4.10 kernel and that has resulted in VNIC interfaces throwing translation faults when kernel is booted with ACPI as driver was not using DMA API. This patch fixes the issue by using DMA API which in turn will create translation tables when IOMMU is enabled. Also VNIC doesn't have a separate receive buffer ring per receive queue, so there is no 1:1 descriptor index matching between CQE_RX and the index in buffer ring from where a buffer has been used for DMA'ing. Unlike other NICs, here it's not possible to maintain dma address to virt address mappings within the driver. This leaves us no other choice but to use IOMMU's IOVA address conversion API to get buffer's virtual address which can be given to network stack for processing. Signed-off-by: Sunil Goutham Signed-off-by: David S.
Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 1 + .../net/ethernet/cavium/thunder/nicvf_main.c | 12 +- .../ethernet/cavium/thunder/nicvf_queues.c | 178 ++++++++++++++---- .../ethernet/cavium/thunder/nicvf_queues.h | 4 +- 4 files changed, 156 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index e739c715356283..2269ff562d9562 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -269,6 +269,7 @@ struct nicvf { #define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; struct nicvf_cq_poll *napi[8]; + void *iommu_domain; u8 vf_id; u8 sqs_id; bool sqs_mode; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 6feaa24bcfd42b..24017588f53171 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "nic_reg.h" #include "nic.h" @@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, /* Get actual TSO descriptors and free them */ tso_sqe = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, + tso_sqe->subdesc_cnt); nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + } else { + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, + hdr->subdesc_cnt); } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); @@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, { struct sk_buff *skb; struct nicvf *nic = netdev_priv(netdev); + struct nicvf *snic = nic; int err = 0; int rq_idx; @@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, if (err && !cqe_rx->rb_cnt) return; - skb = nicvf_get_rcv_skb(nic, cqe_rx); + skb = nicvf_get_rcv_skb(snic, cqe_rx); if (!skb) { netdev_dbg(nic->netdev, "Packet not received\n"); return; @@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pass1_silicon(nic->pdev)) nic->hw_tso = true; + /* Get iommu domain for iova to physical addr conversion */ + nic->iommu_domain = iommu_get_domain_for_dev(dev); + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); if (sdevid == 0xA134) nic->t88 = true; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index ac0390be3b126e..b1962dbd04093b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,16 @@ #include "q_struct.h" #include "nicvf_queues.h" +#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0) + +static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (nic->iommu_domain) + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); + return dma_addr; +} + static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - int order = (PAGE_SIZE <= 4096) ? 
PAGE_ALLOC_COSTLY_ORDER : 0; + int order = NICVF_PAGE_ORDER; /* Check if request can be accomodated in previous allocated page */ if (nic->rb_page && @@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, } nicvf_get_page(nic); - nic->rb_page = NULL; /* Allocate a new page */ + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + order); if (!nic->rb_page) { - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - order); - if (!nic->rb_page) { - this_cpu_inc(nic->pnicvf->drv_stats-> - rcv_buffer_alloc_failures); - return -ENOMEM; - } - nic->rb_page_offset = 0; + this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); + return -ENOMEM; } - + nic->rb_page_offset = 0; ret: - *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset); + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + nic->rb_page_offset, buf_len, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC)); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, order); + nic->rb_page = NULL; + return -ENOMEM; + } nic->rb_page_offset += buf_len; return 0; @@ -158,16 +174,21 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, rbdr->dma_size = buf_size; rbdr->enable = true; rbdr->thresh = RBDR_THRESH; + rbdr->head = 0; + rbdr->tail = 0; nic->rb_page = NULL; for (idx = 0; idx < ring_len; idx++) { err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, &rbuf); - if (err) + if (err) { + /* To free already allocated and mapped ones */ + rbdr->tail = idx - 1; return err; + } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; } nicvf_get_page(nic); @@ -179,7 +200,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) { int head, tail; - u64 buf_addr; + u64 buf_addr, phys_addr; struct rbdr_entry_t *desc; if (!rbdr) @@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) head = rbdr->head; tail = rbdr->tail; - /* Free SKBs */ + /* Release page references */ while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); - buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - put_page(virt_to_page(phys_to_virt(buf_addr))); + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); head++; head &= (rbdr->dmem.q_len - 1); } - /* Free SKB of tail desc */ + /* Release buffer of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); - buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - put_page(virt_to_page(phys_to_virt(buf_addr))); + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); @@ -250,7 +279,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; 
refill_rb_cnt--; new_rb++; } @@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic, return 0; } +void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, + int hdr_sqe, u8 subdesc_cnt) +{ + u8 idx; + struct sq_gather_subdesc *gather; + + /* Unmap DMA mapped skb data buffers */ + for (idx = 0; idx < subdesc_cnt; idx++) { + hdr_sqe++; + hdr_sqe &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe); + /* HW will ensure data coherency, CPU sync not required */ + dma_unmap_page_attrs(&nic->pdev->dev, gather->addr, + gather->size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } +} + static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { struct sk_buff *skb; + struct sq_hdr_subdesc *hdr; + struct sq_hdr_subdesc *tso_sqe; if (!sq) return; @@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) smp_rmb(); while (sq->head != sq->tail) { skb = (struct sk_buff *)sq->skbuff[sq->head]; - if (skb) - dev_kfree_skb_any(skb); + if (!skb) + goto next; + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + /* Check for dummy descriptor used for HW TSO offload on 88xx */ + if (hdr->dont_send) { + /* Get actual TSO descriptors and unmap them */ + tso_sqe = + (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, + tso_sqe->subdesc_cnt); + } else { + nicvf_unmap_sndq_buffers(nic, sq, sq->head, + hdr->subdesc_cnt); + } + dev_kfree_skb_any(skb); +next: sq->head++; sq->head &= (sq->dmem.q_len - 1); } @@ -882,6 +945,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) return qentry; } +/* Rollback to previous tail pointer when descriptors not used */ +static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, + int qentry, int desc_cnt) +{ + sq->tail = qentry; + atomic_add(desc_cnt, &sq->free_cnt); +} + /* Free descriptor back to SQ for future use */ void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) { @@ -1207,8 +1278,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num) { int i, size; - int subdesc_cnt, tso_sqe = 0; + int subdesc_cnt, hdr_sqe = 0; int qentry; + u64 dma_addr; subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); if (subdesc_cnt > atomic_read(&sq->free_cnt)) @@ -1223,12 +1295,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, /* Add SQ header subdesc */ nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, skb, skb->len); - tso_sqe = qentry; + hdr_sqe = qentry; /* Add SQ gather subdescs */ qentry = nicvf_get_nxt_sqentry(sq, qentry); size = skb_is_nonlinear(skb) ? 
skb_headlen(skb) : skb->len; - nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data)); + /* HW will ensure data coherency, CPU sync not required */ + dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data), + offset_in_page(skb->data), size, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); /* Check for scattered buffer */ if (!skb_is_nonlinear(skb)) @@ -1241,15 +1322,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, qentry = nicvf_get_nxt_sqentry(sq, qentry); size = skb_frag_size(frag); - nicvf_sq_add_gather_subdesc(sq, qentry, size, - virt_to_phys( - skb_frag_address(frag))); + dma_addr = dma_map_page_attrs(&nic->pdev->dev, + skb_frag_page(frag), + frag->page_offset, size, + DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + /* Free entire chain of mapped buffers + * here 'i' = frags mapped + above mapped skb->data + */ + nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i); + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); } doorbell: if (nic->t88 && skb_shinfo(skb)->gso_size) { qentry = nicvf_get_nxt_sqentry(sq, qentry); - nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); + nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb); } nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); @@ -1282,6 +1374,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int offset; u16 *rb_lens = NULL; u64 *rb_ptrs = NULL; + u64 phys_addr; rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); /* Except 88xx pass1 on all other chips CQE_RX2_S is added to @@ -1296,15 +1389,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) else rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); - netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", - __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); - for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { payload_len = rb_lens[frag_num(frag)]; + phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs); + if (!phys_addr) { + if (skb) + dev_kfree_skb_any(skb); + return NULL; + } + if (!frag) { /* First fragment */ + dma_unmap_page_attrs(&nic->pdev->dev, + *rb_ptrs - cqe_rx->align_pad, + RCV_FRAG_LEN, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); skb = nicvf_rb_ptr_to_skb(nic, - *rb_ptrs - cqe_rx->align_pad, + phys_addr - cqe_rx->align_pad, payload_len); if (!skb) return NULL; @@ -1312,8 +1413,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - page = virt_to_page(phys_to_virt(*rb_ptrs)); - offset = phys_to_virt(*rb_ptrs) - page_address(page); + dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, + RCV_FRAG_LEN, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + page = virt_to_page(phys_to_virt(phys_addr)); + offset = phys_to_virt(phys_addr) - page_address(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, payload_len, RCV_FRAG_LEN); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 5cb84da99a2de5..10cb4b84625b14 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -87,7 +87,7 @@ #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) #define MAX_RCV_BUF_COUNT 
(1ULL << (RBDR_SIZE6 + 13)) #define RBDR_THRESH (RCV_BUF_COUNT / 2) -#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ +#define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */ #define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -301,6 +301,8 @@ struct queue_set { #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) +void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, + int hdr_sqe, u8 subdesc_cnt); void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features); int nicvf_set_qset_resources(struct nicvf *nic); From 18de7ba95f6e5ab150e482618123d92ee2240dc0 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:09 +0530 Subject: [PATCH 1059/1911] net: thunderx: Fix LMAC mode debug prints for QSGMII mode When BGX/LMACs are in QSGMII mode, for some LMACs, mode info is not being printed. This patch will fix that. With changes already done to not do any sort of serdes 2 lane mapping config calculation in kernel driver, we can get rid of this logic. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 4c8e8cf730bbc2..9b8a53e138e7de 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1011,12 +1011,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) dev_info(dev, "%s: 40G_KR4\n", (char *)str); break; case BGX_MODE_QSGMII: - if ((lmacid == 0) && - (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid)) - return; - if ((lmacid == 2) && - (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid)) - return; dev_info(dev, "%s: QSGMII\n", (char *)str); break; case BGX_MODE_RGMII: From 78aacb6f6eeea3c581a29b4a50438d0bdf85ad0b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:10 +0530 Subject: [PATCH 1060/1911] net: thunderx: Fix invalid mac addresses for node1 interfaces When booted with ACPI, random mac addresses are being assigned to node1 interfaces due to mismatch of bgx_id in BGX driver and ACPI tables. This patch fixes this issue by setting maximum BGX devices per node based on platform/soc instead of a macro. This change will set the bgx_id appropriately. Signed-off-by: Sunil Goutham Signed-off-by: David S. 
Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 58 ++++++++++++++----- .../net/ethernet/cavium/thunder/thunder_bgx.h | 1 - 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 9b8a53e138e7de..64a1095e4d1495 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -123,14 +123,44 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero) return 1; } +static int max_bgx_per_node; +static void set_max_bgx_per_node(struct pci_dev *pdev) +{ + u16 sdevid; + + if (max_bgx_per_node) + return; + + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); + switch (sdevid) { + case PCI_SUBSYS_DEVID_81XX_BGX: + max_bgx_per_node = MAX_BGX_PER_CN81XX; + break; + case PCI_SUBSYS_DEVID_83XX_BGX: + max_bgx_per_node = MAX_BGX_PER_CN83XX; + break; + case PCI_SUBSYS_DEVID_88XX_BGX: + default: + max_bgx_per_node = MAX_BGX_PER_CN88XX; + break; + } +} + +static struct bgx *get_bgx(int node, int bgx_idx) +{ + int idx = (node * max_bgx_per_node) + bgx_idx; + + return bgx_vnic[idx]; +} + /* Return number of BGX present in HW */ unsigned bgx_get_map(int node) { int i; unsigned map = 0; - for (i = 0; i < MAX_BGX_PER_NODE; i++) { - if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i]) + for (i = 0; i < max_bgx_per_node; i++) { + if (bgx_vnic[(node * max_bgx_per_node) + i]) map |= (1 << i); } @@ -143,7 +173,7 @@ int bgx_get_lmac_count(int node, int bgx_idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (bgx) return bgx->lmac_count; @@ -158,7 +188,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) struct bgx *bgx; struct lmac *lmac; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -172,7 +202,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state); const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); if (bgx) return bgx->lmac[lmacid].mac; @@ -183,7 +213,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac); void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -194,7 +224,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac); void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -217,7 +247,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause) { struct pfc *pfc = (struct pfc *)pause; - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -237,7 +267,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc); void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause) { struct pfc *pfc = (struct pfc *)pause; - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -369,7 +399,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) 
return 0; @@ -383,7 +413,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return 0; @@ -411,7 +441,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, struct lmac *lmac; u64 cfg; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -1328,11 +1358,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_release_regions; } + set_max_bgx_per_node(pdev); + pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK; - bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; + bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node; bgx->max_lmac = MAX_LMAC_PER_BGX; bgx_vnic[bgx->bgx_id] = bgx; } else { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index a60f189429bb65..c5080f2cead5d0 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -22,7 +22,6 @@ #define MAX_BGX_PER_CN88XX 2 #define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */ #define MAX_BGX_PER_CN83XX 4 -#define MAX_BGX_PER_NODE 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 #define MAX_DMAC_PER_LMAC 8 From 36fa35d22bffc78c85b5e68adbdd99e914bec764 Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Tue, 7 Mar 2017 18:09:11 +0530 Subject: [PATCH 1061/1911] net: thunderx: Allow IPv6 frames with zero UDP checksum Do not consider IPv6 frames with zero UDP checksum as frames with bad checksum and drop them. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index b1962dbd04093b..f13289f0d2386d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -622,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, nicvf_send_msg_to_pf(nic, &mbx); if (!nic->sqs_mode && (qidx == 0)) { - /* Enable checking L3/L4 length and TCP/UDP checksums */ + /* Enable checking L3/L4 length and TCP/UDP checksums + * Also allow IPv6 pkts with zero UDP checksum. + */ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, - (BIT(24) | BIT(23) | BIT(21))); + (BIT(24) | BIT(23) | BIT(21) | BIT(20))); nicvf_config_vlan_stripping(nic, nic->netdev->features); } From 8aad6f14c09d78862fc04e47214d398add9bb8ce Mon Sep 17 00:00:00 2001 From: "robert.foss@collabora.com" Date: Tue, 7 Mar 2017 11:46:25 -0500 Subject: [PATCH 1062/1911] qed: Fix copy of uninitialized memory In qed_ll2_start_ooo() the ll2_info variable is uninitialized and then passed to qed_ll2_acquire_connection() where it is copied into a new memory space. This shouldn't cause any issue as long as none of the copied memory is ever read. But the potential for a bug being introduced by reading this memory is real. Detected by CoverityScan, CID#1399632 ("Uninitialized scalar variable") Signed-off-by: Robert Foss Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 9a0b9af10a572f..5fb34db377c8c3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -968,7 +968,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev, { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - struct qed_ll2_conn ll2_info; + struct qed_ll2_conn ll2_info = { 0 }; int rc; ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO; From 294acf1c01bace5cea5d30b510504238bf5f7c25 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Mar 2017 18:33:31 +0100 Subject: [PATCH 1063/1911] net/tunnel: set inner protocol in network gro hooks The gso code of several tunnels type (gre and udp tunnels) takes for granted that the skb->inner_protocol is properly initialized and drops the packet elsewhere. On the forwarding path no one is initializing such field, so gro encapsulated packets are dropped on forward. Since commit 38720352412a ("gre: Use inner_proto to obtain inner header protocol"), this can be reproduced when the encapsulated packets use gre as the tunneling protocol. The issue happens also with vxlan and geneve tunnels since commit 8bce6d7d0d1e ("udp: Generalize skb_udp_segment"), if the forwarding host's ingress nic has h/w offload for such tunnel and a vxlan/geneve device is configured on top of it, regardless of the configured peer address and vni. To address the issue, this change initialize the inner_protocol field for encapsulated packets in both ipv4 and ipv6 gro complete callbacks. Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") Fixes: 8bce6d7d0d1e ("udp: Generalize skb_udp_segment") Signed-off-by: Paolo Abeni Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 +++- net/ipv6/ip6_offload.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 602d40f43687c9..5091f46826fadb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1487,8 +1487,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); skb_set_inner_network_header(skb, nhoff); + } csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 0838e6d01d2e49..93e58a5e18374b 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); + } iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); From 745cb7f8a5de0805cade3de3991b7a95317c7c73 Mon Sep 17 00:00:00 2001 From: "Dmitry V. 
Levin" Date: Tue, 7 Mar 2017 23:50:50 +0300 Subject: [PATCH 1064/1911] uapi: fix linux/packet_diag.h userspace compilation error Replace MAX_ADDR_LEN with its numeric value to fix the following linux/packet_diag.h userspace compilation error: /usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function) __u8 pdmc_addr[MAX_ADDR_LEN]; This is not the first case in the UAPI where the numeric value of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h already does the same: $ grep MAX_ADDR_LEN include/uapi/linux/if_link.h __u8 mac[32]; /* MAX_ADDR_LEN */ There are no UAPI headers besides these two that use MAX_ADDR_LEN. Signed-off-by: Dmitry V. Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/uapi/linux/packet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6ff4..0c5d5dd61b6ab1 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { From 9f691549f76d488a0c74397b3e51e943865ea01f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:12 -0800 Subject: [PATCH 1065/1911] bpf: fix struct htab_elem layout when htab_elem is removed from the bucket list the htab_elem.hash_node.next field should not be overridden too early otherwise we have a tiny race window between lookup and delete. The bug was discovered by manual code analysis and reproducible only with explicit udelay() in lookup_elem_raw(). Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ea87fb19a9416..63c86a7be2a1de 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -45,8 +45,13 @@ enum extra_elem_state { struct htab_elem { union { struct hlist_node hash_node; - struct bpf_htab *htab; - struct pcpu_freelist_node fnode; + struct { + void *padding; + union { + struct bpf_htab *htab; + struct pcpu_freelist_node fnode; + }; + }; }; union { struct rcu_head rcu; @@ -162,7 +167,8 @@ static int prealloc_init(struct bpf_htab *htab) offsetof(struct htab_elem, lru_node), htab->elem_size, htab->map.max_entries); else - pcpu_freelist_populate(&htab->freelist, htab->elems, + pcpu_freelist_populate(&htab->freelist, + htab->elems + offsetof(struct htab_elem, fnode), htab->elem_size, htab->map.max_entries); return 0; @@ -217,6 +223,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) int err, i; u64 cost; + BUILD_BUG_ON(offsetof(struct htab_elem, htab) != + offsetof(struct htab_elem, hash_node.pprev)); + BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != + offsetof(struct htab_elem, hash_node.pprev)); + if (lru && !capable(CAP_SYS_ADMIN)) /* LRU implementation is much complicated than other * maps. Hence, limit to CAP_SYS_ADMIN for now. 
@@ -582,9 +593,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, int err = 0; if (prealloc) { - l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); - if (!l_new) + struct pcpu_freelist_node *l; + + l = pcpu_freelist_pop(&htab->freelist); + if (!l) err = -E2BIG; + else + l_new = container_of(l, struct htab_elem, fnode); } else { if (atomic_inc_return(&htab->count) > htab->map.max_entries) { atomic_dec(&htab->count); From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:13 -0800 Subject: [PATCH 1066/1911] bpf: convert htab map to hlist_nulls when all map elements are pre-allocated one cpu can delete and reuse htab_elem while another cpu is still walking the hlist. In such case the lookup may miss the element. Convert hlist to hlist_nulls to avoid such scenario. When bucket lock is taken there is no need to take such precautions, so only convert map_lookup and map_get_next to nulls. The race window is extremely small and only reproducible with explicit udelay() inside lookup_nulls_elem_raw() Similar to hlist add hlist_nulls_for_each_entry_safe() and hlist_nulls_entry_safe() helpers. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 5 ++ include/linux/rculist_nulls.h | 14 ++++++ kernel/bpf/hashtab.c | 94 ++++++++++++++++++++++------------- 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index b01fe100908430..87ff4f58a2f018 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -29,6 +29,11 @@ struct hlist_nulls_node { ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) /** * ptr_is_a_nulls - Test if a ptr is a nulls * @ptr: ptr to be tested diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597b0..a23a3315318048 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) +/** + * hlist_nulls_for_each_entry_safe - + * iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. 
+ */ +#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 63c86a7be2a1de..afe5bab376c981 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,11 +13,12 @@ #include #include #include +#include #include "percpu_freelist.h" #include "bpf_lru_list.h" struct bucket { - struct hlist_head head; + struct hlist_nulls_head head; raw_spinlock_t lock; }; @@ -44,7 +45,7 @@ enum extra_elem_state { /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { - struct hlist_node hash_node; + struct hlist_nulls_node hash_node; struct { void *padding; union { @@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; for (i = 0; i < htab->n_buckets; i++) { - INIT_HLIST_HEAD(&htab->buckets[i].head); + INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); } @@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) return &htab->buckets[hash & (htab->n_buckets - 1)]; } -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) { return &__select_bucket(htab, hash)->head; } -static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, +/* this lookup function can only be called with bucket lock taken */ +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, void *key, u32 key_size) { + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l->hash == hash && !memcmp(&l->key, key, key_size)) return l; return NULL; } +/* can be called without bucket lock. 
it will repeat the loop in + * the unlikely event when elements moved from one bucket into another + * while link list is being walked + */ +static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, + u32 hash, void *key, + u32 key_size, u32 n_buckets) +{ + struct hlist_nulls_node *n; + struct htab_elem *l; + +again: + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) + goto again; + + return NULL; +} + /* Called from syscall or from eBPF program */ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l; u32 hash, key_size; @@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) head = select_bucket(htab, hash); - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); return l; } @@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) { struct bpf_htab *htab = (struct bpf_htab *)arg; - struct htab_elem *l, *tgt_l; - struct hlist_head *head; + struct htab_elem *l = NULL, *tgt_l; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; @@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) raw_spin_lock_irqsave(&b->lock, flags); - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); break; } @@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; int i; @@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, hash); /* lookup the key */ - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); if (!l) { i = 0; @@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) } /* key was found, get next key in the same bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), struct htab_elem, hash_node); if (next_l) { @@ -511,7 +537,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, i); /* pick first element in the bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), struct htab_elem, hash_node); if (next_l) { /* if it's not empty, just return it */ @@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head 
*head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); free_htab_elem(htab, l_old); } ret = 0; @@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { bpf_lru_node_set_ref(&l_new->lru_node); - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); } ret = 0; @@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, ret = PTR_ERR(l_new); goto err; } - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); } ret = 0; err: @@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, } else { pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } ret = 0; @@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); ret = 0; } @@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - 
hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); ret = 0; } @@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab) int i; for (i = 0; i < htab->n_buckets; i++) { - struct hlist_head *head = select_bucket(htab, i); - struct hlist_node *n; + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_safe(l, n, head, hash_node) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + hlist_nulls_del_rcu(&l->hash_node); if (l->state != HTAB_EXTRA_ELEM_USED) htab_elem_free(htab, l); } From 834e0f8ae4e104fa026ffe5cdee58491f3078403 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2017 17:40:17 -0500 Subject: [PATCH 1067/1911] drm/amdgpu: validate parameters in the gem ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reject it if there are any invalid flags or domains. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 51d75946338460..106cf83c2e6b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -202,6 +202,27 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, bool kernel = false; int r; + /* reject invalid gem flags */ + if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_VRAM_CLEARED| + AMDGPU_GEM_CREATE_SHADOW | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + r = -EINVAL; + goto error_unlock; + } + /* reject invalid gem domains */ + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GDS | + AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA)) { + r = -EINVAL; + goto error_unlock; + } + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { From a5b11dac1f57c4b327c2d6eccb8fdd01499f9e17 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Mar 2017 17:23:21 -0500 Subject: [PATCH 1068/1911] drm/amdgpu: bump driver version for some new features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We added new gem ioctl flags and the new fences ioctl, but forgot to bump the version. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 75fc376ba73587..f7adbace428a49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -59,9 +59,10 @@ * - 3.7.0 - Add support for VCE clock list packet * - 3.8.0 - Add support raster config init in the kernel * - 3.9.0 - Add support for memory query info about VRAM and GTT.
+ * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 9 +#define KMS_DRIVER_MINOR 10 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; From 8bd49ac86677ca43d64f08c45864e438283d6a76 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Mar 2017 09:57:17 +0100 Subject: [PATCH 1069/1911] s390: wire up statx system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 4 +++- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/syscalls.S | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4384bc797a54f9..152de9b796e149 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +/* Number 378 is reserved for guarded storage */ +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ae2cda5eee5a99..e89cc2e71db169 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8fd2..2659b5cfeddba4 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) +NI_SYSCALL +SYSCALL(sys_statx,compat_sys_statx) From 6bbc4a4144b1a69743022ac68dfaf6e7d993abb9 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:28 -0800 Subject: [PATCH 1070/1911] userfaultfd: shmem: __do_fault requires VM_FAULT_NOPAGE __do_fault assumes vmf->page has been initialized and is valid if VM_FAULT_NOPAGE is not returned by vma->vm_ops->fault(vma, vmf). handle_userfault() in turn should return VM_FAULT_NOPAGE if it doesn't return VM_FAULT_SIGBUS or VM_FAULT_RETRY (the other two possibilities). This VM_FAULT_NOPAGE case is only invoked when signal are pending and it didn't matter for anonymous memory before. It only started to matter since shmem was introduced. hugetlbfs also takes a different path and doesn't exercise __do_fault. Link: http://lkml.kernel.org/r/20170228154201.GH5816@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Dmitry Vyukov Cc: "Kirill A. 
Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579db3..f62199b90fd01c 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -490,7 +490,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:31 -0800 Subject: [PATCH 1071/1911] scripts/spelling.txt: add "disble(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: disble||disable disbled||disabled I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c untouched. The macro is not referenced at all, but this commit is touching only comment blocks just in case. Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 2 +- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- drivers/crypto/ux500/cryp/cryp.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/hv/channel.c | 2 +- drivers/isdn/hisax/st5481_b.c | 2 +- drivers/mtd/spi-nor/spi-nor.c | 2 +- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 2 +- drivers/usb/gadget/legacy/inode.c | 3 +-- drivers/usb/host/xhci.c | 4 ++-- include/linux/regulator/machine.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/events/core.c | 2 +- scripts/spelling.txt | 2 ++ sound/soc/amd/acp-pcm-dma.c | 2 +- 17 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 2c41b713841fd4..44886c91e112d4 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is -disbled (e.g. scheduler, locking). +disabled (e.g. scheduler, locking). Usage ----- diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe08..14970f11bbf2b6 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae05868..8f3d9cf26ff9f7 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). 
*/ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index 43a0c8a26ab0c5..00a16ab601cb07 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data, void cryp_flush_inoutfifo(struct cryp_device_data *device_data) { /* - * We always need to disble the hardware before trying to flush the + * We always need to disable the hardware before trying to flush the * FIFO. This is something that isn't written in the design * specification, but we have been informed by the hardware designers * that this must be done. diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31375bdde6f176..011800f621c6ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* disble sdma engine before programing it */ + /* disable sdma engine before programing it */ sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 81a80c82f1bd2b..bd0d1988feb2ad 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is - * rescinded later (e.g., we dynamically disble an Integrated Service + * rescinded later (e.g., we dynamically disable an Integrated Service * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): * here we should skip most of the below cleanup work. */ diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 409849165838fb..f64a36007800cf 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) } } } else { - // Disble B channel interrupts + // Disable B channel interrupts st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL); // Disable B channel FIFOs diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1ae872bfc3ba5b..747645c74134de 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor) } /* - * Send write disble instruction to the chip. + * Send write disable instruction to the chip. 
*/ static inline int write_disable(struct spi_nor *nor) { diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 6d31f92ef2b634..90b3b46f85cc8c 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1163,7 +1163,7 @@ struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; #define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ -#define IB_MAC_IOCB_RSP_I 0x02 /* Disble Intr Generation */ +#define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ #define IB_MAC_IOCB_RSP_NU 0x08 /* No checksum rcvd */ diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 109e2c99e6c162..95d8f25cbccab7 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit) * does not disable its parity logic prior to * the start of the reset. This may cause a * parity error to be detected and thus a - * spurious SERR or PERR assertion. Disble + * spurious SERR or PERR assertion. Disable * PERR and SERR responses during the CHIPRST. */ mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c19..79a2d8fba6b606 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *); /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { - /* DISBLED is the initial state. - */ + /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6d6c46000e56cc..50aee8b7718b30 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_lock_irqsave(&xhci->lock, flags); - /* disble usb3 ports Wake bits*/ + /* disable usb3 ports Wake bits */ port_index = xhci->num_usb3_ports; port_array = xhci->usb3_ports; while (port_index--) { @@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) writel(t2, port_array[port_index]); } - /* disble usb2 ports Wake bits*/ + /* disable usb2 ports Wake bits */ port_index = xhci->num_usb2_ports; port_array = xhci->usb2_ports; while (port_index--) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586dc..c9f795e9a2ee26 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c742841..48851327a15e18 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css) * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. 
The caller is responsible for - * cleaning up with cgroup_apply_control_disble(). + * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e320a..a17ed56c8ce1f9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event, */ #define PERF_CPU_HRTIMER (1000 / HZ) /* - * function must be called with interrupts disbled + * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0458b037c8a137..6dae4df472f69e 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -372,6 +372,8 @@ disassocation||disassociation disapear||disappear disapeared||disappeared disappared||disappeared +disble||disable +disbled||disabled disconnet||disconnect discontinous||discontinuous dispertion||dispersion diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index ec1067a679da40..08b1399d1da2b8 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg) writel(val, acp_mmio + (reg * 4)); } -/* Configure a given dma channel parameters - enable/disble, +/* Configure a given dma channel parameters - enable/disable, * number of descriptors, priority */ static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num, From 505d3085d7120a9f4cd0d6ffaa876968854b3baa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:33 -0800 Subject: [PATCH 1072/1911] scripts/spelling.txt: add "overide" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: overide||override While we are here, fix the doubled "address" in the touched line Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt. Also, fix the comment block style in the touched hunks in drivers/media/dvb-frontends/drx39xyj/drx_driver.h. Link: http://lkml.kernel.org/r/1481573103-11329-21-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../devicetree/bindings/regulator/ti-abb-regulator.txt | 2 +- drivers/block/paride/pcd.c | 2 +- drivers/block/paride/pd.c | 2 +- drivers/block/paride/pf.c | 2 +- drivers/block/paride/pg.c | 2 +- drivers/block/paride/pt.c | 2 +- drivers/media/dvb-frontends/drx39xyj/drx_driver.h | 8 +++----- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- include/dt-bindings/sound/cs42l42.h | 2 +- include/net/irda/timer.h | 2 +- kernel/trace/trace_stack.c | 2 +- scripts/spelling.txt | 1 + tools/lguest/lguest.c | 2 +- tools/lib/bpf/Makefile | 2 +- tools/lib/traceevent/Makefile | 2 +- tools/lib/traceevent/event-parse.h | 2 +- 16 files changed, 18 insertions(+), 19 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt index c3f6546ebac777..6a23ad9ac53a4c 100644 --- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt @@ -45,7 +45,7 @@ Required Properties: Optional Properties: - reg-names: In addition to the required properties, the following are optional - "efuse-address" - Contains efuse base address used to pick up ABB info. 
- - "ldo-address" - Contains address of ABB LDO overide register address. + - "ldo-address" - Contains address of ABB LDO override register. "efuse-address" is required for this. - ti,ldovbb-vset-mask - Required if ldo-address is set, mask for LDO override register to provide override vset value. diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 10aed84244f518..939641d6e2625e 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -50,7 +50,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (46) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 644ba0888bd41b..9cfd2e06a64917 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -61,7 +61,7 @@ first drive found. - major You may use this parameter to overide the + major You may use this parameter to override the default major number (45) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index ed93e8badf5684..14c5d32f5d8bc0 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -59,7 +59,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (47) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 5db955fe3a9490..3b5882bfb7364e 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -84,7 +84,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (97) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 61fc6824299ac1..e815312a00add6 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -61,7 +61,7 @@ the slower the port i/o. In some cases, setting this to zero will speed up the device. (default -1) - major You may use this parameter to overide the + major You may use this parameter to override the default major number (96) that this driver will use. Be sure to change the device name as well. diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h index 7a681d8202c7ee..4442e478db72a2 100644 --- a/drivers/media/dvb-frontends/drx39xyj/drx_driver.h +++ b/drivers/media/dvb-frontends/drx39xyj/drx_driver.h @@ -256,8 +256,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * The actual DAP implementation may be restricted to only one of the modes. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the mode defined below. -* +* overrides or cannot handle the mode defined below. */ #ifndef DRXDAP_SINGLE_MASTER #define DRXDAP_SINGLE_MASTER 1 @@ -272,7 +271,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. 
* A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. +* overrides or cannot handle the chunksize defined below. * * Beware that the DAP uses DRXDAP_MAX_WCHUNKSIZE to create a temporary data * buffer. Do not undefine or choose too large, unless your system is able to @@ -292,8 +291,7 @@ int drxbsp_tuner_default_i2c_write_read(struct tuner_instance *tuner, * * This maximum size may be restricted by the actual DAP implementation. * A compiler warning or error will be generated if the DAP implementation -* overides or cannot handle the chunksize defined below. -* +* overrides or cannot handle the chunksize defined below. */ #ifndef DRXDAP_MAX_RCHUNKSIZE #define DRXDAP_MAX_RCHUNKSIZE 60 diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 90b3b46f85cc8c..84ac50f92c9c51 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1162,7 +1162,7 @@ struct ob_mac_tso_iocb_rsp { struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; -#define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ +#define IB_MAC_IOCB_RSP_OI 0x01 /* Override intr delay */ #define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ diff --git a/include/dt-bindings/sound/cs42l42.h b/include/dt-bindings/sound/cs42l42.h index 399a123aed5815..db69d84ed7d141 100644 --- a/include/dt-bindings/sound/cs42l42.h +++ b/include/dt-bindings/sound/cs42l42.h @@ -20,7 +20,7 @@ #define CS42L42_HPOUT_LOAD_1NF 0 #define CS42L42_HPOUT_LOAD_10NF 1 -/* HPOUT Clamp to GND Overide */ +/* HPOUT Clamp to GND Override */ #define CS42L42_HPOUT_CLAMP_EN 0 #define CS42L42_HPOUT_CLAMP_DIS 1 diff --git a/include/net/irda/timer.h b/include/net/irda/timer.h index cb2615ccf761d6..d784f242cf7b4d 100644 --- a/include/net/irda/timer.h +++ b/include/net/irda/timer.h @@ -59,7 +59,7 @@ struct lap_cb; * Slot timer must never exceed 85 ms, and must always be at least 25 ms, * suggested to 75-85 msec by IrDA lite. This doesn't work with a lot of * devices, and other stackes uses a lot more, so it's best we do it as well - * (Note : this is the default value and sysctl overides it - Jean II) + * (Note : this is the default value and sysctl overrides it - Jean II) */ #define SLOT_TIMEOUT (90*HZ/1000) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 1d68b5b7ad4133..5fb1f2c87e6b84 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -65,7 +65,7 @@ void stack_trace_print(void) } /* - * When arch-specific code overides this function, the following + * When arch-specific code overrides this function, the following * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. 
* stack_trace_index[] diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 6dae4df472f69e..0545f5a8cabed7 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -734,6 +734,7 @@ oustanding||outstanding overaall||overall overhread||overhead overlaping||overlapping +overide||override overrided||overridden overriden||overridden overun||overrun diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 11c8d9bc762ef0..5d19fdf80292c2 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1387,7 +1387,7 @@ static bool pci_data_iowrite(u16 port, u32 mask, u32 val) /* Allow writing to any other BAR, or expansion ROM */ iowrite(portoff, val, mask, &d->config_words[reg]); return true; - /* We let them overide latency timer and cacheline size */ + /* We let them override latency timer and cacheline size */ } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { /* Only let them change the first two fields. */ if (mask == 0xFFFFFFFF) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e2efddf1023177..1f5300e56b44dc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -132,7 +132,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 47076b15eebeaa..9b8555ea3459c8 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -135,7 +135,7 @@ else Q = @ endif -# Disable command line variables (CFLAGS) overide from top +# Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. MAKEOVERRIDES= diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 66342804161c80..0c03538df74c01 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -140,7 +140,7 @@ struct pevent_plugin_option { * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = { * { * .name = "option-name", - * .plugin_alias = "overide-file-name", (optional) + * .plugin_alias = "override-file-name", (optional) * .description = "description of option to show users", * }, * { From 52c50ca75c534c0772b71900a29b3a71439b32ef Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:36 -0800 Subject: [PATCH 1073/1911] powerpc/mm: handle protnone ptes on fork We need to mark pages of parent process read only on fork. Numa fault pte needs a protnone ptes variant with saved write flag set. On fork we need to make sure we remove the saved write bit. Instead of adding the protnone check in the caller update ptep_set_wrprotect variants to clear savedwrite bit. Without this we see random segfaults in application on fork. 
Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 73 +++++++++++--------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c70158a..f0b08acda5ebf8 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,23 +347,53 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that doesn't have + * privileged bit sit. We mark prot none as one which has + * present and pviliged bit set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this complete the + * interface. + */ + if (pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +427,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -466,19 +491,6 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) -{ - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. 
- */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); -} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -982,11 +994,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) From d19469e8415813cceaa494b6f538e327b9a95f3b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 9 Mar 2017 16:16:39 -0800 Subject: [PATCH 1074/1911] power/mm: update pte_write and pte_wrprotect to handle savedwrite We use pte_write() to check whethwer the pte entry is writable. This is mostly used to later mark the pte read only if it is writable. The other use of pte_write() is to check whether the pte_entry is writable so that hardware page table entry can be marked accordingly. This is used in kvm where we look at qemu page table entry and update hardware hash page table for the guest with correct write enable bit. With the above, for the first usage we should also check the savedwrite bit so that we can correctly clear the savedwite bit. For the later, we add a new variant __pte_write(). With this we can revert write_protect_page part of 595cd8f256d2 ("mm/ksm: handle protnone saved writes when making page write protect"). But I left it as it is as an example code for savedwrite check. Fixes: c137a2757b886 ("powerpc/mm/autonuma: switch ppc64 to its own implementation of saved write") Link: http://lkml.kernel.org/r/1488203787-17849-2-git-send-email-aneesh.kumar@linux.vnet.ibm.com Signed-off-by: Aneesh Kumar K.V Cc: Rik van Riel Cc: Mel Gorman Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/pgtable.h | 24 ++++++++++++++++---- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index f0b08acda5ebf8..ec1e731e6a2d6d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -347,7 +347,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) -static inline int pte_write(pte_t pte) +static inline int __pte_write(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); } @@ -373,11 +373,16 @@ static inline bool pte_savedwrite(pte_t pte) } #endif +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); @@ -390,7 +395,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, * We should not find protnone for hugetlb, but this complete the * interface. 
*/ - if (pte_write(*ptep)) + if (__pte_write(*ptep)) pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); else if (unlikely(pte_savedwrite(*ptep))) pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); @@ -490,7 +495,13 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) +{ + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); +} #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) @@ -518,6 +529,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -938,6 +951,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -994,7 +1008,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (pmd_write((*pmdp))) + if (__pmd_write((*pmdp))) pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); else if (unlikely(pmd_savedwrite(*pmdp))) pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de34b..8c68145ba1bd35 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee90..ce6f2121fffe46 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:42 -0800 Subject: [PATCH 1075/1911] x86, mm: fix gup_pte_range() vs DAX mappings gup_pte_range() fails to check pte_allows_gup() before translating a DAX pte entry, pte_devmap(), to a page. This allows writes to read-only mappings, and bypasses the DAX cacheline dirty tracking due to missed 'mkwrite' faults. The gup_huge_pmd() path and the gup_huge_pud() path correctly check pte_allows_gup() before checking for _devmap() entries. Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams Reported-by: Dave Hansen Reported-by: Ross Zwisler Cc: Xiong Zhou Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a96937c..9d32ee6088079c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:45 -0800 Subject: [PATCH 1076/1911] x86, mm: unify exit paths in gup_pte_range() All exit paths from gup_pte_range() require pte_unmap() of the original pte page before returning. Refactor the code to have a single exit point to do the unmap. This mirrors the flow of the generic gup_pte_range() in mm/gup.c. Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 9d32ee6088079c..1f3b6ef105cda5 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. 
+ */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; - if (!pte_allows_gup(pte_val(pte), write)) { - pte_unmap(ptep); - return 0; - } + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) From dd0db88d8094a6d9d4d1fc5fcd56ab619f54ccf8 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:49 -0800 Subject: [PATCH 1077/1911] userfaultfd: non-cooperative: rollback userfaultfd_exit Patch series "userfaultfd non-cooperative further update for 4.11 merge window". Unfortunately I noticed one relevant bug in userfaultfd_exit while doing more testing. I've been doing testing before and this was also tested by kbuild bot and exercised by the selftest, but this bug never reproduced before. I dropped userfaultfd_exit as result. I dropped it because of implementation difficulty in receiving signals in __mmput and because I think -ENOSPC as result from the background UFFDIO_COPY should be enough already. Before I decided to remove userfaultfd_exit, I noticed userfaultfd_exit wasn't exercised by the selftest and when I tried to exercise it, after moving it to a more correct place in __mmput where it would make more sense and where the vma list is stable, it resulted in the event_wait_completion in D state. So then I added the second patch to be sure even if we call userfaultfd_event_wait_completion too late during task exit(), we won't risk to generate tasks in D state. The same check exists in handle_userfault() for the same reason, except it makes a difference there, while here is just a robustness check and it's run under WARN_ON_ONCE. While looking at the userfaultfd_event_wait_completion() function I looked back at its callers too while at it and I think it's not ok to stop executing dup_fctx on the fcs list because we relay on userfaultfd_event_wait_completion to execute userfaultfd_ctx_put(fctx->orig) which is paired against userfaultfd_ctx_get(fctx->orig) in dup_userfault just before list_add(fcs). This change only takes care of fctx->orig but this area also needs further review looking for similar problems in fctx->new. The only patch that is urgent is the first because it's an use after free during a SMP race condition that affects all processes if CONFIG_USERFAULTFD=y. Very hard to reproduce though and probably impossible without SLUB poisoning enabled. This patch (of 3): I once reproduced this oops with the userfaultfd selftest, it's not easily reproducible and it requires SLUB poisoning to reproduce. 
general protection fault: 0000 [#1] SMP Modules linked in: CPU: 2 PID: 18421 Comm: userfaultfd Tainted: G ------------ T 3.10.0+ #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014 task: ffff8801f83b9440 ti: ffff8801f833c000 task.ti: ffff8801f833c000 RIP: 0010:[] [] userfaultfd_exit+0x29/0xa0 RSP: 0018:ffff8801f833fe80 EFLAGS: 00010202 RAX: ffff8801f833ffd8 RBX: 6b6b6b6b6b6b6b6b RCX: ffff8801f83b9440 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800baf18600 RBP: ffff8801f833fee8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: ffffffff8127ceb3 R12: 0000000000000000 R13: ffff8800baf186b0 R14: ffff8801f83b99f8 R15: 00007faed746c700 FS: 0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007faf0966f028 CR3: 0000000001bc6000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: do_exit+0x297/0xd10 SyS_exit+0x17/0x20 tracesys+0xdd/0xe2 Code: 00 00 66 66 66 66 90 55 48 89 e5 41 54 53 48 83 ec 58 48 8b 1f 48 85 db 75 11 eb 73 66 0f 1f 44 00 00 48 8b 5b 10 48 85 db 74 64 <4c> 8b a3 b8 00 00 00 4d 85 e4 74 eb 41 f6 84 24 2c 01 00 00 80 RIP [] userfaultfd_exit+0x29/0xa0 RSP ---[ end trace 9fecd6dcb442846a ]--- In the debugger I located the "mm" pointer in the stack and walking mm->mmap->vm_next through the end shows the vma->vm_next list is fully consistent and it is null terminated list as expected. So this has to be an SMP race condition where userfaultfd_exit was running while the vma list was being modified by another CPU. When userfaultfd_exit() run one of the ->vm_next pointers pointed to SLAB_POISON (RBX is the vma pointer and is 0x6b6b..). The reason is that it's not running in __mmput but while there are still other threads running and it's not holding the mmap_sem (it can't as it has to wait the even to be received by the manager). So this is an use after free that was happening for all processes. One more implementation problem aside from the race condition: userfaultfd_exit has really to check a flag in mm->flags before walking the vma or it's going to slowdown the exit() path for regular tasks. One more implementation problem: at that point signals can't be delivered so it would also create a task in D state if the manager doesn't read the event. The major design issue: it overall looks superfluous as the manager can check for -ENOSPC in the background transfer: if (mmget_not_zero(ctx->mm)) { [..] } else { return -ENOSPC; } It's safer to roll it back and re-introduce it later if at all. [rppt@linux.vnet.ibm.com: documentation fixup after removal of UFFD_EVENT_EXIT] Link: http://lkml.kernel.org/r/1488345437-4364-1-git-send-email-rppt@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/20170224181957.19736-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Signed-off-by: Mike Rapoport Acked-by: Mike Rapoport Cc: "Dr. 
David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/userfaultfd.txt | 4 ---- fs/userfaultfd.c | 28 ---------------------------- include/linux/userfaultfd_k.h | 6 ------ include/uapi/linux/userfaultfd.h | 5 +---- kernel/exit.c | 1 - 5 files changed, 1 insertion(+), 43 deletions(-) diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt index 0e5543a920e5b2..bb2f945f87ab6a 100644 --- a/Documentation/vm/userfaultfd.txt +++ b/Documentation/vm/userfaultfd.txt @@ -172,10 +172,6 @@ the same read(2) protocol as for the page fault notifications. The manager has to explicitly enable these events by setting appropriate bits in uffdio_api.features passed to UFFDIO_API ioctl: -UFFD_FEATURE_EVENT_EXIT - enable notification about exit() of the -non-cooperative process. When the monitored process exits, the uffd -manager will get UFFD_EVENT_EXIT. - UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When this feature is enabled, the userfaultfd context of the parent process is duplicated into the newly created process. The manager receives diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f62199b90fd01c..16d0cc600fa9d5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -775,34 +775,6 @@ void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) } } -void userfaultfd_exit(struct mm_struct *mm) -{ - struct vm_area_struct *vma = mm->mmap; - - /* - * We can do the vma walk without locking because the caller - * (exit_mm) knows it now has exclusive access - */ - while (vma) { - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { - struct userfaultfd_wait_queue ewq; - - userfaultfd_ctx_get(ctx); - - msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_EXIT; - - userfaultfd_event_wait_completion(ctx, &ewq); - - ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; - } - - vma = vma->vm_next; - } -} - static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 0468548acebfef..f2b79bf4c8954c 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -72,8 +72,6 @@ extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); -extern void userfaultfd_exit(struct mm_struct *mm); - #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -139,10 +137,6 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } -static inline void userfaultfd_exit(struct mm_struct *mm) -{ -} - #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index c055947c5c989f..3b059530dac95f 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -18,8 +18,7 @@ * means the userland is reading). 
*/ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_EXIT | \ - UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -113,7 +112,6 @@ struct uffd_msg { #define UFFD_EVENT_REMAP 0x14 #define UFFD_EVENT_REMOVE 0x15 #define UFFD_EVENT_UNMAP 0x16 -#define UFFD_EVENT_EXIT 0x17 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -163,7 +161,6 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) -#define UFFD_FEATURE_EVENT_EXIT (1<<7) __u64 features; __u64 ioctls; diff --git a/kernel/exit.c b/kernel/exit.c index e126ebf2400c22..516acdb0e0ec9b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -554,7 +554,6 @@ static void exit_mm(void) enter_lazy_tlb(mm, current); task_unlock(current); mm_update_next_owner(mm); - userfaultfd_exit(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) exit_oom_victim(); From 9a69a829f9b656c2a220d65a94ecf7b5887c5da1 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:52 -0800 Subject: [PATCH 1078/1911] userfaultfd: non-cooperative: robustness check Similar to the handle_userfault() case, also make sure to never attempt to send any event past the PF_EXITING point of no return. This is purely a robustness check. Link: http://lkml.kernel.org/r/20170224181957.19736-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 16d0cc600fa9d5..668bbbd2e04dc8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -530,8 +530,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { - int ret = 0; + int ret; + + ret = -1; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) + goto out; + ret = 0; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -566,7 +571,7 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * ctx may go away after this if the userfault pseudo fd is * already released. */ - +out: userfaultfd_ctx_put(ctx); return ret; } From 8c9e7bb7a41f2bbd54b2caefb274fb3de239819f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:16:54 -0800 Subject: [PATCH 1079/1911] userfaultfd: non-cooperative: release all ctx in dup_userfaultfd_complete Don't stop running dup_fctx() even if userfaultfd_event_wait_completion fails as it has to run userfaultfd_ctx_put on all ctx to pair against the userfaultfd_ctx_get that was run on all fctx->orig in dup_userfaultfd. Link: http://lkml.kernel.org/r/20170224181957.19736-4-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. 
David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 668bbbd2e04dc8..dd48052e086f6d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -527,16 +527,12 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) return ret; } -static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, - struct userfaultfd_wait_queue *ewq) +static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) { - int ret; - - ret = -1; if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; - ret = 0; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -552,7 +548,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { - ret = -1; __remove_wait_queue(&ctx->event_wqh, &ewq->wq); break; } @@ -573,7 +568,6 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, */ out: userfaultfd_ctx_put(ctx); - return ret; } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, @@ -631,7 +625,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) return 0; } -static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; @@ -641,17 +635,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; - return userfaultfd_event_wait_completion(ctx, &ewq); + userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { - int ret = 0; struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { - if (!ret) - ret = dup_fctx(fctx); + dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } From cbfd0c1001bedb4b051cf4a1f5df24f1500381bc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Mar 2017 16:16:57 -0800 Subject: [PATCH 1080/1911] include/linux/fs.h: fix unsigned enum warning with gcc-4.2 With arm-linux-gcc-4.2, almost every file we build in the kernel ends up with this warning: include/linux/fs.h:2648: warning: comparison of unsigned expression < 0 is always false Later versions don't have this problem, but it's easy enough to work around. 
Link: http://lkml.kernel.org/r/20161216105634.235457-12-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Russell King Cc: Brendan Gregg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index aad3fd0ff5f831..7251f7bb45e8b8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2678,7 +2678,7 @@ static const char * const kernel_read_file_str[] = { static inline const char *kernel_read_file_id_str(enum kernel_read_file_id id) { - if (id < 0 || id >= READING_MAX_ID) + if ((unsigned)id >= READING_MAX_ID) return kernel_read_file_str[READING_UNKNOWN]; return kernel_read_file_str[id]; From ce9311cf95ad8baf044a014738d76973d93b739a Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Thu, 9 Mar 2017 16:17:00 -0800 Subject: [PATCH 1081/1911] mm/vmstats: add thp_split_pud event for clarity We added support for PUD-sized transparent hugepages, however we count the event "thp split pud" into thp_split_pmd event. To separate the event count of thp split pud from pmd, add a new event named thp_split_pud. Link: http://lkml.kernel.org/r/1488282380-5076-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Michal Hocko Cc: Joonsoo Kim Cc: Sebastian Siewior Cc: Hugh Dickins Cc: Christoph Lameter Cc: Kirill A. Shutemov Cc: Aneesh Kumar K.V Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Ebru Akagunduz Cc: David Rientjes Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 3 +++ mm/huge_memory.c | 2 +- mm/vmstat.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 6aa1b6cb58285d..a80b7b59cf3341 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -79,6 +79,9 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + THP_SPLIT_PUD, +#endif THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d36b2af4d1bf4b..8f037e256c5445 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1828,7 +1828,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud)); - count_vm_event(THP_SPLIT_PMD); + count_vm_event(THP_SPLIT_PUD); pudp_huge_clear_flush_notify(vma, haddr, pud); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 69f9aff39a2eaf..b1947f0cbee2f9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1065,6 +1065,9 @@ const char * const vmstat_text[] = { "thp_split_page_failed", "thp_deferred_split_page", "thp_split_pmd", +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + "thp_split_pud", +#endif "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif From f4b7ac68f438fa8521bbbf421f194ff10b0a7577 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 9 Mar 2017 16:17:03 -0800 Subject: [PATCH 1082/1911] drivers/md/bcache/util.h: remove duplicate inclusion of blkdev.h Link: http://lkml.kernel.org/r/20170226060230.11555-1-standby24x7@gmail.com Signed-off-by: Masanari Iida Acked-by: Coly Li Cc: Kent Overstreet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bcache/util.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/bcache/util.h 
b/drivers/md/bcache/util.h index a126919ed10276..5d13930f0f22fc 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include From bfc7228b9a9647e1c353e50b40297a2929801759 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 9 Mar 2017 16:17:06 -0800 Subject: [PATCH 1083/1911] mm/cgroup: avoid panic when init with low memory The system may panic when initialisation is done when almost all the memory is assigned to the huge pages using the kernel command line parameter hugepage=xxxx. Panic may occur like this: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000302b88 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2048 [ 0.082424] NUMA pSeries Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.9.0-15-generic #16-Ubuntu task: c00000021ed01600 task.stack: c00000010d108000 NIP: c000000000302b88 LR: c000000000270e04 CTR: c00000000016cfd0 REGS: c00000010d10b2c0 TRAP: 0300 Not tainted (4.9.0-15-generic) MSR: 8000000002009033 [ 0.082770] CR: 28424422 XER: 00000000 CFAR: c0000000003d28b8 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1 GPR00: c000000000270e04 c00000010d10b540 c00000000141a300 c00000010fff6300 GPR04: 0000000000000000 00000000026012c0 c00000010d10b630 0000000487ab0000 GPR08: 000000010ee90000 c000000001454fd8 0000000000000000 0000000000000000 GPR12: 0000000000004400 c00000000fb80000 00000000026012c0 00000000026012c0 GPR16: 00000000026012c0 0000000000000000 0000000000000000 0000000000000002 GPR20: 000000000000000c 0000000000000000 0000000000000000 00000000024200c0 GPR24: c0000000016eef48 0000000000000000 c00000010fff7d00 00000000026012c0 GPR28: 0000000000000000 c00000010fff7d00 c00000010fff6300 c00000010d10b6d0 NIP mem_cgroup_soft_limit_reclaim+0xf8/0x4f0 LR do_try_to_free_pages+0x1b4/0x450 Call Trace: do_try_to_free_pages+0x1b4/0x450 try_to_free_pages+0xf8/0x270 __alloc_pages_nodemask+0x7a8/0xff0 new_slab+0x104/0x8e0 ___slab_alloc+0x620/0x700 __slab_alloc+0x34/0x60 kmem_cache_alloc_node_trace+0xdc/0x310 mem_cgroup_init+0x158/0x1c8 do_one_initcall+0x68/0x1d0 kernel_init_freeable+0x278/0x360 kernel_init+0x24/0x170 ret_from_kernel_thread+0x5c/0x74 Instruction dump: eb81ffe0 eba1ffe8 ebc1fff0 ebe1fff8 4e800020 3d230001 e9499a42 3d220004 3929acd8 794a1f24 7d295214 eac90100 2fa90000 419eff74 3b200000 ---[ end trace 342f5208b00d01b6 ]--- This is a chicken and egg issue where the kernel try to get free memory when allocating per node data in mem_cgroup_init(), but in that path mem_cgroup_soft_limit_reclaim() is called which assumes that these data are allocated. As mem_cgroup_soft_limit_reclaim() is best effort, it should return when these data are not yet allocated. This patch also fixes potential null pointer access in mem_cgroup_remove_from_trees() and mem_cgroup_update_tree(). 
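In code terms the idea is simply to treat a missing per-node tree as "nothing
to reclaim"; a condensed sketch of the guard, combining the checks the hunks
below add in three places (all identifiers are the ones used in the patch):

        mctz = soft_limit_tree_node(nid);
        /* During early init the soft limit trees may not be allocated
         * yet; soft limit reclaim is best effort, so bail out instead
         * of dereferencing a NULL mctz. */
        if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
                return 0;
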
Link: http://lkml.kernel.org/r/1487856999-16581-2-git-send-email-ldufour@linux.vnet.ibm.com Signed-off-by: Laurent Dufour Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Balbir Singh Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c52ec893e241cf..76f513cc1b0ee7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -466,6 +466,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) struct mem_cgroup_tree_per_node *mctz; mctz = soft_limit_tree_from_page(page); + if (!mctz) + return; /* * Necessary to update all ancestors when hierarchy is used. * because their event counter is not touched. @@ -503,7 +505,8 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) for_each_node(nid) { mz = mem_cgroup_nodeinfo(memcg, nid); mctz = soft_limit_tree_node(nid); - mem_cgroup_remove_exceeded(mz, mctz); + if (mctz) + mem_cgroup_remove_exceeded(mz, mctz); } } @@ -2558,7 +2561,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, * is empty. Do it lockless to prevent lock bouncing. Races * are acceptable as soft limit is best effort anyway. */ - if (RB_EMPTY_ROOT(&mctz->rb_root)) + if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root)) return 0; /* From 7eb76d457fd758d396bc2e65cb0ace5aae614149 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 9 Mar 2017 16:17:09 -0800 Subject: [PATCH 1084/1911] userfaultfd: non-cooperative: fix fork fctx->new memleak We have a memleak in the ->new ctx if the uffd of the parent is closed before the fork event is read, nothing frees the new context. Link: http://lkml.kernel.org/r/20170302173738.18994-2-aarcange@redhat.com Signed-off-by: Mike Rapoport Signed-off-by: Andrea Arcangeli Reported-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index dd48052e086f6d..2407249998c326 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -549,6 +549,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; + + new = (struct userfaultfd_ctx *) + (unsigned long) + ewq->msg.arg.reserved.reserved1; + + userfaultfd_ctx_put(new); + } break; } From 70ccb92fdd90b35bb6f9200093d4ffd6cb38156b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:11 -0800 Subject: [PATCH 1085/1911] userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MADV_DONTNEED userfaultfd_remove() has to be execute before zapping the pagetables or UFFDIO_COPY could keep filling pages after zap_page_range returned, which would result in non zero data after a MADV_DONTNEED. However userfaultfd_remove() may have to release the mmap_sem. This was handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a potentially stale vma (the very vma passed to zap_page_range(vma, ...)). The fix consists in revalidating the vma in case userfaultfd_remove() had to release the mmap_sem. This also optimizes away an unnecessary down_read/up_read in the MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered. 
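The MADV_DONTNEED side boils down to the usual drop-lock/re-lookup/revalidate
pattern; a simplified sketch using only functions that appear in the hunk
below (the full hunk additionally handles the end > vma->vm_end case):

        if (!userfaultfd_remove(vma, start, end)) {
                /* userfaultfd_remove() released mmap_sem, "vma" is stale */
                *prev = NULL;
                down_read(&current->mm->mmap_sem);
                vma = find_vma(current->mm, start);
                if (!vma || start < vma->vm_start)
                        return -ENOMEM; /* a hole materialized in the range */
                if (!can_madv_dontneed_vma(vma))
                        return -EINVAL;
        }
        zap_page_range(vma, start, end - start);
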
It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as userfaultfd_remove() will be defined as "true" at build time. Link: http://lkml.kernel.org/r/20170302173738.18994-3-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Acked-by: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 9 +++---- include/linux/userfaultfd_k.h | 7 +++--- mm/madvise.c | 44 ++++++++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 2407249998c326..9fd5e51ffb316d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -695,8 +695,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -705,13 +704,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) - return; + return true; userfaultfd_ctx_get(ctx); up_read(&mm->mmap_sem); - *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; @@ -720,7 +717,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, userfaultfd_event_wait_completion(ctx, &ewq); - down_read(&mm->mmap_sem); + return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index f2b79bf4c8954c..48a3483dccb123 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); -extern void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, { } -static inline void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, diff --git a/mm/madvise.c b/mm/madvise.c index dc5927c812d3d1..7a2abf0127aef7 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma, if (!can_madv_dontneed_vma(vma)) return -EINVAL; - userfaultfd_remove(vma, prev, start, end); + if (!userfaultfd_remove(vma, start, end)) { + *prev = NULL; /* mmap_sem has been dropped, prev is stale */ + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, start); + if (!vma) + return -ENOMEM; + if (start < vma->vm_start) { + /* + * This "vma" under revalidation is the one + * with the lowest vma->vm_start where start + * is also < vma->vm_end. If start < + * vma->vm_start it means an hole materialized + * in the user address space within the + * virtual range passed to MADV_DONTNEED. + */ + return -ENOMEM; + } + if (!can_madv_dontneed_vma(vma)) + return -EINVAL; + if (end > vma->vm_end) { + /* + * Don't fail if end > vma->vm_end. 
If the old + * vma was splitted while the mmap_sem was + * released the effect of the concurrent + * operation may not cause MADV_DONTNEED to + * have an undefined result. There may be an + * adjacent next vma that we'll walk + * next. userfaultfd_remove() will generate an + * UFFD_EVENT_REMOVE repetition on the + * end-vma->vm_end range, but the manager can + * handle a repetition fine. + */ + end = vma->vm_end; + } + VM_WARN_ON(start >= end); + } zap_page_range(vma, start, end - start); return 0; } @@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma, * mmap_sem. */ get_file(f); - userfaultfd_remove(vma, prev, start, end); - up_read(¤t->mm->mmap_sem); + if (userfaultfd_remove(vma, start, end)) { + /* mmap_sem was not released by userfaultfd_remove() */ + up_read(¤t->mm->mmap_sem); + } error = vfs_fallocate(f, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, end - start); From 46aa6a302b53f543f8e8b8e1714dc5e449ad36a6 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Mar 2017 16:17:14 -0800 Subject: [PATCH 1086/1911] userfaultfd: selftest: vm: allow to build in vm/ directory linux/tools/testing/selftests/vm $ make gcc -Wall -I ../../../../usr/include compaction_test.c -lrt -o /compaction_test /usr/lib/gcc/x86_64-pc-linux-gnu/4.9.4/../../../../x86_64-pc-linux-gnu/bin/ld: cannot open output file /compaction_test: Permission denied collect2: error: ld returned 1 exit status make: *** [../lib.mk:54: /compaction_test] Error 1 Since commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") selftests/vm build fails if run from the "selftests/vm" directory, but it works in the selftests/ directory. It's quicker to be able to do a local vm-only build after a tree wipe and this patch allows for it again. Link: http://lkml.kernel.org/r/20170302173738.18994-4-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: Mike Rapoport Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4cff7e7ddcc47b..41642ba5e318a1 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,9 @@ # Makefile for vm selftests +ifndef OUTPUT + OUTPUT := $(shell pwd) +endif + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test From c9a1b80daeb50e39f21fd7e62f7224f317ac85f0 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 9 Mar 2017 16:17:17 -0800 Subject: [PATCH 1087/1911] mm/memblock.c: fix memblock_next_valid_pfn() Obviously, we should not access memblock.memory.regions[right] if 'right' is outside of [0..memblock.memory.cnt>. 
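The boundary condition can be pictured with a minimal userspace sketch. The struct and helper names below are made up for illustration; only the idea of never dereferencing regions[right] once the binary search lands one past the last region mirrors the fix.

/* Hypothetical userspace analogue of the out-of-bounds guard. */
#include <stdio.h>
#include <stddef.h>

struct region { unsigned long base; unsigned long size; };

static unsigned long next_valid_base(const struct region *regions, size_t cnt,
                                     unsigned long addr, unsigned long max_addr)
{
        size_t left = 0, right = cnt;

        /* Find the first region whose base is greater than addr. */
        while (left < right) {
                size_t mid = left + (right - left) / 2;

                if (regions[mid].base <= addr)
                        left = mid + 1;
                else
                        right = mid;
        }

        /* 'right' may be one past the last region; never dereference it then. */
        if (right == cnt)
                return max_addr;
        return regions[right].base < max_addr ? regions[right].base : max_addr;
}

int main(void)
{
        struct region mem[] = { { 0x1000, 0x1000 }, { 0x4000, 0x2000 } };

        printf("%#lx\n", next_valid_base(mem, 2, 0x2000, 0x10000)); /* 0x4000 */
        printf("%#lx\n", next_valid_base(mem, 2, 0x9000, 0x10000)); /* clamped to max */
        return 0;
}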
Fixes: b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") Link: http://lkml.kernel.org/r/20170303023745.9104-1-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Cc: Paul Burton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index b64b47803e529a..696f06d17c4e89 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1118,7 +1118,10 @@ unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, } } while (left < right); - return min(PHYS_PFN(type->regions[right].base), max_pfn); + if (right == type->cnt) + return max_pfn; + else + return min(PHYS_PFN(type->regions[right].base), max_pfn); } /** From 8346242a7e32c4c93316684ad8f45473932586b9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 16:17:20 -0800 Subject: [PATCH 1088/1911] rmap: fix NULL-pointer dereference on THP munlocking The following test case triggers NULL-pointer derefernce in try_to_unmap_one(): #include #include #include #include int main(int argc, char *argv[]) { int fd; system("mount -t tmpfs -o huge=always none /mnt"); fd = open("/mnt/test", O_CREAT | O_RDWR); ftruncate(fd, 2UL << 20); mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0); mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); munlockall(); return 0; } Apparently, there's a case when we call try_to_unmap() on huge PMDs: it's TTU_MUNLOCK. Let's handle this case correctly. Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()") Link: http://lkml.kernel.org/r/20170302151159.30592-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 2da487d6cea83b..250635dc47b77b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1316,12 +1316,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } while (page_vma_mapped_walk(&pvmw)) { - subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - - /* Unexpected PMD-mapped THP? */ - VM_BUG_ON_PAGE(!pvmw.pte, page); - /* * If the page is mlock()d, we cannot swap it out. * If it's recently referenced (perhaps page_referenced @@ -1345,6 +1339,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, continue; } + /* Unexpected PMD-mapped THP? */ + VM_BUG_ON_PAGE(!pvmw.pte, page); + + subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); + address = pvmw.address; + + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { From 6ebb4a1b848fe75323135f93e72c78f8780fd268 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 16:17:23 -0800 Subject: [PATCH 1089/1911] thp: fix another corner case of munlock() vs. THPs The following test case triggers BUG() in munlock_vma_pages_range(): int main(int argc, char *argv[]) { int fd; system("mount -t tmpfs -o huge=always none /mnt"); fd = open("/mnt/test", O_CREAT | O_RDWR); ftruncate(fd, 4UL << 20); mmap(NULL, 4UL << 20, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_LOCKED, fd, 0); mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); munlockall(); return 0; } The second mmap() create PTE-mapping of the first huge page in file. 
It makes kernel munlock the page as we never keep PTE-mapped page mlocked. On munlockall() when we handle vma created by the first mmap(), munlock_vma_page() returns page_mask == 0, as the page is not mlocked anymore. On next iteration follow_page_mask() return tail page, but page_mask is HPAGE_NR_PAGES - 1. It makes us skip to the first tail page of the next huge page and step on VM_BUG_ON_PAGE(PageMlocked(page)). The fix is not use the page_mask from follow_page_mask() at all. It has no use for us. Link: http://lkml.kernel.org/r/20170302150252.34120-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 1050511f8b2bdb..02f138244bf5ee 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -442,7 +442,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page; - unsigned int page_mask; + unsigned int page_mask = 0; unsigned long page_increm; struct pagevec pvec; struct zone *zone; @@ -456,8 +456,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * suits munlock very well (and if somehow an abnormal page * has sneaked into the range, we won't oops here: great). */ - page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP, - &page_mask); + page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { if (PageTransTail(page)) { @@ -468,8 +467,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, /* * Any THP page found by follow_page_mask() may * have gotten split before reaching - * munlock_vma_page(), so we need to recompute - * the page_mask here. + * munlock_vma_page(), so we need to compute + * the page_mask here instead. */ page_mask = munlock_vma_page(page); unlock_page(page); From 40e952f9d687928b32db20226f085ae660a7237c Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 9 Mar 2017 16:17:26 -0800 Subject: [PATCH 1090/1911] mm: do not call mem_cgroup_free() from within mem_cgroup_alloc() mem_cgroup_free() indirectly calls wb_domain_exit() which is not prepared to deal with a struct wb_domain object that hasn't executed wb_domain_init(). For instance, the following warning message is printed by lockdep if alloc_percpu() fails in mem_cgroup_alloc(): INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 1950 Comm: mkdir Not tainted 4.10.0+ #151 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: dump_stack+0x67/0x99 register_lock_class+0x36d/0x540 __lock_acquire+0x7f/0x1a30 lock_acquire+0xcc/0x200 del_timer_sync+0x3c/0xc0 wb_domain_exit+0x14/0x20 mem_cgroup_free+0x14/0x40 mem_cgroup_css_alloc+0x3f9/0x620 cgroup_apply_control_enable+0x190/0x390 cgroup_mkdir+0x290/0x3d0 kernfs_iop_mkdir+0x58/0x80 vfs_mkdir+0x10e/0x1a0 SyS_mkdirat+0xa8/0xd0 SyS_mkdir+0x14/0x20 entry_SYSCALL_64_fastpath+0x18/0xad Add __mem_cgroup_free() which skips wb_domain_exit(). This is used by both mem_cgroup_free() and mem_cgroup_alloc() clean up. 
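The pattern here is generic: an allocation error path must only undo what the allocator itself completed. A small userspace sketch with made-up names (widget, domain_*) shows the split between a partial __free helper and the full free; only that split mirrors the memcontrol change.

#include <stdio.h>
#include <stdlib.h>

struct widget {
        int *stats;          /* allocated first */
        int domain_active;   /* "wb_domain"-like state, initialised last */
};

static void domain_exit(struct widget *w)
{
        /* Would misbehave (like wb_domain_exit) if init never ran. */
        if (!w->domain_active)
                fprintf(stderr, "BUG: exiting uninitialised domain\n");
        w->domain_active = 0;
}

/* Frees only what widget_alloc() itself set up. */
static void __widget_free(struct widget *w)
{
        free(w->stats);
        free(w);
}

/* Full teardown, for fully constructed widgets only. */
static void widget_free(struct widget *w)
{
        domain_exit(w);
        __widget_free(w);
}

static struct widget *widget_alloc(int fail_late)
{
        struct widget *w = calloc(1, sizeof(*w));

        if (!w)
                return NULL;
        w->stats = malloc(16 * sizeof(int));
        if (!w->stats || fail_late)
                goto fail;
        w->domain_active = 1;   /* analogue of wb_domain_init() */
        return w;
fail:
        __widget_free(w);       /* never widget_free(): domain was not set up */
        return NULL;
}

int main(void)
{
        struct widget *w = widget_alloc(0);

        if (w)
                widget_free(w);
        widget_alloc(1);        /* error path exercises __widget_free() only */
        return 0;
}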
Fixes: 0b8f73e104285 ("mm: memcontrol: clean up alloc, online, offline, free functions") Link: http://lkml.kernel.org/r/20170306192122.24262-1-tahsin@google.com Signed-off-by: Tahsin Erdogan Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76f513cc1b0ee7..2bd7541d7c1123 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4138,17 +4138,22 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) kfree(memcg->nodeinfo[node]); } -static void mem_cgroup_free(struct mem_cgroup *memcg) +static void __mem_cgroup_free(struct mem_cgroup *memcg) { int node; - memcg_wb_domain_exit(memcg); for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); kfree(memcg); } +static void mem_cgroup_free(struct mem_cgroup *memcg) +{ + memcg_wb_domain_exit(memcg); + __mem_cgroup_free(memcg); +} + static struct mem_cgroup *mem_cgroup_alloc(void) { struct mem_cgroup *memcg; @@ -4199,7 +4204,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) fail: if (memcg->id.id > 0) idr_remove(&mem_cgroup_idr, memcg->id.id); - mem_cgroup_free(memcg); + __mem_cgroup_free(memcg); return NULL; } From 68fd814a3391c7e017ae6ace8855788748edd981 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:28 -0800 Subject: [PATCH 1091/1911] kasan: resched in quarantine_remove_cache() We see reported stalls/lockups in quarantine_remove_cache() on machines with large amounts of RAM. quarantine_remove_cache() needs to scan whole quarantine in order to take out all objects belonging to the cache. Quarantine is currently 1/32-th of RAM, e.g. on a machine with 256GB of memory that will be 8GB. Moreover quarantine scanning is a walk over uncached linked list, which is slow. Add cond_resched() after scanning of each non-empty batch of objects. Batches are specifically kept of reasonable size for quarantine_put(). On a machine with 256GB of RAM we should have ~512 non-empty batches, each with 16MB of objects. Link: http://lkml.kernel.org/r/20170308154239.25440-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 6f1ed163087369..4ac39f20757a8b 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -283,8 +283,15 @@ void quarantine_remove_cache(struct kmem_cache *cache) on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); - for (i = 0; i < QUARANTINE_BATCHES; i++) + for (i = 0; i < QUARANTINE_BATCHES; i++) { + if (qlist_empty(&global_quarantine[i])) + continue; qlist_move_cache(&global_quarantine[i], &to_free, cache); + /* Scanning whole quarantine can take a while. */ + spin_unlock_irqrestore(&quarantine_lock, flags); + cond_resched(); + spin_lock_irqsave(&quarantine_lock, flags); + } spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); From ce5bec54bb5debbbe51b40270d8f209a23cadae4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 9 Mar 2017 16:17:32 -0800 Subject: [PATCH 1092/1911] kasan: fix races in quarantine_remove_cache() quarantine_remove_cache() frees all pending objects that belong to the cache, before we destroy the cache itself. 
However there are currently two possibilities how it can fail to do so. First, another thread can hold some of the objects from the cache in temp list in quarantine_put(). quarantine_put() has a windows of enabled interrupts, and on_each_cpu() in quarantine_remove_cache() can finish right in that window. These objects will be later freed into the destroyed cache. Then, quarantine_reduce() has the same problem. It grabs a batch of objects from the global quarantine, then unlocks quarantine_lock and then frees the batch. quarantine_remove_cache() can finish while some objects from the cache are still in the local to_free list in quarantine_reduce(). Fix the race with quarantine_put() by disabling interrupts for the whole duration of quarantine_put(). In combination with on_each_cpu() in quarantine_remove_cache() it ensures that quarantine_remove_cache() either sees the objects in the per-cpu list or in the global list. Fix the race with quarantine_reduce() by protecting quarantine_reduce() with srcu critical section and then doing synchronize_srcu() at the end of quarantine_remove_cache(). I've done some assessment of how good synchronize_srcu() works in this case. And on a 4 CPU VM I see that it blocks waiting for pending read critical sections in about 2-3% of cases. Which looks good to me. I suspect that these races are the root cause of some GPFs that I episodically hit. Previously I did not have any explanation for them. BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 IP: qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 PGD 6aeea067 PUD 60ed7067 PMD 0 Oops: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 13667 Comm: syz-executor2 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88005f948040 task.stack: ffff880069818000 RIP: 0010:qlist_free_all+0x2e/0xc0 mm/kasan/quarantine.c:155 RSP: 0018:ffff88006981f298 EFLAGS: 00010246 RAX: ffffea0000ffff00 RBX: 0000000000000000 RCX: ffffea0000ffff1f RDX: 0000000000000000 RSI: ffff88003fffc3e0 RDI: 0000000000000000 RBP: ffff88006981f2c0 R08: ffff88002fed7bd8 R09: 00000001001f000d R10: 00000000001f000d R11: ffff88006981f000 R12: ffff88003fffc3e0 R13: ffff88006981f2d0 R14: ffffffff81877fae R15: 0000000080000000 FS: 00007fb911a2d700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c8 CR3: 0000000060ed6000 CR4: 00000000000006f0 Call Trace: quarantine_reduce+0x10e/0x120 mm/kasan/quarantine.c:239 kasan_kmalloc+0xca/0xe0 mm/kasan/kasan.c:590 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:456 [inline] slab_alloc_node mm/slub.c:2718 [inline] kmem_cache_alloc_node+0x1d3/0x280 mm/slub.c:2754 __alloc_skb+0x10f/0x770 net/core/skbuff.c:219 alloc_skb include/linux/skbuff.h:932 [inline] _sctp_make_chunk+0x3b/0x260 net/sctp/sm_make_chunk.c:1388 sctp_make_data net/sctp/sm_make_chunk.c:1420 [inline] sctp_make_datafrag_empty+0x208/0x360 net/sctp/sm_make_chunk.c:746 sctp_datamsg_from_user+0x7e8/0x11d0 net/sctp/chunk.c:266 sctp_sendmsg+0x2611/0x3970 net/sctp/socket.c:1962 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1685 SyS_sendto+0x40/0x50 net/socket.c:1653 I am not sure about backporting. 
The bug is quite hard to trigger, I've seen it few times during our massive continuous testing (however, it could be cause of some other episodic stray crashes as it leads to memory corruption...). If it is triggered, the consequences are very bad -- almost definite bad memory corruption. The fix is non trivial and has chances of introducing new bugs. I am also not sure how actively people use KASAN on older releases. [dvyukov@google.com: - sorted includes[ Link: http://lkml.kernel.org/r/20170309094028.51088-1-dvyukov@google.com Link: http://lkml.kernel.org/r/20170308151532.5070-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/quarantine.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 4ac39f20757a8b..3a8ddf8baf7dc3 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ static int quarantine_tail; /* Total size of all objects in global_quarantine across all batches. */ static unsigned long quarantine_size; static DEFINE_SPINLOCK(quarantine_lock); +DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ static unsigned long quarantine_max_size; @@ -173,17 +175,22 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) struct qlist_head *q; struct qlist_head temp = QLIST_INIT; + /* + * Note: irq must be disabled until after we move the batch to the + * global quarantine. Otherwise quarantine_remove_cache() can miss + * some objects belonging to the cache if they are in our local temp + * list. quarantine_remove_cache() executes on_each_cpu() at the + * beginning which ensures that it either sees the objects in per-cpu + * lists or in the global quarantine. + */ local_irq_save(flags); q = this_cpu_ptr(&cpu_quarantine); qlist_put(q, &info->quarantine_link, cache->size); - if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) + if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - local_irq_restore(flags); - - if (unlikely(!qlist_empty(&temp))) { - spin_lock_irqsave(&quarantine_lock, flags); + spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= @@ -196,20 +203,33 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (new_tail != quarantine_head) quarantine_tail = new_tail; } - spin_unlock_irqrestore(&quarantine_lock, flags); + spin_unlock(&quarantine_lock); } + + local_irq_restore(flags); } void quarantine_reduce(void) { size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; + int srcu_idx; struct qlist_head to_free = QLIST_INIT; if (likely(READ_ONCE(quarantine_size) <= READ_ONCE(quarantine_max_size))) return; + /* + * srcu critical section ensures that quarantine_remove_cache() + * will not miss objects belonging to the cache while they are in our + * local to_free list. srcu is chosen because (1) it gives us private + * grace period domain that does not interfere with anything else, + * and (2) it allows synchronize_srcu() to return without waiting + * if there are no pending read critical sections (which is the + * expected case). 
+ */ + srcu_idx = srcu_read_lock(&remove_cache_srcu); spin_lock_irqsave(&quarantine_lock, flags); /* @@ -237,6 +257,7 @@ void quarantine_reduce(void) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); + srcu_read_unlock(&remove_cache_srcu, srcu_idx); } static void qlist_move_cache(struct qlist_head *from, @@ -280,6 +301,13 @@ void quarantine_remove_cache(struct kmem_cache *cache) unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; + /* + * Must be careful to not miss any objects that are being moved from + * per-cpu list to the global quarantine in quarantine_put(), + * nor objects being freed in quarantine_reduce(). on_each_cpu() + * achieves the first goal, while synchronize_srcu() achieves the + * second. + */ on_each_cpu(per_cpu_remove_cache, cache, 1); spin_lock_irqsave(&quarantine_lock, flags); @@ -295,4 +323,6 @@ void quarantine_remove_cache(struct kmem_cache *cache) spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); + + synchronize_srcu(&remove_cache_srcu); } From ca5b58ea3db88b5e69ba2a1f6bc3cf239cdcc64f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 9 Mar 2017 16:17:34 -0800 Subject: [PATCH 1093/1911] sh: cayman: IDE support fix Remove incorrect CONFIG_IDE ifdef (CONFIG_IDE config option is for internal drivers/ide/ use) and make IDE hardware interface always initialized (not only when IDE subsystem is built-in). This patch allows Cayman board to work with modular IDE subsystem support and removes the requirement of having the whole core IDE subsystem built-in when using libata PATA support. Link: http://lkml.kernel.org/r/1990884.yFoE6lSB9G@amdc3058 Signed-off-by: Bartlomiej Zolnierkiewicz Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/boards/mach-cayman/setup.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 340fd40b381dc3..9c292c27e0d711 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); -#ifdef CONFIG_IDE /* * Only IDE1 exists on the Cayman */ @@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ -#endif /* Exit the configuration state */ outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); From c0d0e351285161a515396b7b1ee53ec9ffd97e3c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 9 Mar 2017 16:17:37 -0800 Subject: [PATCH 1094/1911] fat: fix using uninitialized fields of fat_inode/fsinfo_inode Recently fallocate patch was merged and it uses MSDOS_I(inode)->mmu_private at fat_evict_inode(). However, fat_inode/fsinfo_inode that was introduced in past didn't initialize MSDOS_I(inode) properly. With those combinations, it became the cause of accessing random entry in FAT area. 
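The underlying rule is that a dummy object must have every field later code may read set to a benign value, not just one. A userspace sketch with a made-up fake_inode type mirrors what the init helper in this patch does; the field names are illustrative only.

#include <stdio.h>
#include <stdlib.h>

struct fake_inode {
        long mmu_private;
        int i_start;
        int i_logstart;
        int i_attrs;
        long i_pos;
};

/* Initialise a dummy inode so any later read sees benign values. */
static void dummy_inode_init(struct fake_inode *inode)
{
        inode->mmu_private = 0;
        inode->i_start = 0;
        inode->i_logstart = 0;
        inode->i_attrs = 0;
        inode->i_pos = 0;
}

int main(void)
{
        /* malloc() deliberately leaves the memory uninitialised, like a fresh
         * inode whose filesystem-private area is not zeroed for us. */
        struct fake_inode *dummy = malloc(sizeof(*dummy));

        if (!dummy)
                return 1;
        dummy_inode_init(dummy);
        printf("mmu_private=%ld i_start=%d\n", dummy->mmu_private, dummy->i_start);
        free(dummy);
        return 0;
}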
Link: http://lkml.kernel.org/r/87pohrj4i8.fsf@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: Moreno Bartalucci Tested-by: Moreno Bartalucci Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29c8..a2c05f2ada6dd8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1359,6 +1359,16 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); From 2378cd6181edd94748d699448823609977283b11 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Mar 2017 16:17:40 -0800 Subject: [PATCH 1095/1911] userfaultfd: remove wrong comment from userfaultfd_ctx_get() It's a void function, so there is no return value; Link: http://lkml.kernel.org/r/20170309150817.7510-1-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 9fd5e51ffb316d..2bb1c72380f29d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -138,8 +138,6 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. - * - * Returns: In case of success, returns not zero. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { From 10f2889ba35aeb251b9945ec4c461af8c124c41f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Dec 2016 17:28:41 -0800 Subject: [PATCH 1096/1911] drm: mxsfb: use bus_format to determine LCD bus width The LCD bus width does not need to align with the pixel format. The LCDIF controller automatically converts between pixel formats and bus width by padding or dropping LSBs. The DRM subsystem has the notion of bus_format which allows to determine what bus_formats are supported by the display. Choose the first available or fallback to 24 bit if none are available. 
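The selection logic can be sketched in isolation. The enum values and register bit positions below are placeholders, not the LCDIF layout; only the "first advertised bus format, else 24-bit fallback" rule reflects the change.

#include <stdio.h>

enum bus_fmt { BUS_RGB565 = 0x1017, BUS_RGB666 = 0x1009, BUS_RGB888 = 0x100a };

static unsigned int bus_width_bits(const enum bus_fmt *formats, unsigned int num)
{
        enum bus_fmt fmt = BUS_RGB888;      /* fallback when the display reports nothing */

        if (num)
                fmt = formats[0];           /* first advertised format wins */

        switch (fmt) {
        case BUS_RGB565: return 0 << 10;    /* hypothetical 16-bit bus field */
        case BUS_RGB666: return 2 << 10;    /* hypothetical 18-bit bus field */
        case BUS_RGB888: return 3 << 10;    /* hypothetical 24-bit bus field */
        default:
                fprintf(stderr, "unknown bus format %#x, keeping 24-bit\n", fmt);
                return 3 << 10;
        }
}

int main(void)
{
        enum bus_fmt advertised[] = { BUS_RGB666 };

        printf("%#x\n", bus_width_bits(advertised, 1));  /* 18-bit field */
        printf("%#x\n", bus_width_bits(NULL, 0));        /* falls back to 24-bit */
        return 0;
}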
Signed-off-by: Stefan Agner Acked-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 34 ++++++++++++++++++++++++++++-- drivers/gpu/drm/mxsfb/mxsfb_regs.h | 1 + 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index e10a4eda4078ba..259f71b3a76ac5 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -65,13 +65,11 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) switch (format) { case DRM_FORMAT_RGB565: dev_dbg(drm->dev, "Setting up RGB565 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); ctrl |= CTRL_SET_WORD_LENGTH(0); ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0xf); break; case DRM_FORMAT_XRGB8888: dev_dbg(drm->dev, "Setting up XRGB8888 mode\n"); - ctrl |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); ctrl |= CTRL_SET_WORD_LENGTH(3); /* Do not use packed pixels = one pixel per word instead. */ ctrl1 |= CTRL1_SET_BYTE_PACKAGING(0x7); @@ -87,6 +85,36 @@ static int mxsfb_set_pixel_fmt(struct mxsfb_drm_private *mxsfb) return 0; } +static void mxsfb_set_bus_fmt(struct mxsfb_drm_private *mxsfb) +{ + struct drm_crtc *crtc = &mxsfb->pipe.crtc; + struct drm_device *drm = crtc->dev; + u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24; + u32 reg; + + reg = readl(mxsfb->base + LCDC_CTRL); + + if (mxsfb->connector.display_info.num_bus_formats) + bus_format = mxsfb->connector.display_info.bus_formats[0]; + + reg &= ~CTRL_BUS_WIDTH_MASK; + switch (bus_format) { + case MEDIA_BUS_FMT_RGB565_1X16: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_16BIT); + break; + case MEDIA_BUS_FMT_RGB666_1X18: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_18BIT); + break; + case MEDIA_BUS_FMT_RGB888_1X24: + reg |= CTRL_SET_BUS_WIDTH(STMLCDIF_24BIT); + break; + default: + dev_err(drm->dev, "Unknown media bus format %d\n", bus_format); + break; + } + writel(reg, mxsfb->base + LCDC_CTRL); +} + static void mxsfb_enable_controller(struct mxsfb_drm_private *mxsfb) { u32 reg; @@ -175,6 +203,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0); + mxsfb_set_bus_fmt(mxsfb); + /* Frame length in lines. */ writel(m->crtc_vtotal, mxsfb->base + LCDC_VDCTRL1); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_regs.h b/drivers/gpu/drm/mxsfb/mxsfb_regs.h index 31d62cd0d3d78a..66a6ba9ec533ff 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_regs.h +++ b/drivers/gpu/drm/mxsfb/mxsfb_regs.h @@ -44,6 +44,7 @@ #define CTRL_DATA_SELECT (1 << 16) #define CTRL_SET_BUS_WIDTH(x) (((x) & 0x3) << 10) #define CTRL_GET_BUS_WIDTH(x) (((x) >> 10) & 0x3) +#define CTRL_BUS_WIDTH_MASK (0x3 << 10) #define CTRL_SET_WORD_LENGTH(x) (((x) & 0x3) << 8) #define CTRL_GET_WORD_LENGTH(x) (((x) >> 8) & 0x3) #define CTRL_MASTER (1 << 5) From 53990e416bb7adaa59d045f325a47f31a11b75ee Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Dec 2016 12:48:09 -0800 Subject: [PATCH 1097/1911] drm: mxsfb: fix pixel clock polarity The DRM subsystem specifies the pixel clock polarity from a controllers perspective: DRM_BUS_FLAG_PIXDATA_NEGEDGE means the controller drives the data on pixel clocks falling edge. That is the controllers DOTCLK_POL=0 (Default is data launched at negative edge). Also change the data enable logic to be high active by default and only change if explicitly requested via bus_flags. 
With that defaults are: - Data enable: high active - Pixel clock polarity: controller drives data on negative edge Signed-off-by: Stefan Agner Acked-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index 259f71b3a76ac5..165abd22743637 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -196,9 +196,16 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH; if (m->flags & DRM_MODE_FLAG_PVSYNC) vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_DE_HIGH) + /* Make sure Data Enable is high active by default */ + if (!(bus_flags & DRM_BUS_FLAG_DE_LOW)) vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH; - if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) + /* + * DRM_BUS_FLAG_PIXDATA_ defines are controller centric, + * controllers VDCTRL0_DOTCLK is display centric. + * Drive on positive edge -> display samples on falling edge + * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING + */ + if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE) vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING; writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0); From 7ad7a5acfb96215216f46b9848bd2d341663358f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 28 Jan 2017 18:01:57 +0100 Subject: [PATCH 1098/1911] drm: mxsfb: Fix crash when provided invalid DT bindings The mxsfb driver will crash if the mxsfb DT node has a subnode, but the content of the subnode is not of-graph binding with an endpoint linking to panel. The crash was triggered by providing old-style panel bindings to the mxsfb driver instead of the new of-graph ones. The problem happens in mxsfb_create_output(), which is invoked from mxsfb_load(). The mxsfb_create_output() iterates over all mxsfb DT subnode endpoints and tries to bind a panel on each endpoint. If there is any problem binding the panel, that is, mxsfb->panel == NULL, this function will return an error code, otherwise success 0 is returned. If the subnodes do not specify of-graph binding with an endpoint, the iteration over endpoints in mxsfb_create_output() will have zero cycles and the function will immediatelly return 0, but the mxsfb->panel will remain NULL. This is propagated back into the mxsfb_load(), which does not detect any problem and expects that the mxsfb->panel is valid, thus calls mxsfb_panel_attach(). But since mxsfb->panel == NULL, mxsfb_panel_attach() is called with first argument NULL and this crashes the kernel. This patch fixes the problem by explicitly checking for valid mxsfb->panel at the end of the iteration in mxsfb_create_output(). 
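The general pattern is that a loop which may run zero times cannot be the only thing standing between "nothing bound" and "success". A userspace sketch with illustrative names (bind_panel, create_output) shows the explicit result check added at the end of the iteration.

#include <stdio.h>
#include <stddef.h>

#define EPROBE_DEFER 517   /* illustrative stand-in for the kernel's error code */

struct panel { const char *name; };

static struct panel *bind_panel(const char *ep)
{
        static struct panel p = { "lcd-panel" };

        /* Pretend only a "panel" endpoint resolves to a real panel. */
        return ep && ep[0] == 'p' ? &p : NULL;
}

static int create_output(const char *const *endpoints, size_t n, struct panel **out)
{
        struct panel *panel = NULL;

        for (size_t i = 0; i < n; i++) {
                panel = bind_panel(endpoints[i]);
                if (panel)
                        break;
        }

        /* If the loop ran zero times, or nothing bound, do not report success. */
        if (!panel)
                return -EPROBE_DEFER;

        *out = panel;
        return 0;
}

int main(void)
{
        struct panel *p = NULL;
        const char *none[] = { NULL };
        const char *good[] = { "bridge", "panel" };

        printf("%d\n", create_output(none, 0, &p));   /* negative, not "success" */
        printf("%d\n", create_output(good, 2, &p));   /* 0, p is valid */
        return 0;
}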
Signed-off-by: Marek Vasut Cc: Daniel Vetter Cc: Dave Airlie Cc: Stefan Agner Cc: Breno Matheus Lima Tested-by: Breno Lima Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_out.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c index fa8d173994071d..b8e81422d4e26f 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_out.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c @@ -112,6 +112,7 @@ static int mxsfb_attach_endpoint(struct drm_device *drm, int mxsfb_create_output(struct drm_device *drm) { + struct mxsfb_drm_private *mxsfb = drm->dev_private; struct device_node *ep_np = NULL; struct of_endpoint ep; int ret; @@ -127,5 +128,8 @@ int mxsfb_create_output(struct drm_device *drm) } } + if (!mxsfb->panel) + return -EPROBE_DEFER; + return 0; } From d42986b6c600da4215973878f1c889cdabbdd122 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 2 Feb 2017 19:26:38 -0200 Subject: [PATCH 1099/1911] drm: mxsfb_crtc: Fix the framebuffer misplacement Currently the framebuffer content is displayed with incorrect offsets in both the vertical and horizontal directions. The fbdev version of the driver does not show this problem. Breno Lima dumped the eLCDIF controller registers on both the drm and fbdev drivers and noticed that the VDCTRL3 register is configured incorrectly in the drm driver. The fbdev driver calculates the vertical and horizontal wait counts of the VDCTRL3 register by doing: back porch + sync length. Looking at the horizontal and vertical timing diagram from include/drm/drm_modes.h this value corresponds to: crtc_[hv]total - crtc_[hv]sync_start So fix the VDCTRL3 register setting accordingly so that the eLCDIF controller can properly show the framebuffer content in the correct position. Reported-by: Breno Lima Signed-off-by: Fabio Estevam Tested-by: Breno Lima Tested-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_crtc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c index 165abd22743637..1144e0c9e8942d 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c @@ -221,8 +221,8 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb) VDCTRL2_SET_HSYNC_PERIOD(m->crtc_htotal), mxsfb->base + LCDC_VDCTRL2); - writel(SET_HOR_WAIT_CNT(m->crtc_hblank_end - m->crtc_hsync_end) | - SET_VERT_WAIT_CNT(m->crtc_vblank_end - m->crtc_vsync_end), + writel(SET_HOR_WAIT_CNT(m->crtc_htotal - m->crtc_hsync_start) | + SET_VERT_WAIT_CNT(m->crtc_vtotal - m->crtc_vsync_start), mxsfb->base + LCDC_VDCTRL3); writel(SET_DOTCLK_H_VALID_DATA_CNT(m->hdisplay), From 3f81e1340706e9a7f854808e2f580c3106805d0c Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 1 Feb 2017 15:19:47 -0200 Subject: [PATCH 1100/1911] drm: mxsfb: Implement drm_panel handling Currently when the 'power-supply' regulator is passed via device tree it does not actually work since drm_panel_prepare()/drm_panel_enable() are never called. Quoting Thierry Reding: "It should really call drm_panel_prepare() and drm_panel_enable() while switching on the display pipeline and drm_panel_disable(), followed by drm_panel_unprepare() while switching off the display pipeline." So do as suggested, so that the 'power-supply' regulator can be functional. 
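The ordering being established is a symmetric bring-up and tear-down around the controller. The stub functions below are placeholders rather than the DRM panel API, but the call order mirrors the pipe enable/disable change.

#include <stdio.h>

static void panel_prepare(void)   { puts("panel: power rails on");  }
static void crtc_enable(void)     { puts("crtc: scanout running");  }
static void panel_enable(void)    { puts("panel: backlight on");    }

static void panel_disable(void)   { puts("panel: backlight off");   }
static void crtc_disable(void)    { puts("crtc: scanout stopped");  }
static void panel_unprepare(void) { puts("panel: power rails off"); }

static void pipe_enable(void)
{
        panel_prepare();     /* the 'power-supply' regulator comes up here */
        crtc_enable();
        panel_enable();
}

static void pipe_disable(void)
{
        /* Exact reverse of pipe_enable(), so the panel never drives a dead pipe. */
        panel_disable();
        crtc_disable();
        panel_unprepare();
}

int main(void)
{
        pipe_enable();
        pipe_disable();
        return 0;
}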
Reported-by: Breno Lima Suggested-by: Thierry Reding Signed-off-by: Fabio Estevam Tested-by: Marek Vasut Signed-off-by: Dave Airlie --- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index cdfbe0284635de..ff6d6a6f842e5a 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -102,14 +102,18 @@ static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe, { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_prepare(mxsfb->panel); mxsfb_crtc_enable(mxsfb); + drm_panel_enable(mxsfb->panel); } static void mxsfb_pipe_disable(struct drm_simple_display_pipe *pipe) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); + drm_panel_disable(mxsfb->panel); mxsfb_crtc_disable(mxsfb); + drm_panel_unprepare(mxsfb->panel); } static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe, From 9f138fa609c47403374a862a08a41394be53d461 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 8 Mar 2017 18:08:16 +0100 Subject: [PATCH 1101/1911] net: initialize msg.msg_flags in recvfrom KMSAN reports a use of uninitialized memory in put_cmsg() because msg.msg_flags in recvfrom haven't been initialized properly. The flag values don't affect the result on this path, but it's still a good idea to initialize them explicitly. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 2c1e8677ff2d4f..e0757e648c0c45 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1731,6 +1731,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; msg.msg_iocb = NULL; + msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); From 6cbac9828627b89487144c1e4448d7e0a6ab22ca Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 8 Mar 2017 16:46:57 +0100 Subject: [PATCH 1102/1911] tun: remove copyright printing Printing copyright does not give any useful information on the boot process. Furthermore, the email address printed is obsolete since commit ba57b6f20429 ("MAINTAINERS: fix bouncing tun/tap entries") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dc1b1dd9157c16..f58b7d850114b0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2570,7 +2570,6 @@ static int __init tun_init(void) int ret = 0; pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); - pr_info("%s\n", DRV_COPYRIGHT); ret = rtnl_link_register(&tun_link_ops); if (ret) { From 3c2217a675bac22afb149166e0de71809189850d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:32 -0500 Subject: [PATCH 1103/1911] bnxt_en: Perform function reset earlier during probe. The firmware call to do function reset is done too late. It is causing the rings that have been reserved to be freed. In NPAR mode, this bug is causing us to run out of rings. Fixes: 391be5c27364 ("bnxt_en: Implement new scheme to reserve tx rings.") Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 235733e91c791b..33293ac32779fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7444,6 +7444,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + rc = bnxt_hwrm_func_reset(bp); + if (rc) + goto init_err_pci_clean; + bnxt_hwrm_fw_set_time(bp); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | @@ -7554,10 +7558,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; - rc = bnxt_hwrm_func_reset(bp); - if (rc) - goto init_err_pci_clean; - rc = bnxt_init_int_mode(bp); if (rc) goto init_err_pci_clean; From b386cd362ffea09d05c56bfa85d104562e860647 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:33 -0500 Subject: [PATCH 1104/1911] bnxt_en: Call bnxt_ulp_stop() during tx timeout. If we call bnxt_reset_task() due to tx timeout, we should call bnxt_ulp_stop() to inform the RDMA driver about the error and the impending reset. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 33293ac32779fe..bbe93713e22672 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6495,8 +6495,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) if (!silent) bnxt_dbg_dump_states(bp); if (netif_running(bp->dev)) { + int rc; + + if (!silent) + bnxt_ulp_stop(bp); bnxt_close_nic(bp, false, false); - bnxt_open_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + if (!silent && !rc) + bnxt_ulp_start(bp); } } From bc39f885a9c3bdbff0a96ecaf07b162a78eff6e4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:34 -0500 Subject: [PATCH 1105/1911] bnxt_en: Check if firmware LLDP agent is running. Set DCB_CAP_DCBX_HOST capability flag only if the firmware LLDP agent is not running. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bbe93713e22672..869d4c97a9e847 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4465,6 +4465,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; } #endif + if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) & + FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)) + bp->flags |= BNXT_FLAG_FW_LLDP_AGENT; + switch (resp->port_partition_type) { case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index faf26a2f726b80..c7a5b84a5cb20e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -993,6 +993,7 @@ struct bnxt { BNXT_FLAG_ROCEV2_CAP) #define BNXT_FLAG_NO_AGG_RINGS 0x20000 #define BNXT_FLAG_RX_PAGE_MODE 0x40000 + #define BNXT_FLAG_FW_LLDP_AGENT 0x80000 #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index fdf2d8caf7bfaa..03532061d211b1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp) return; bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE; - if (BNXT_PF(bp)) + if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) bp->dcbx_cap |= DCB_CAP_DCBX_HOST; else bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED; From 520ad89a54edea84496695d528f73ddcf4a52ea4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:35 -0500 Subject: [PATCH 1106/1911] bnxt_en: Ignore 0 value in autoneg supported speed from firmware. In some situations, the firmware will return 0 for autoneg supported speed. This may happen if the firmware detects no SFP module, for example. The driver should ignore this so that we don't end up with an invalid autoneg setting with nothing advertised. When SFP module is inserted, we'll get the updated settings from firmware at that time. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 869d4c97a9e847..32de4589d16a2c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5511,8 +5511,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; } - link_info->support_auto_speeds = - le16_to_cpu(resp->supported_speeds_auto_mode); + if (resp->supported_speeds_auto_mode) + link_info->support_auto_speeds = + le16_to_cpu(resp->supported_speeds_auto_mode); hwrm_phy_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:09:05 +0000 Subject: [PATCH 1107/1911] net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. 
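The core of the scheme can be modelled outside the kernel: one table of lock-class names per address family, duplicated for kernel-created sockets, with the 'kern' flag choosing the set. The families, key type and "k-" prefix below are trimmed-down stand-ins, not the real tables; only the doubled-key-set idea and the prefix-stamping macro reflect the patch.

#include <stdio.h>
#include <stdbool.h>

#define SOCK_LOCK_NAMES(x) { x "AF_UNSPEC", x "AF_UNIX", x "AF_INET", x "AF_INET6" }

enum { AF_UNSPEC_ID, AF_UNIX_ID, AF_INET_ID, AF_INET6_ID, AF_MAX_ID };

struct lock_key { const char *name; };

static const char *const user_key_names[AF_MAX_ID] = SOCK_LOCK_NAMES("sk_lock-");
static const char *const kern_key_names[AF_MAX_ID] = SOCK_LOCK_NAMES("k-sk_lock-");

static struct lock_key user_keys[AF_MAX_ID];
static struct lock_key kern_keys[AF_MAX_ID];

/* A kernel-internal socket gets a key from the second set, so a lock-dependency
 * checker no longer conflates it with userspace sockets of the same family. */
static struct lock_key *sock_lock_key(int family, bool kern)
{
        struct lock_key *key = kern ? &kern_keys[family] : &user_keys[family];

        key->name = kern ? kern_key_names[family] : user_key_names[family];
        return key;
}

int main(void)
{
        printf("%s\n", sock_lock_key(AF_INET_ID, false)->name);  /* sk_lock-AF_INET */
        printf("%s\n", sock_lock_key(AF_INET_ID, true)->name);   /* k-sk_lock-AF_INET */
        return 0;
}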
Signed-off-by: David Howells Signed-off-by: David S. Miller --- crypto/af_alg.c | 9 +- crypto/algif_hash.c | 9 +- drivers/staging/lustre/lnet/lnet/lib-socket.c | 4 +- fs/dlm/lowcomms.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- include/crypto/if_alg.h | 2 +- include/linux/net.h | 2 +- include/net/inet_common.h | 3 +- include/net/inet_connection_sock.h | 2 +- include/net/sctp/structs.h | 3 +- include/net/sock.h | 9 +- net/atm/svc.c | 5 +- net/ax25/af_ax25.c | 3 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 3 +- net/bluetooth/sco.c | 2 +- net/core/sock.c | 106 ++++++++++-------- net/decnet/af_decnet.c | 5 +- net/ipv4/af_inet.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/irda/af_irda.c | 5 +- net/iucv/af_iucv.c | 2 +- net/llc/af_llc.c | 4 +- net/netrom/af_netrom.c | 3 +- net/nfc/llcp_sock.c | 2 +- net/phonet/pep.c | 6 +- net/phonet/socket.c | 4 +- net/rds/tcp_listen.c | 2 +- net/rose/af_rose.c | 3 +- net/sctp/ipv6.c | 5 +- net/sctp/protocol.c | 5 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 2 +- net/socket.c | 4 +- net/tipc/socket.c | 8 +- net/unix/af_unix.c | 5 +- net/vmw_vsock/af_vsock.c | 3 +- net/x25/af_x25.c | 3 +- 38 files changed, 142 insertions(+), 108 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a48527b..690deca17c3528 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -266,7 +266,7 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, return err; } -int af_alg_accept(struct sock *sk, struct socket *newsock) +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) { struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; @@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); err = -ENOMEM; if (!sk2) goto unlock; @@ -323,9 +323,10 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) } EXPORT_SYMBOL_GPL(af_alg_accept); -static int alg_accept(struct socket *sock, struct socket *newsock, int flags) +static int alg_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { - return af_alg_accept(sock->sk, newsock); + return af_alg_accept(sock->sk, newsock, kern); } static const struct proto_ops alg_proto_ops = { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 54fc90e8339ce8..5e92bd275ef38e 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -239,7 +239,8 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, return err ?: len; } -static int hash_accept(struct socket *sock, struct socket *newsock, int flags) +static int hash_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) if (err) return err; - err = af_alg_accept(ask->parent, newsock); + err = af_alg_accept(ask->parent, newsock, kern); if (err) return err; @@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, } static int hash_accept_nokey(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { int err; @@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock, if (err) return err; - return hash_accept(sock, newsock, flags); + return hash_accept(sock, newsock, flags, kern); } static struct proto_ops 
algif_hash_ops_nokey = { diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index b7b87ecefcdfc7..9fca8d225ee092 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) newsock->ops = sock->ops; - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); if (rc == -EAGAIN) { /* Nothing ready, so wait for activity */ init_waitqueue_entry(&wait, current); @@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(sk_sleep(sock->sk), &wait); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); } if (rc) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7d398d300e972c..9382db998ec954 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con) newsock->type = con->sock->type; newsock->ops = con->sock->ops; - result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); if (result < 0) goto accept_err; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4348027384f5ed..d0ab7e56d0b41a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); if (ret < 0) goto out; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index a2bfd7843f18f6..e2b9c6fe271496 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); void af_alg_release_parent(struct sock *sk); -int af_alg_accept(struct sock *sk, struct socket *newsock); +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); void af_alg_free_sg(struct af_alg_sgl *sgl); diff --git a/include/linux/net.h b/include/linux/net.h index cd0c8bd0a1dec0..0620f5e18c96b7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags); + struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index b7952d55b9c000..f39ae697347f65 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -int inet_accept(struct socket *sock, struct socket *newsock, int flags); +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int 
offset, size_t size, int flags); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 826f198374f809..c7a577976bec08 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, return (unsigned long)min_t(u64, when, max_when); } -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); int inet_csk_get_port(struct sock *sk, unsigned short snum); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a244db5e5ff7fa..07a0b128625a4e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -476,7 +476,8 @@ struct sctp_pf { int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc); + struct sctp_association *asoc, + bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 5e5997654db645..03252d53975de7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,7 @@ struct sock_common { * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer + * @sk_kern_sock: True if sock is using kernel lock classes * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux @@ -430,7 +431,8 @@ struct sock { #endif kmemcheck_bitfield_begin(flags); - unsigned int sk_padding : 2, + unsigned int sk_padding : 1, + sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, sk_userlocks : 4, @@ -1015,7 +1017,8 @@ struct proto { int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept)(struct sock *sk, int flags, int *err); + struct sock * (*accept)(struct sock *sk, int flags, int *err, + bool kern); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); -int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); unsigned int sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); diff --git a/net/atm/svc.c b/net/atm/svc.c index db9794ec61d88e..5589de7086af4e 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -318,7 +318,8 @@ static int svc_listen(struct socket *sock, int backlog) return error; } -static int svc_accept(struct socket *sock, struct socket *newsock, int flags) +static int svc_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk); - error = svc_create(sock_net(sk), newsock, 0, 0); + error = svc_create(sock_net(sk), newsock, 0, kern); if (error) goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c 
index a8e42cedf1dbc7..b7c486752b3acf 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1320,7 +1320,8 @@ static int __must_check ax25_connect(struct socket *sock, return err; } -static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) +static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f307b145ea5405..507b80d59dec4f 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -301,7 +301,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) } static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aa1a814ceddca7..ac3c650cb234f9 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -471,7 +471,8 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) return err; } -static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) +static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e4e9a2da1e7e7a..728e0c8dc8e74c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -627,7 +627,7 @@ static int sco_sock_listen(struct socket *sock, int backlog) } static int sco_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; diff --git a/net/core/sock.c b/net/core/sock.c index f6fd79f33097f3..a96d5f7a5734a5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable); /* * Each address family might have different locking rules, so we have - * one slock key per address family: + * one slock key per address family and separate keys for internal and + * userspace sockets. */ static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_kern_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; /* * Make lock validator output more readable. 
(we pre-construct these * strings build-time, so that runtime initialization of socket * locks is fast): */ + +#define _sock_locks(x) \ + x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ + x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ + x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ + x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ + x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ + x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ + x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ + x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ + x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ + x "27" , x "28" , x "AF_CAN" , \ + x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ + x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ + x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ + x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ + x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" + static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , - "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", - "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , - "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , - "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , - "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , - "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , - "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , - "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , - "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX" + _sock_locks("sk_lock-") }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , - "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", - "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , - "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , - "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , - "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , - "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , - "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , - "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , - "slock-27" , "slock-28" , "slock-AF_CAN" , - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX" + _sock_locks("slock-") }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , - "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", - "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , - "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , - "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , - "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , - "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , - "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , - "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-AF_CAN" , - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - 
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX" + _sock_locks("clock-") +}; + +static const char *const af_family_kern_key_strings[AF_MAX+1] = { + _sock_locks("k-sk_lock-") +}; +static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { + _sock_locks("k-slock-") +}; +static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { + _sock_locks("k-clock-") }; /* @@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across @@ -1293,7 +1283,16 @@ int sock_getsockopt(struct socket *sock, int level, int optname, */ static inline void sock_lock_init(struct sock *sk) { - sock_lock_init_class_and_name(sk, + if (sk->sk_kern_sock) + sock_lock_init_class_and_name( + sk, + af_family_kern_slock_key_strings[sk->sk_family], + af_family_kern_slock_keys + sk->sk_family, + af_family_kern_key_strings[sk->sk_family], + af_family_kern_keys + sk->sk_family); + else + sock_lock_init_class_and_name( + sk, af_family_slock_key_strings[sk->sk_family], af_family_slock_keys + sk->sk_family, af_family_key_strings[sk->sk_family], @@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, * why we need sk_prot_creator -acme */ sk->sk_prot = sk->sk_prot_creator = prot; + sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; if (likely(sk->sk_net_refcnt)) @@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) } EXPORT_SYMBOL(sock_no_socketpair); -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) +int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { return -EOPNOTSUPP; } @@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk) } rwlock_init(&sk->sk_callback_lock); - lockdep_set_class_and_name(&sk->sk_callback_lock, + if (sk->sk_kern_sock) + lockdep_set_class_and_name( + &sk->sk_callback_lock, + af_kern_callback_keys + sk->sk_family, + af_family_kern_clock_key_strings[sk->sk_family]); + else + lockdep_set_class_and_name( + &sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index e6e79eda97636d..7de5b40a5d0d12 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) return skb == NULL ? 
ERR_PTR(err) : skb; } -static int dn_accept(struct socket *sock, struct socket *newsock, int flags) +static int dn_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct sk_buff *skb = NULL; @@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5091f46826fadb..6b1fc6e4278ef4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect); * Accept a pending connection. The TCP layer now gives BSD semantics. */ -int inet_accept(struct socket *sock, struct socket *newsock, int flags) +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk1 = sock->sk; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); + struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4d5980ade3b58..5e313c1ac94fc8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) /* * This will accept the next outstanding connection. */ -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 81adc29a448dc5..8d77ad5cadaff3 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -828,7 +828,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Wait for incoming connection * */ -static int irda_accept(struct socket *sock, struct socket *newsock, int flags) +static int irda_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); @@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sk_buff *skb = NULL; int err; - err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern); if (err) return err; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 89bbde1081ce5e..84de7b6326dcdf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -938,7 +938,7 @@ static int iucv_sock_listen(struct socket *sock, int backlog) /* Accept a pending connection */ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 06186d608a274e..cb4fff785cbf5a 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. * @flags: User specified operational flags. + * @kern: If the socket is kernel internal * * Accept a new incoming connection. 
* Returns 0 upon success, negative otherwise. */ -static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) +static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4bbf4526b88566..ebf16f7f90892d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -765,7 +765,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, return err; } -static int nr_accept(struct socket *sock, struct socket *newsock, int flags) +static int nr_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 879885b31cce5f..2ffb18e73df6c0 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 222bedcd95754c..e81537991ddf0d 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + bool kern) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; @@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, + kern); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index a6c8da3ee89349..64634e3ec2fc78 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -305,7 +305,7 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, } static int pn_socket_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk; struct sock *newsk; @@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, if (unlikely(sk->sk_state != TCP_LISTEN)) return -EINVAL; - newsk = sk->sk_prot->accept(sk, flags, &err); + newsk = sk->sk_prot->accept(sk, flags, &err, kern); if (!newsk) return err; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 2c69a508a693f0..507678853e6cb3 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index b8a1df2c978532..4a972925702367 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -871,7 +871,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le return err; } -static int rose_accept(struct socket *sock, struct socket *newsock, int flags) +static int rose_accept(struct socket *sock, struct socket *newsock, int flags, + 
bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 063baac5b9fe40..961ee59f696a0b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) /* Create and initialize a new sk for the socket to be returned by accept(). */ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; struct ipv6_txoptions *opt; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1b6d4574d2b02a..989a900383b57c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb) /* Create and initialize a new sk for the socket returned by accept(). */ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot, 0); + sk->sk_prot, kern); struct inet_sock *newinet; if (!newsk) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6f0a9be50f5055..0f378ea2ae3882 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -static struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err) */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - newsk = sp->pf->create_accept_sk(sk, asoc); + newsk = sp->pf->create_accept_sk(sk, asoc, kern); if (!newsk) { error = -ENOMEM; goto out; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 85837ab90e8916..093803786eacf3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -944,7 +944,7 @@ static int smc_listen(struct socket *sock, int backlog) } static int smc_accept(struct socket *sock, struct socket *new_sock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); diff --git a/net/socket.c b/net/socket.c index e0757e648c0c45..e034fe4164beec 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, if (err) goto out_fd; - err = sock->ops->accept(sock, newsock, sock->file->f_flags); + err = sock->ops->accept(sock, newsock, sock->file->f_flags, false); if (err < 0) goto out_fd; @@ -3239,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = sock->ops->accept(sock, *newsock, flags); + err = sock->ops->accept(sock, *newsock, flags, true); if (err < 0) { sock_release(*newsock); *newsock = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43e4045e72bc00..7130e73bd42c21 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void 
tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern); static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); @@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * * Returns 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern) { struct sock *new_sk, *sk = sock->sk; struct sk_buff *buf; @@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ee37b390260a62..928691c434087e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int); +static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, @@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags) +static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *tsk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9192ead6675114..9f770f33c10098 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1250,7 +1250,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, return err; } -static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *listener; int err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fd28a49dbe8f0c..8b911c29860e79 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) return rc; } -static int x25_accept(struct socket *sock, struct socket *newsock, int flags) +static int x25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *newsk; From 4b3b45edba9222e518a1ec72df841eba3609fe34 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 9 Mar 2017 13:56:46 +0300 Subject: [PATCH 1108/1911] udp: avoid ufo handling on IP payload compression packets commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets") and commit f89c56ce710a ("ipv6: Don't use ufo handling on later transformed packets") added a check that 'rt->dst.header_len' isn't 
zero in order to skip UFO, but it doesn't include IPcomp in transport mode where it equals zero. Packets, after payload compression, may not require further fragmentation, and if original length exceeds MTU, later compressed packets will be transmitted incorrectly. This can be reproduced with LTP udp_ipsec.sh test on veth device with enabled UFO, MTU is 1500 and UDP payload is 2000: * IPv4 case, offset is wrong + unnecessary fragmentation udp_ipsec.sh -p comp -m transport -s 2000 & tcpdump -ni ltp_ns_veth2 ... IP (tos 0x0, ttl 64, id 45203, offset 0, flags [+], proto Compressed IP (108), length 49) 10.0.0.2 > 10.0.0.1: IPComp(cpi=0x1000) IP (tos 0x0, ttl 64, id 45203, offset 1480, flags [none], proto UDP (17), length 21) 10.0.0.2 > 10.0.0.1: ip-proto-17 * IPv6 case, sending small fragments udp_ipsec.sh -6 -p comp -m transport -s 2000 & tcpdump -ni ltp_ns_veth2 ... IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108) payload length: 37) fd00::2 > fd00::1: IPComp(cpi=0x1000) IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108) payload length: 21) fd00::2 > fd00::1: IPComp(cpi=0x1000) Fix it by checking 'rt->dst.xfrm' pointer to 'xfrm_state' struct, skip UFO if xfrm is set. So the new check will include both cases: IPcomp and IPsec. Fixes: c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets") Fixes: f89c56ce710a ("ipv6: Don't use ufo handling on later transformed packets") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 737ce826d7ecfa..7a3fd25e8913a9 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 528b3c1f3fdee4..df42096e1f0453 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1385,7 +1385,7 @@ static int __ip6_append_data(struct sock *sk, if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, From 6fc166d62c6f9f6eda5ea300f671d34e89bdd3c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:10:32 +0000 Subject: [PATCH 1109/1911] rxrpc: rxrpc_kernel_send_data() needs to handle failed call better If rxrpc_kernel_send_data() is asked to send data through a call that has already failed (due to a remote abort, received protocol error or network error), then return the associated error code saved in the call rather than ESHUTDOWN. This allows the caller to work out whether to ask for the abort code or not based on this. Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/sendmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 47ccfeacc1c6a2..97ab214ca4118d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -618,8 +618,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); break; case RXRPC_CALL_COMPLETE: - /* It's too late for this call */ - ret = -ESHUTDOWN; + read_lock_bh(&call->state_lock); + ret = -call->error; + read_unlock_bh(&call->state_lock); break; default: /* Request phase complete for this client call */ From d7aba644ffdebf756e51e26a2229055211838e89 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 9 Mar 2017 17:48:23 -0600 Subject: [PATCH 1110/1911] amd-xgbe: Enable IRQs only if napi_complete_done() is true Depending on the hardware, the amd-xgbe driver may use disable_irq_nosync() and enable_irq() when an interrupt is received to process Rx packets. If the napi_complete_done() return value isn't checked an unbalanced enable for the IRQ could result, generating a warning stack trace. Update the driver to only enable interrupts if napi_complete_done() returns true. Reported-by: Jeremy Linton Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 248f60d171a5a0..ffea9859f5a72b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2272,10 +2272,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) processed = xgbe_rx_poll(channel, budget); /* If we processed everything, we are done */ - if (processed < budget) { - /* Turn off polling */ - napi_complete_done(napi, processed); - + if ((processed < budget) && napi_complete_done(napi, processed)) { /* Enable Tx and Rx interrupts */ if (pdata->channel_irq_mode) xgbe_enable_rx_tx_int(pdata, channel); @@ -2317,10 +2314,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget) } while ((processed < budget) && (processed != last_processed)); /* If we processed everything, we are done */ - if (processed < budget) { - /* Turn off polling */ - napi_complete_done(napi, processed); - + if ((processed < budget) && napi_complete_done(napi, processed)) { /* Enable Tx and Rx interrupts */ xgbe_enable_rx_tx_ints(pdata); } From ffff71328a3c321f7c14cc1edd33577717037744 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:43 -0800 Subject: [PATCH 1111/1911] net: bcmgenet: correct the RBUF_OVFL_CNT and RBUF_ERR_CNT MIB values The location of the RBUF overflow and error counters has moved between different version of the GENET MAC. This commit corrects the driver to read from the correct locations depending on the version of the GENET MAC. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 60 ++++++++++++++++--- .../net/ethernet/broadcom/genet/bcmgenet.h | 10 +++- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f92896835d2a4c..f0fb7eca8eb472 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1,7 +1,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes), /* Misc UniMAC counters */ STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, - UMAC_RBUF_OVFL_CNT), - STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), + UMAC_RBUF_OVFL_CNT_V1), + STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, + UMAC_RBUF_ERR_CNT_V1), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), @@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset, } } +static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset) +{ + u16 new_offset; + u32 val; + + switch (offset) { + case UMAC_RBUF_OVFL_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_OVFL_CNT_V2; + else + new_offset = RBUF_OVFL_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + case UMAC_RBUF_ERR_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_ERR_CNT_V2; + else + new_offset = RBUF_ERR_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + default: + val = bcmgenet_umac_readl(priv, offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, offset); + break; + } + + return val; +} + static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) { int i, j = 0; @@ -845,10 +885,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) UMAC_MIB_START + j + offset); break; case BCMGENET_STAT_MISC: - val = bcmgenet_umac_readl(priv, s->reg_offset); - /* clear if overflowed */ - if (val == ~0) - bcmgenet_umac_writel(priv, 0, s->reg_offset); + if (GENET_IS_V1(priv)) { + val = bcmgenet_umac_readl(priv, s->reg_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, + s->reg_offset); + } else { + val = bcmgenet_update_stat_misc(priv, + s->reg_offset); + } break; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1e2dc34d331a49..d5fb0d772dcdb0 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -214,7 +214,9 @@ struct bcmgenet_mib_counters { #define MDIO_REG_SHIFT 16 #define MDIO_REG_MASK 0x1F 
-#define UMAC_RBUF_OVFL_CNT 0x61C +#define UMAC_RBUF_OVFL_CNT_V1 0x61C +#define RBUF_OVFL_CNT_V2 0x80 +#define RBUF_OVFL_CNT_V3PLUS 0x94 #define UMAC_MPD_CTRL 0x620 #define MPD_EN (1 << 0) @@ -224,7 +226,9 @@ struct bcmgenet_mib_counters { #define UMAC_MPD_PW_MS 0x624 #define UMAC_MPD_PW_LS 0x628 -#define UMAC_RBUF_ERR_CNT 0x634 +#define UMAC_RBUF_ERR_CNT_V1 0x634 +#define RBUF_ERR_CNT_V2 0x84 +#define RBUF_ERR_CNT_V3PLUS 0x98 #define UMAC_MDF_ERR_CNT 0x638 #define UMAC_MDF_CTRL 0x650 #define UMAC_MDF_ADDR 0x654 From 1ad3d225e5a40ca6c586989b4baaca710544c15a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:44 -0800 Subject: [PATCH 1112/1911] net: bcmgenet: correct MIB access of UniMAC RUNT counters The gap between the Tx status counters and the Rx RUNT counters is now being added to allow correct reporting of the registers. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f0fb7eca8eb472..01a172d95328d9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -876,13 +876,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) case BCMGENET_STAT_NETDEV: case BCMGENET_STAT_SOFT: continue; - case BCMGENET_STAT_MIB_RX: - case BCMGENET_STAT_MIB_TX: case BCMGENET_STAT_RUNT: - if (s->type != BCMGENET_STAT_MIB_RX) - offset = BCMGENET_STAT_OFFSET; + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_TX: + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_RX: val = bcmgenet_umac_readl(priv, UMAC_MIB_START + j + offset); + offset = 0; /* Reset Offset */ break; case BCMGENET_STAT_MISC: if (GENET_IS_V1(priv)) { From eca4bad73409aedc6ff22f823c18b67a4f08c851 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:45 -0800 Subject: [PATCH 1113/1911] net: bcmgenet: reserved phy revisions must be checked first The reserved gphy_rev value of 0x01ff must be tested before the old or new scheme for GPHY major versioning are tested, otherwise it will be treated as 0xff00 according to the old scheme. Fixes: b04a2f5b9ff5 ("net: bcmgenet: add support for new GENET PHY revision scheme") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 01a172d95328d9..99f8d90246331b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3226,6 +3226,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) */ gphy_rev = reg & 0xffff; + /* This is reserved so should require special treatment */ + if (gphy_rev == 0 || gphy_rev == 0x01ff) { + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); + return; + } + /* This is the good old scheme, just GPHY major, no minor nor patch */ if ((gphy_rev & 0xf0) != 0) priv->gphy_rev = gphy_rev << 8; @@ -3234,12 +3240,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) else if ((gphy_rev & 0xff00) != 0) priv->gphy_rev = gphy_rev; - /* This is reserved so should require special treatment */ - else if (gphy_rev == 0 || gphy_rev == 0x01ff) { - pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); - return; - } - #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!(params->flags & GENET_HAS_40BITS)) pr_warn("GENET does not support 40-bits PA\n"); From 7627409cc4970e8c8b9de6945ad86a575290a94e Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:46 -0800 Subject: [PATCH 1114/1911] net: bcmgenet: power down internal phy if open or resume fails Since the internal PHY is powered up during the open and resume functions it should be powered back down if the functions fail. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 99f8d90246331b..475dc14931af14 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2850,6 +2850,8 @@ static int bcmgenet_open(struct net_device *dev) err_fini_dma: bcmgenet_fini_dma(priv); err_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } @@ -3551,6 +3553,8 @@ static int bcmgenet_resume(struct device *d) return 0; out_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } From 07c52d6a0b955a8a28834f9354793cfc4b81d0e9 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:47 -0800 Subject: [PATCH 1115/1911] net: bcmgenet: synchronize irq0 status between the isr and task Add a spinlock to ensure that irq0_stat is not unintentionally altered as the result of preemption. Also removed unserviced irq0 interrupts and removed irq1_stat since there is no bottom half service for those interrupts. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 73 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 6 +- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 475dc14931af14..527cecaf12c14f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2506,24 +2506,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { + unsigned long flags; + unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - if (priv->irq0_stat & UMAC_IRQ_MPD_R) { - priv->irq0_stat &= ~UMAC_IRQ_MPD_R; + spin_lock_irqsave(&priv->lock, flags); + status = priv->irq0_stat; + priv->irq0_stat = 0; + spin_unlock_irqrestore(&priv->lock, flags); + + if (status & UMAC_IRQ_MPD_R) { netif_dbg(priv, wol, priv->dev, "magic packet detected, waking up\n"); bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } /* Link UP/DOWN event */ - if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, - !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); - priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; - } + !!(status & UMAC_IRQ_LINK_UP)); } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2532,22 +2536,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; - unsigned int index; + unsigned int index, status; - /* Save irq status for bottom-half processing. */ - priv->irq1_stat = - bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + "%s: IRQ=0x%x\n", __func__, status); /* Check Rx priority queue interrupts */ for (index = 0; index < priv->hw_params->rx_queues; index++) { - if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) continue; rx_ring = &priv->rx_rings[index]; @@ -2560,7 +2563,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { - if (!(priv->irq1_stat & BIT(index))) + if (!(status & BIT(index))) continue; tx_ring = &priv->tx_rings[index]; @@ -2580,19 +2583,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; + unsigned int status; + unsigned long flags; - /* Save irq status for bottom-half processing. 
*/ - priv->irq0_stat = - bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "IRQ=0x%x\n", priv->irq0_stat); + "IRQ=0x%x\n", status); - if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) { + if (status & UMAC_IRQ_RXDMA_DONE) { rx_ring = &priv->rx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&rx_ring->napi))) { @@ -2601,7 +2605,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) { + if (status & UMAC_IRQ_TXDMA_DONE) { tx_ring = &priv->tx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&tx_ring->napi))) { @@ -2610,22 +2614,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM | - UMAC_IRQ_MPD_R)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); - } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && - priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { - priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); + status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { wake_up(&priv->wq); } + /* all other interested interrupts handled in bottom half */ + status &= (UMAC_IRQ_LINK_EVENT | + UMAC_IRQ_MPD_R); + if (status) { + /* Save irq status for bottom-half processing. */ + spin_lock_irqsave(&priv->lock, flags); + priv->irq0_stat |= status; + spin_unlock_irqrestore(&priv->lock, flags); + + schedule_work(&priv->bcmgenet_irq_work); + } + return IRQ_HANDLED; } @@ -3327,6 +3332,8 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->lock); + SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index d5fb0d772dcdb0..db7f289d65ae2a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -623,11 +623,13 @@ struct bcmgenet_priv { struct work_struct bcmgenet_irq_work; int irq0; int irq1; - unsigned int irq0_stat; - unsigned int irq1_stat; int wol_irq; bool wol_irq_disabled; + /* shared status */ + spinlock_t lock; + unsigned int irq0_stat; + /* HW descriptors/checksum variables */ bool desc_64b_en; bool desc_rxchk_en; From 6be371b053dc86f11465cc1abce2e99bda0a0574 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:48 -0800 Subject: [PATCH 1116/1911] net: bcmgenet: Power up the internal PHY before probing the MII When using the internal PHY it must be powered up when the MII is probed or the PHY will not be detected. Since the PHY is powered up at reset this has not been a problem. However, when the kernel is restarted with kexec the PHY will likely be powered down when the kernel starts so it will not be detected and the Ethernet link will not be established. This commit explicitly powers up the internal PHY when the GENET driver is probed to correct this behavior. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 527cecaf12c14f..f9309884077532 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3289,6 +3289,7 @@ static int bcmgenet_probe(struct platform_device *pdev) const void *macaddr; struct resource *r; int err = -EIO; + const char *phy_mode_str; /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, @@ -3396,6 +3397,13 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->clk_eee = NULL; } + /* If this is an internal GPHY, power it on now, before UniMAC is + * brought out of reset as absolutely no UniMAC activity is allowed + */ + if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) && + !strcasecmp(phy_mode_str, "internal")) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + err = reset_umac(priv); if (err) goto err_clk_disable; From 89316fa34ab8afac8d693f41a5bc268673f1da15 Mon Sep 17 00:00:00 2001 From: Edwin Chan Date: Thu, 9 Mar 2017 16:58:49 -0800 Subject: [PATCH 1117/1911] net: bcmgenet: add begin/complete ethtool ops Make sure clock is enabled for ethtool ops. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Edwin Chan Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f9309884077532..ec1f3014e4109d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } +static int bcmgenet_begin(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + + /* Turn on the clock */ + return clk_prepare_enable(priv->clk); +} + +static void bcmgenet_complete(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + + /* Turn off the clock */ + clk_disable_unprepare(priv->clk); +} + static int bcmgenet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -1022,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) /* standard ethtool support functions. */ static const struct ethtool_ops bcmgenet_ethtool_ops = { + .begin = bcmgenet_begin, + .complete = bcmgenet_complete, .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, From 6d22fe14005ce66d1a120495ac16499b944feb95 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:50 -0800 Subject: [PATCH 1118/1911] net: bcmgenet: decouple flow control from bcmgenet_tx_reclaim The bcmgenet_tx_reclaim() function is used to reclaim transmit resources in different places within the driver. Most of them should not affect the state of the transmit flow control. This commit relocates the logic for waking tx queues based on freed resources to the napi polling function where it is more appropriate. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ec1f3014e4109d..69015fa50f2096 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1234,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct enet_cb *tx_cb_ptr; - struct netdev_queue *txq; unsigned int pkts_compl = 0; unsigned int bytes_compl = 0; unsigned int c_index; @@ -1286,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, dev->stats.tx_packets += pkts_compl; dev->stats.tx_bytes += bytes_compl; - txq = netdev_get_tx_queue(dev, ring->queue); - netdev_tx_completed_queue(txq, pkts_compl, bytes_compl); - - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); - } + netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue), + pkts_compl, bytes_compl); return pkts_compl; } @@ -1315,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) struct bcmgenet_tx_ring *ring = container_of(napi, struct bcmgenet_tx_ring, napi); unsigned int work_done = 0; + struct netdev_queue *txq; + unsigned long flags; - work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); + spin_lock_irqsave(&ring->lock, flags); + work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { + txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); + netif_tx_wake_queue(txq); + } + spin_unlock_irqrestore(&ring->lock, flags); if (work_done == 0) { napi_complete(napi); From 46f401c4297a2232a037ad8801b6c83c90414cf7 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 9 Mar 2017 20:33:51 -0600 Subject: [PATCH 1119/1911] powerpc/pmac: Fix crash in dma-mapping.h with NULL dma_ops Commit 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata into struct device") introduced a crash for macio devices, an example backtrace being: kernel BUG at ./include/linux/dma-mapping.h:465! Oops: Exception in kernel mode, sig: 5 [#1] ... NIP [c031ddb0] dmam_alloc_coherent+0x74/0x140 LR [c031de70] dmam_alloc_coherent+0x134/0x140 Call Trace: dmam_alloc_coherent+0x134/0x140 (unreliable) pata_macio_port_start+0x3c/0x8c ata_host_start.part.5+0xfc/0x208 ata_host_activate+0x128/0x154 pata_macio_common_init+0x2f0/0x538 pata_macio_attach+0xd8/0x180 macio_device_probe+0x5c/0xec driver_probe_device+0x21c/0x314 __driver_attach+0xcc/0xd0 bus_for_each_dev+0x68/0xb4 bus_add_driver+0x1dc/0x244 driver_register+0x88/0x130 pata_macio_init+0x5c/0x88 do_one_initcall+0x40/0x170 kernel_init_freeable+0x134/0x1d0 kernel_init+0x18/0x110 ret_from_kernel_thread+0x5c/0x64 This was caused by the device having NULL dma_ops, triggering the BUG_ON(). Previously the device inherited its dma_ops via the assignment to dev->ofdev.dev.archdata. However after commit 5657933dbb6e the dma_ops are moved into dev->ofdev.dev, and so they need to be explicitly copied. 
Fixes: 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata into struct device") Signed-off-by: Larry Finger Suggested-by: Benjamin Herrenschmidt [mpe: Rewrite change log, add backtrace] Signed-off-by: Michael Ellerman --- drivers/macintosh/macio_asic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 3f041b1870335a..f757cef293f868 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, * To get all the fields, copy all archdata */ dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata; + dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops; #endif /* CONFIG_PCI */ #ifdef DEBUG From 1363875bdb6317a2d0798284d7aaf320f0782f6d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:46 +1000 Subject: [PATCH 1120/1911] powerpc/64s: fix handling of non-synchronous machine checks A synchronous machine check is an exception raised by the attempt to execute the current instruction. If the error can't be corrected, it can make sense to SIGBUS the currently running process. In other cases, the error condition is not related to the current instruction, so killing the current process is not the right thing to do. Today, all machine checks are MCE_SEV_ERROR_SYNC, so this has no practical change. It will be used to handle POWER9 asynchronous machine checks. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 86d9fde93c175f..e0f856bfbfe8f3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - uint64_t ea = get_mce_fault_addr(evt); if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ @@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs, } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ recovered = 1; - } else if (ea && !is_kernel_addr(ea)) { + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } else if ((evt->severity == MCE_SEV_ERROR_SYNC) && + (user_mode(regs) && !is_global_init(current))) { /* - * Faulting address is not in kernel text. We should be fine. - * We need to find which process uses this address. * For now, kill the task if we have received exception when * in userspace. * * TODO: Queue up this address for hwpoisioning later. */ - if (user_mode(regs) && !is_global_init(current)) { - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; - } else - recovered = 0; - } else if (user_mode(regs) && !is_global_init(current) && - evt->severity == MCE_SEV_ERROR_SYNC) { - /* - * If we have received a synchronous error when in userspace - * kill the task. 
- */ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); recovered = 1; } From c1bbf387d6191e6e18f3adc4db45b922822c2ba4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:47 +1000 Subject: [PATCH 1121/1911] powerpc/64s: allow machine check handler to set severity and initiator Currently severity and initiator are always set to MCE_SEV_ERROR_SYNC and MCE_INITIATOR_CPU in the core mce code. Allow them to be set by the machine specific mce handlers. No functional change for existing handlers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 3 ++- arch/powerpc/kernel/mce.c | 5 +++-- arch/powerpc/kernel/mce_power.c | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f97d8cb6bdf64f..b2a5865ccd8755 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -177,7 +177,8 @@ struct mce_error_info { enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; } u; - uint8_t reserved[2]; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c6923ff451311b..949507277436dc 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -90,13 +90,14 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->gpr3 = regs->gpr[3]; mce->in_use = 1; - mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; - mce->severity = MCE_SEV_ERROR_SYNC; + + mce->initiator = mce_err->initiator; + mce->severity = mce_err->severity; /* * Populate the mce error_type and type-specific error_type. diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7353991c4ecee6..c37fc5fdd43327 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -281,6 +281,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -352,6 +355,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; From 7b9f71f974a12740e79e918cfd58c2fce0b5b580 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:48 +1000 Subject: [PATCH 1122/1911] powerpc/64s: POWER9 machine check handler Add POWER9 machine check handler. There are several new types of errors added, so logging messages for those are also added. This doesn't attempt to reuse any of the P7/8 defines or functions, because that becomes too complex. The better option in future is to use a table driven approach. 
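A table driven decoder could look roughly like the sketch below. This is illustration only and not code from this series: struct mce_ierror_entry, mce_p9_ierror_table and mce_decode_ifetch() are invented names, only two rows are shown, and the MCE_* enum values are assumed to be the existing ones from asm/mce.h.

	/* Sketch: map a P9_SRR1_MC_IFETCH_* code to an error type and
	 * severity through a lookup table instead of open-coded tests.
	 */
	struct mce_ierror_entry {
		unsigned int srr1_code;		/* P9_SRR1_MC_IFETCH_* value */
		enum MCE_ErrorType type;
		enum MCE_Severity severity;
	};

	static const struct mce_ierror_entry mce_p9_ierror_table[] = {
		{ P9_SRR1_MC_IFETCH_UE,         MCE_ERROR_TYPE_UE,  MCE_SEV_ERROR_SYNC },
		{ P9_SRR1_MC_IFETCH_SLB_PARITY, MCE_ERROR_TYPE_SLB, MCE_SEV_ERROR_SYNC },
		{ 0, MCE_ERROR_TYPE_UNKNOWN, MCE_SEV_NO_ERROR },	/* end marker */
	};

	static void mce_decode_ifetch(uint64_t srr1, struct mce_error_info *mce_err)
	{
		const struct mce_ierror_entry *e;
		unsigned int code = P9_SRR1_MC_IFETCH(srr1);

		for (e = mce_p9_ierror_table; e->srr1_code; e++) {
			if (e->srr1_code != code)
				continue;
			mce_err->error_type = e->type;
			mce_err->severity = e->severity;
			return;
		}
		/* Unrecognised code: fall back to an unknown synchronous error */
		mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
		mce_err->severity = MCE_SEV_ERROR_SYNC;
	}
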
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bitops.h | 4 + arch/powerpc/include/asm/mce.h | 105 ++++++++++++++ arch/powerpc/kernel/cputable.c | 3 + arch/powerpc/kernel/mce.c | 83 +++++++++++ arch/powerpc/kernel/mce_power.c | 231 ++++++++++++++++++++++++++++++ 5 files changed, 426 insertions(+) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 73eb794d616381..bc5fdfd227886a 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -51,6 +51,10 @@ #define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +/* Put a PPC bit into a "normal" bit position */ +#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \ + ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit)) + #include /* Macro for generating the ***_bits() functions */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index b2a5865ccd8755..ed62efe01e49ed 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,6 +66,55 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) + +/* + * Machine Check bits on power9 + */ +#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1) + +#define P9_SRR1_MC_IFETCH(srr1) ( \ + PPC_BITEXTRACT(srr1, 45, 0) | \ + PPC_BITEXTRACT(srr1, 44, 1) | \ + PPC_BITEXTRACT(srr1, 43, 2) | \ + PPC_BITEXTRACT(srr1, 36, 3) ) + +/* 0 is reserved */ +#define P9_SRR1_MC_IFETCH_UE 1 +#define P9_SRR1_MC_IFETCH_SLB_PARITY 2 +#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3 +#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4 +#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5 +#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6 +/* 7 is reserved */ +#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8 +#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9 +/* 10 ? 
*/ +#define P9_SRR1_MC_IFETCH_RA 11 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12 +#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13 +#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15 + +/* DSISR bits for machine check (On Power9) */ +#define P9_DSISR_MC_UE (PPC_BIT(48)) +#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) +#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50)) +#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51)) +#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) +#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) +#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54)) +#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) +#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56)) +#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57)) +#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58)) +#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59)) +#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60)) + +/* SLB error bits */ +#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \ + P9_DSISR_MC_SLB_PARITY_MFSLB | \ + P9_DSISR_MC_SLB_MULTIHIT_MFSLB) + enum MCE_Version { MCE_V1 = 1, }; @@ -93,6 +142,9 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_SLB = 2, MCE_ERROR_TYPE_ERAT = 3, MCE_ERROR_TYPE_TLB = 4, + MCE_ERROR_TYPE_USER = 5, + MCE_ERROR_TYPE_RA = 6, + MCE_ERROR_TYPE_LINK = 7, }; enum MCE_UeErrorType { @@ -121,6 +173,32 @@ enum MCE_TlbErrorType { MCE_TLB_ERROR_MULTIHIT = 2, }; +enum MCE_UserErrorType { + MCE_USER_ERROR_INDETERMINATE = 0, + MCE_USER_ERROR_TLBIE = 1, +}; + +enum MCE_RaErrorType { + MCE_RA_ERROR_INDETERMINATE = 0, + MCE_RA_ERROR_IFETCH = 1, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3, + MCE_RA_ERROR_LOAD = 4, + MCE_RA_ERROR_STORE = 5, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7, + MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8, +}; + +enum MCE_LinkErrorType { + MCE_LINK_ERROR_INDETERMINATE = 0, + MCE_LINK_ERROR_IFETCH_TIMEOUT = 1, + MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2, + MCE_LINK_ERROR_LOAD_TIMEOUT = 3, + MCE_LINK_ERROR_STORE_TIMEOUT = 4, + MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5, +}; + struct machine_check_event { enum MCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -166,6 +244,30 @@ struct machine_check_event { uint64_t effective_address; uint8_t reserved_2[16]; } tlb_error; + + struct { + enum MCE_UserErrorType user_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } user_error; + + struct { + enum MCE_RaErrorType ra_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } ra_error; + + struct { + enum MCE_LinkErrorType link_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } link_error; } u; }; @@ -176,6 +278,9 @@ struct mce_error_info { enum MCE_SlbErrorType slb_error_type:8; enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; + enum MCE_UserErrorType user_error_type:8; + enum MCE_RaErrorType ra_error_type:8; + enum MCE_LinkErrorType link_error_type:8; } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bb7a1890aeb7fb..e79b9daa873c18 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -77,6 +77,7 @@ extern void 
__flush_tlb_power8(unsigned int action); extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Power9 */ @@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 949507277436dc..a1475e6aef3a51 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce, case MCE_ERROR_TYPE_TLB: mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; break; + case MCE_ERROR_TYPE_USER: + mce->u.user_error.user_error_type = mce_err->u.user_error_type; + break; + case MCE_ERROR_TYPE_RA: + mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; + break; + case MCE_ERROR_TYPE_LINK: + mce->u.link_error.link_error_type = mce_err->u.link_error_type; + break; case MCE_ERROR_TYPE_UNKNOWN: default: break; @@ -116,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_USER) { + mce->u.user_error.effective_address_provided = true; + mce->u.user_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_RA) { + mce->u.ra_error.effective_address_provided = true; + mce->u.ra_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { + mce->u.link_error.effective_address_provided = true; + mce->u.link_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; @@ -240,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt) "Parity", "Multihit", }; + static const char *mc_user_types[] = { + "Indeterminate", + "tlbie(l) invalid", + }; + static const char *mc_ra_types[] = { + "Indeterminate", + "Instruction fetch (bad)", + "Page table walk ifetch (bad)", + "Page table walk ifetch (foreign)", + "Load (bad)", + "Store (bad)", + "Page table walk Load/Store (bad)", + "Page table walk Load/Store (foreign)", + "Load/Store (foreign)", + }; + static const char *mc_link_types[] = { + "Indeterminate", + "Instruction fetch (timeout)", + "Page table walk ifetch (timeout)", + "Load (timeout)", + "Store (timeout)", + "Page table walk Load/Store (timeout)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -316,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s Effective address: %016llx\n", level, evt->u.tlb_error.effective_address); break; + case MCE_ERROR_TYPE_USER: + subtype = evt->u.user_error.user_error_type 
< + ARRAY_SIZE(mc_user_types) ? + mc_user_types[evt->u.user_error.user_error_type] + : "Unknown"; + printk("%s Error type: User [%s]\n", level, subtype); + if (evt->u.user_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.user_error.effective_address); + break; + case MCE_ERROR_TYPE_RA: + subtype = evt->u.ra_error.ra_error_type < + ARRAY_SIZE(mc_ra_types) ? + mc_ra_types[evt->u.ra_error.ra_error_type] + : "Unknown"; + printk("%s Error type: Real address [%s]\n", level, subtype); + if (evt->u.ra_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ra_error.effective_address); + break; + case MCE_ERROR_TYPE_LINK: + subtype = evt->u.link_error.link_error_type < + ARRAY_SIZE(mc_link_types) ? + mc_link_types[evt->u.link_error.link_error_type] + : "Unknown"; + printk("%s Error type: Link [%s]\n", level, subtype); + if (evt->u.link_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.link_error.effective_address); + break; default: case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); @@ -342,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt) if (evt->u.tlb_error.effective_address_provided) return evt->u.tlb_error.effective_address; break; + case MCE_ERROR_TYPE_USER: + if (evt->u.user_error.effective_address_provided) + return evt->u.user_error.effective_address; + break; + case MCE_ERROR_TYPE_RA: + if (evt->u.ra_error.effective_address_provided) + return evt->u.ra_error.effective_address; + break; + case MCE_ERROR_TYPE_LINK: + if (evt->u.link_error.effective_address_provided) + return evt->u.link_error.effective_address; + break; default: case MCE_ERROR_TYPE_UNKNOWN: break; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c37fc5fdd43327..763d6f58caa8ca 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -116,6 +116,51 @@ static void flush_and_reload_slb(void) } #endif +static void flush_erat(void) +{ + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +#define MCE_FLUSH_SLB 1 +#define MCE_FLUSH_TLB 2 +#define MCE_FLUSH_ERAT 3 + +static int mce_flush(int what) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + if (what == MCE_FLUSH_SLB) { + flush_and_reload_slb(); + return 1; + } +#endif + if (what == MCE_FLUSH_ERAT) { + flush_erat(); + return 1; + } + if (what == MCE_FLUSH_TLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); + return 1; + } + } + + return 0; +} + +static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) +{ + if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) + dsisr &= ~slb; + if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) + dsisr &= ~erat; + if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) + dsisr &= ~tlb; + /* Any other errors we don't understand? 
*/ + if (dsisr) + return 0; + return 1; +} + static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) { long handled = 1; @@ -378,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) save_mce_event(regs, handled, &mce_error_info, nip, addr); return handled; } + +static int mce_handle_derror_p9(struct pt_regs *regs) +{ + uint64_t dsisr = regs->dsisr; + + return mce_handle_flush_derrors(dsisr, + P9_DSISR_MC_SLB_PARITY_MFSLB | + P9_DSISR_MC_SLB_MULTIHIT_MFSLB, + + P9_DSISR_MC_TLB_MULTIHIT_MFTLB, + + P9_DSISR_MC_ERAT_MULTIHIT); +} + +static int mce_handle_ierror_p9(struct pt_regs *regs) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_SLB_PARITY: + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + return mce_flush(MCE_FLUSH_SLB); + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + return mce_flush(MCE_FLUSH_TLB); + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + return mce_flush(MCE_FLUSH_ERAT); + default: + return 0; + } +} + +static void mce_get_derror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t dsisr = regs->dsisr; + + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; + + if (dsisr & P9_DSISR_MC_USER_TLBIE) + *addr = regs->nip; + else + *addr = regs->dar; + + if (dsisr & P9_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { + mce_err->error_type = MCE_ERROR_TYPE_USER; + mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; + } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_RA_LOAD) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; + } +} + +static void mce_get_ierror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + case 
P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->severity = MCE_SEV_FATAL; + break; + default: + mce_err->severity = MCE_SEV_ERROR_SYNC; + break; + } + + mce_err->initiator = MCE_INITIATOR_CPU; + + *addr = regs->nip; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_UE: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; + break; + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; + break; + default: + break; + } +} + +long __machine_check_early_realmode_p9(struct pt_regs *regs) +{ + uint64_t nip, addr; + long handled; + struct mce_error_info mce_error_info = { 0 }; + + nip = regs->nip; + + if (P9_SRR1_MC_LOADSTORE(regs->msr)) { + handled = mce_handle_derror_p9(regs); + mce_get_derror_p9(regs, &mce_error_info, &addr); + } else { + handled = mce_handle_ierror_p9(regs); + mce_get_ierror_p9(regs, &mce_error_info, &addr); + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} From 296739839fa2851e6badc77dcfc45050094cb102 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Mar 2017 20:53:31 +0100 Subject: [PATCH 1123/1911] net: phy: marvell: Fix double free of hwmon device The hwmon temperature sensor devices is registered using a devm_hwmon API call. The marvell_release() would then manually free the device, not using a devm_hmon API, resulting in the device being removed twice, leading to a crash in kernfs_find_ns() during the second removal. Remove the manual removal, which makes marvell_release() empty, so remove it as well. 
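(Background note, not part of the patch: the devm_* convention is that a resource registered through a device-managed helper is released by the driver core when the device is unbound, so pairing it with a manual unregister is exactly what produces this double free. A deliberately simplified, hypothetical probe sketch of the pattern follows; the function and names here are invented for illustration and are not the marvell driver's code.)

	/* Hypothetical sketch of the device-managed hwmon pattern. */
	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/hwmon.h>

	static int example_hwmon_probe(struct device *dev,
				       const struct hwmon_chip_info *info)
	{
		struct device *hwmon;

		/*
		 * Managed registration: the core unregisters the hwmon device
		 * automatically on unbind, so no remove/release callback may
		 * free it a second time.
		 */
		hwmon = devm_hwmon_device_register_with_info(dev, "example",
							     NULL, info, NULL);
		return PTR_ERR_OR_ZERO(hwmon);
	}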
Signed-off-by: Andrew Lunn Fixes: 0b04680fdae4 ("phy: marvell: Add support for temperature sensor") Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index f9d0fa315a4762..272b051a019975 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev) return m88e1510_hwmon_probe(phydev); } -static void marvell_remove(struct phy_device *phydev) -{ -#ifdef CONFIG_HWMON - - struct marvell_priv *priv = phydev->priv; - - if (priv && priv->hwmon_dev) - hwmon_device_unregister(priv->hwmon_dev); -#endif -} - static struct phy_driver marvell_drivers[] = { { .phy_id = MARVELL_PHY_ID_88E1101, @@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = &m88e1121_probe, - .remove = &marvell_remove, .config_init = &m88e1121_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, @@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE, .flags = PHY_HAS_INTERRUPT, .probe = &m88e1510_probe, - .remove = &marvell_remove, .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = m88e1510_probe, - .remove = &marvell_remove, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1545", .probe = m88e1510_probe, - .remove = &marvell_remove, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &marvell_config_init, From a1016e94cce9fb6ea56d7602263783e2d95d6e92 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Mar 2017 17:14:32 +0000 Subject: [PATCH 1124/1911] ARM: wire up statx syscall Wire up the new statx syscall for ARM. Signed-off-by: Russell King --- arch/arm/tools/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 3c2cb5d5adfa4f..0bb0e9c6376c4a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -411,3 +411,4 @@ 394 common pkey_mprotect sys_pkey_mprotect 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free +397 common statx sys_statx From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Thu, 9 Mar 2017 14:00:17 +0100 Subject: [PATCH 1125/1911] x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk The reboot quirk for ASUS EeeBook X205TA contains a typo in DMI_PRODUCT_NAME, improperly referring to X205TAW instead of X205TA, which prevents the quirk from being triggered. The model X205TAW already has a reboot quirk of its own. This fix simply removes the inappropriate final letter W. 
Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk") Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4194d6f9bb290b..067f9813fd2cf7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .ident = "ASUS EeeBook X205TA", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"), }, }, { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ From 9a8b0a230aca55ee142fd76f4765f1da1799da93 Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Wed, 15 Feb 2017 14:00:15 +0000 Subject: [PATCH 1126/1911] drm: mali-dp: Remove mclk rate management The rate of mclk depends on the use-case. If no downscaling is required, then mclk == pxlclk is a valid option; with downscaling however, the rate at which mclk runs determines how much a plane can be downscaled before composition. This is a system integration + power management issue that is more suited to firmware rather than this driver. Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 08e6a71f5d05f4..294b53697334cc 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc) clk_prepare_enable(hwdev->pxlclk); - /* mclk needs to be set to the same or higher rate than pxlclk */ - clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000); + /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); hwdev->modeset(hwdev, &vm); From d1479f6108006555fe33d7cfe8db4f95ad614b9a Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Thu, 9 Feb 2017 11:32:00 +0000 Subject: [PATCH 1127/1911] drm: mali-dp: Fix smart layer not going to composition Use rectangle 1 as a generic plane. Existing code already sets the smart layer bounding box size + offset. The rectangles' offsets are relative to the bounding box, so there is no need to set R1's offset (reset value is 0), just its size which is the same as the bounding box. 
Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_hw.c | 2 +- drivers/gpu/drm/arm/malidp_planes.c | 18 ++++++++++++++++-- drivers/gpu/drm/arm/malidp_regs.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 488aedf5b58d54..9f5513006eeef8 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = { { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE }, { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, - { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 }, + { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE }, }; #define MALIDP_DE_DEFAULT_PREFETCH_START 5 diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 414aada10fe5e7..d5aec082294cbd 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -37,6 +37,8 @@ #define LAYER_V_VAL(x) (((x) & 0x1fff) << 16) #define MALIDP_LAYER_COMP_SIZE 0x010 #define MALIDP_LAYER_OFFSET 0x014 +#define MALIDP550_LS_ENABLE 0x01c +#define MALIDP550_LS_R1_IN_SIZE 0x020 /* * This 4-entry look-up-table is used to determine the full 8-bit alpha value @@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, LAYER_V_VAL(plane->state->crtc_y), mp->layer->base + MALIDP_LAYER_OFFSET); + if (mp->layer->id == DE_SMART) + malidp_hw_write(mp->hwdev, + LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h), + mp->layer->base + MALIDP550_LS_R1_IN_SIZE); + /* first clear the rotation bits */ val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL); val &= ~LAYER_ROT_MASK; @@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm) plane->hwdev = malidp->dev; plane->layer = &map->layers[i]; - /* Skip the features which the SMART layer doesn't have */ - if (id == DE_SMART) + if (id == DE_SMART) { + /* + * Enable the first rectangle in the SMART layer to be + * able to use it as a drm plane. + */ + malidp_hw_write(malidp->dev, 1, + plane->layer->base + MALIDP550_LS_ENABLE); + /* Skip the features which the SMART layer doesn't have. */ continue; + } drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h index aff6d4a84e998c..b816067a65c572 100644 --- a/drivers/gpu/drm/arm/malidp_regs.h +++ b/drivers/gpu/drm/arm/malidp_regs.h @@ -84,6 +84,7 @@ /* Stride register offsets relative to Lx_BASE */ #define MALIDP_DE_LG_STRIDE 0x18 #define MALIDP_DE_LV_STRIDE0 0x18 +#define MALIDP550_DE_LS_R1_STRIDE 0x28 /* macros to set values into registers */ #define MALIDP_DE_H_FRONTPORCH(x) (((x) & 0xfff) << 0) From 8b38f890eff6efc153130254a96452570f971159 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 10 Mar 2017 10:41:50 +0900 Subject: [PATCH 1128/1911] MAINTAINERS: add Masahiro Yamada as a Kbuild maintainer It has been difficult lately for Michal to work on Kbuild on his regular basis. We discussed the maintainership of Kbuild, and I decided to be a co-maintainer. Add myself to the maintainer field, and replace the repository with my own. 
Signed-off-by: Masahiro Yamada Acked-by: Michal Marek --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe48481f..f1023c52b1e360 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7084,9 +7084,9 @@ S: Maintained F: fs/autofs4/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) +M: Masahiro Yamada M: Michal Marek -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git for-next -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git rc-fixes +T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git L: linux-kbuild@vger.kernel.org S: Maintained F: Documentation/kbuild/ From 702f2ac87a9a8da23bf8506466bc70175fc970b2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Mar 2017 07:48:49 +0000 Subject: [PATCH 1129/1911] rxrpc: Wake up the transmitter if Rx window size increases on the peer The RxRPC ACK packet may contain an extension that includes the peer's current Rx window size for this call. We adjust the local Tx window size to match. However, the transmitter can stall if the receive window is reduced to 0 by the peer and then reopened. This is because the normal way that the transmitter is re-energised is by dropping something out of our Tx queue and thus making space. When a single gap is made, the transmitter is woken up. However, because there's nothing in the Tx queue at this point, this doesn't happen. To fix this, perform a wake_up() any time we see the peer's Rx window size increasing. The observable symptom is that calls start failing on ETIMEDOUT and the following: kAFS: SERVER DEAD state=-62 appears in dmesg. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d74921c4969b2d..18b2ad8be8e2b5 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -652,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + bool wake = false; u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", @@ -659,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; - call->tx_winsize = rwind; + if (call->tx_winsize != rwind) { + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (rwind > call->tx_winsize) + wake = true; + call->tx_winsize = rwind; + } + if (call->cong_ssthresh > rwind) call->cong_ssthresh = rwind; @@ -675,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } + + if (wake) + wake_up(&call->waitq); } /* From cb6950b7152fb3760942f9cb16bd2a35e5a1bfd1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 7 Mar 2017 00:34:57 +0530 Subject: [PATCH 1130/1911] arm64: kprobes: remove kprobe_exceptions_notify Commit fc62d0207ae0 ("kprobes: Introduce weak variant of kprobe_exceptions_notify()") introduces a generic empty version of the function for architectures that don't need special handling, like arm64. As such, remove the arch/arm64/ specific handler. Signed-off-by: Naveen N. 
Rao Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a264..c5c45942fb6e66 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; From b0de0ccc8b9edd8846828e0ecdc35deacdf186b0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Mar 2017 19:06:40 +0000 Subject: [PATCH 1131/1911] arm64: kasan: avoid bad virt_to_pfn() Booting a v4.11-rc1 kernel with DEBUG_VIRTUAL and KASAN enabled produces the following splat (trimmed for brevity): [ 0.000000] virt_to_phys used for non-linear address: ffff200008080000 (0xffff200008080000) [ 0.000000] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x48/0x70 [ 0.000000] PC is at __virt_to_phys+0x48/0x70 [ 0.000000] LR is at __virt_to_phys+0x48/0x70 [ 0.000000] Call trace: [ 0.000000] [] __virt_to_phys+0x48/0x70 [ 0.000000] [] kasan_init+0x1c0/0x498 [ 0.000000] [] setup_arch+0x2fc/0x948 [ 0.000000] [] start_kernel+0xb8/0x570 [ 0.000000] [] __primary_switched+0x6c/0x74 This is because we use virt_to_pfn() on a kernel image address when trying to figure out its nid, so that we can allocate its shadow from the same node. As with other recent changes, this patch uses lm_alias() to solve this. We could instead use NUMA_NO_NODE, as x86 does for all shadow allocations, though we'll likely want the "real" memory shadow to be backed from its corresponding nid anyway, so we may as well be consistent and find the nid for the image shadow. Cc: Catalin Marinas Cc: Will Deacon Acked-by: Laura Abbott Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e920554368..687a358a37337a 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the From 5c2a625937ba49bc691089370638223d310cda9a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Mar 2017 16:27:04 -0800 Subject: [PATCH 1132/1911] arm64: support keyctl() system call in 32-bit mode As is the case for a number of other architectures that have a 32-bit compat mode, enable KEYS_COMPAT if both COMPAT and KEYS are enabled. This allows AArch32 programs to use the keyctl() system call when running on an AArch64 kernel. 
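(Usage illustration only, not part of the patch: with KEYS_COMPAT enabled, an AArch32 binary running under an arm64 kernel can use the key management interface the same way a native binary does. The sketch below assumes libkeyutils is available for the 32-bit target; without this option the keyctl()-based read step would be expected to fail with ENOSYS.)

	/* Build with an AArch32 toolchain and -lkeyutils; illustration only. */
	#include <keyutils.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		key_serial_t key;
		void *payload = NULL;
		long len;

		key = add_key("user", "demo:compat", "hello", strlen("hello"),
			      KEY_SPEC_SESSION_KEYRING);
		if (key < 0) {
			perror("add_key");
			return 1;
		}

		/* keyctl_read_alloc() wraps keyctl(KEYCTL_READ, ...) */
		len = keyctl_read_alloc(key, &payload);
		if (len < 0) {
			perror("keyctl_read_alloc");
			return 1;
		}

		printf("read back %ld bytes: %.*s\n", len, (int)len,
		       (char *)payload);
		return 0;
	}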
Signed-off-by: Eric Biggers Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414eb2..f21e9a76ff67e5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1063,6 +1063,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" From 14088540ad63c648e5cdf490412033f792d16b6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Mar 2017 17:44:18 +0000 Subject: [PATCH 1133/1911] arm64: use const cap for system_uses_ttbr0_pan() Since commit 4b65a5db362783ab ("arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1"), system_uses_ttbr0_pan() has used cpus_have_cap() to determine whether PAN is present. Since commit a4023f682739439b ("arm64: Add hypervisor safe helper for checking constant capabilities"), which was introduced around the same time, cpus_have_cap() doesn't try to use a static key, and must always perform a load, test, and consitional branch (likely a tbnz for the latter two). Elsewhere, we moved to using cpus_have_const_cap(), which can use a static key (i.e. a non-conditional branch), which is patched at runtime when the feature is detected. This patch makes system_uses_ttbr0_pan() use cpus_have_const_cap(). The static key is likely a win for hot-paths like the uacccess primitives, and this makes our usage consistent regardless. Signed-off-by: Mark Rutland Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5abec..f31c48d0cd6873 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ From af36370569eb37420e1e78a2e60c277b781fcd00 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 10 Mar 2017 14:33:01 +0200 Subject: [PATCH 1134/1911] net/mlx5: Fix create autogroup prev initializer The autogroups list is a list of non overlapping group boundaries sorted by their start index. If the autogroups list wasn't empty and an empty group slot was found at the start of the list, the new group was added to the end of the list instead of the beginning, as the prev initializer was incorrect. When this was repeated, it caused multiple groups to have overlapping boundaries. Fixed that by correctly initializing the prev pointer to the start of the list. Fixes: eccec8da3b4e ('net/mlx5: Keep autogroups list ordered') Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2478516a61e2ea..ded27bb9a3b604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = ft->node.children.prev; + struct list_head *prev = &ft->node.children; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; From 5d47f6c89d568ab61712d8c40676fbb020b68752 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 10 Mar 2017 14:33:02 +0200 Subject: [PATCH 1135/1911] net/mlx5: Don't save PCI state when PCI error is detected When a PCI error is detected the PCI state could be corrupt, don't save it in that flow. Save the state after initialization. After restoring the PCI state during slot reset save it again, restoring the state destroys the previously saved state info. Fixes: 05ac2c0b7438 ('net/mlx5: Fix race between PCI error handlers and health work') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c4242a4e81309f..e2bd600d19de09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev, if (err) goto clean_load; + pci_save_state(pdev); return 0; clean_load: @@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain the health wq */ + /* In case of kernel call drain the health wq */ if (state) { - pci_save_state(pdev); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } @@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); pci_restore_state(pdev); + pci_save_state(pdev); if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); From 33e21c59526e9147d7c68913995298f10c35cd6f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 10 Mar 2017 14:33:03 +0200 Subject: [PATCH 1136/1911] net/mlx5e: remove IEEE/CEE mode check when setting DCBX mode Currently, the function setdcbx fails if the request dcbx mode is either IEEE or CEE. We remove the IEEE/CEE mode check because we support both IEEE and CEE interfaces. Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 0523ed47f597c7..8fa23f6a1f67f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_dcbx *dcbx = &priv->dcbx; + if (mode & DCB_CAP_DCBX_LLD_MANAGED) + return 1; + if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) { if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO) return 0; @@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) return 1; } - if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) + if (!(mode & DCB_CAP_DCBX_HOST)) return 1; - if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || - !(mode & DCB_CAP_DCBX_VER_CEE) || - !(mode & DCB_CAP_DCBX_VER_IEEE) || - !(mode & DCB_CAP_DCBX_HOST)) + if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) return 1; return 0; From 65ba8fb7d5c6803ec236bb8d6650465fed7f9769 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Mar 2017 14:33:04 +0200 Subject: [PATCH 1137/1911] net/mlx5e: Avoid wrong identification of rules on deletion When deleting offloaded TC flows, we must correctly identify E-switch rules. The current check could get us wrong w.r.t to rules set on the PF. Since it's possible to set NIC rules on the PF, switch to SRIOV offloads mode and then attempt to delete a NIC rule. To solve that, we add a flags field to offloaded rules, set it on creation time and use that over the code where currently needed. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 44406a5ec15d96..79481f4cf26432 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -48,9 +48,14 @@ #include "eswitch.h" #include "vxlan.h" +enum { + MLX5E_TC_FLOW_ESWITCH = BIT(0), +}; + struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; + u8 flags; struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing the same encap */ struct mlx5_esw_flow_attr *attr; @@ -177,7 +182,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_fc_destroy(priv->mdev, counter); } - if (esw && esw->mode == SRIOV_OFFLOADS) { + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { mlx5_eswitch_del_vlan_action(esw, flow->attr); if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); @@ -598,6 +603,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } static int parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { @@ -609,7 +615,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && esw->mode == SRIOV_OFFLOADS && + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && rep->vport != FDB_UPLINK_VPORT) { if (min_inline > esw->offloads.inline_mode) { netdev_warn(priv->netdev, @@ -1132,23 +1138,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { struct mlx5e_tc_table *tc = &priv->fs.tc; - int err = 0; - bool fdb_flow = false; + int err, attr_size = 0; u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u8 flow_flags = 0; - if (esw && esw->mode == SRIOV_OFFLOADS) - fdb_flow = true; - - if (fdb_flow) - flow = kzalloc(sizeof(*flow) + - sizeof(struct mlx5_esw_flow_attr), - GFP_KERNEL); - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + if (esw && esw->mode == SRIOV_OFFLOADS) { + flow_flags = MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + } + flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { err = -ENOMEM; @@ -1156,12 +1158,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, } flow->cookie = f->cookie; + flow->flags = flow_flags; - err = parse_cls_flower(priv, spec, f); + err = parse_cls_flower(priv, flow, spec, f); if (err < 0) goto err_free; - if (fdb_flow) { + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); err = parse_tc_fdb_actions(priv, f->exts, flow); if (err < 0) From ea29bd304d7b522f6162a36f394e690c579b5a63 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Fri, 10 Mar 2017 14:33:05 +0200 Subject: [PATCH 1138/1911] net/mlx5e: Fix loopback selftest Change packet type handler to ETH_P_IP instead of ETH_P_ALL since we are already expecting an IP packet. Also, using ETH_P_ALL will cause the loopback test packet type handler to be called on all outgoing packets, especially our own self loopback test SKB, which will be validated on xmit as well, and we don't want that. Tested with: ethtool -t ethX validated that the loopback test passes. 
Fixes: 0952da791c97 ('net/mlx5e: Add support for loopback selftest') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 31e3cb7ee5feeb..5621dcfda4f186 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct iphdr *iph; /* We are only going to peek, no need to clone the SKB */ - if (skb->protocol != htons(ETH_P_IP)) - goto out; - if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; @@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, lbtp->loopback_ok = false; init_completion(&lbtp->comp); - lbtp->pt.type = htons(ETH_P_ALL); + lbtp->pt.type = htons(ETH_P_IP); lbtp->pt.func = mlx5e_test_loopback_validate; lbtp->pt.dev = priv->netdev; lbtp->pt.af_packet_priv = lbtp; From 0e4c0e6ea7d4a988a5ae2791c7cb5769b5256dad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 Feb 2017 15:25:08 +0100 Subject: [PATCH 1139/1911] arm64: kernel: Update kerneldoc for cpu_suspend() rename Commit af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") renamed cpu_suspend() to arm_cpuidle_suspend(), but forgot to update the kerneldoc header. Fixes: af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef669c..fd691087dc9ad5 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU From b6573da139ecbee6f2c77392b51266d4521d50ac Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 10 Mar 2017 10:10:54 -0800 Subject: [PATCH 1140/1911] Input: synaptics-rmi4 - prevent null pointer dereference in f30 If the platform data has f30_data.disable set, f30 in rmi_f30_config() might be null. Prevent a kernel oops by checking for non-null f30. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f30.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 3422464af229bc..b8572b342dcb13 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -170,6 +170,10 @@ static int rmi_f30_config(struct rmi_function *fn) rmi_get_platform_data(fn->rmi_dev); int error; + /* can happen if f30_data.disable is set */ + if (!f30) + return 0; + if (pdata->f30_data.trackstick_buttons) { /* Try [re-]establish link to F03. 
*/ f30->f03 = rmi_find_function(fn->rmi_dev, 0x03); From ed46e66cc1b3d684042f92dfa2ab15ee917b4cac Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Mon, 27 Feb 2017 14:30:25 +0200 Subject: [PATCH 1141/1911] iommu/io-pgtable-arm: Check for leaf entry before dereferencing it Do a check for already installed leaf entry at the current level before dereferencing it in order to avoid walking the page table down with wrong pointer to the next level. Signed-off-by: Oleksandr Tyshchenko CC: Will Deacon CC: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index feacc54bec683b..f9bc6ebb8140b0 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -335,8 +335,12 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) pte |= ARM_LPAE_PTE_NSTABLE; __arm_lpae_set_pte(ptep, pte, cfg); - } else { + } else if (!iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ From a03849e7210277fa212779b7cd9c30e1ab6194b2 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Mon, 27 Feb 2017 14:30:26 +0200 Subject: [PATCH 1142/1911] iommu/io-pgtable-arm-v7s: Check for leaf entry before dereferencing it Do a check for already installed leaf entry at the current level before dereferencing it in order to avoid walking the page table down with wrong pointer to the next level. Signed-off-by: Oleksandr Tyshchenko CC: Will Deacon CC: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 1c049e2e12bf0d..8d6ca28c3e1f14 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -422,8 +422,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, pte |= ARM_V7S_ATTR_NS_TABLE; __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else { + } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ From d8a8ed9758241e138933c67e40db2db2790eca19 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 9 Mar 2017 13:21:07 -0500 Subject: [PATCH 1143/1911] drm/amd/amdgpu: Disable GFX_PG on Carrizo until compute issues solved Currently compute jobs will stall if GFX_PG is enabled. Until this is resolved we'll disable GFX_PG. 
Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 50bdb24ef8d6e9..4a785d6acfb9af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle) /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00) { - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_CP | From 607523d19c9d67ba4cf7bdaced644f11ed04992c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Mar 2017 12:13:04 +1000 Subject: [PATCH 1144/1911] drm/amdgpu: fix parser init error path to avoid crash in parser fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we don't reset the chunk info in the error path, the subsequent fini path will double free. Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d2d0f60ff36d1f..99424cb8020bdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Mar 2017 13:17:18 +0100 Subject: [PATCH 1145/1911] kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside of that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and cleanup the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewn reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith < Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. 
Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- arch/powerpc/purgatory/trampoline.S | 12 +++++----- arch/x86/include/asm/purgatory.h | 20 +++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 9 +++++--- arch/x86/purgatory/purgatory.c | 35 ++++++++++++++--------------- arch/x86/purgatory/purgatory.h | 8 ------- arch/x86/purgatory/setup-x86_64.S | 2 +- arch/x86/purgatory/sha256.h | 1 - include/linux/purgatory.h | 23 +++++++++++++++++++ kernel/kexec_file.c | 8 +++---- kernel/kexec_internal.h | 6 +---- 10 files changed, 78 insertions(+), 46 deletions(-) create mode 100644 arch/x86/include/asm/purgatory.h delete mode 100644 arch/x86/purgatory/purgatory.h create mode 100644 include/linux/purgatory.h diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760ccf403236..3696ea6c4826b9 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 00000000000000..d7da2729903d72 --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de4b..857cdbd0286757 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", &image->arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", &image->arch.backup_src_start, sizeof(image->arch.backup_src_start), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_sz", &image->arch.backup_src_sz, sizeof(image->arch.backup_src_sz), 0); if (ret) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index b6d5c8946e664a..470edad96bb956 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -10,22 +10,19 @@ * Version 2. See the file COPYING for more details. 
*/ +#include +#include + #include "sha256.h" -#include "purgatory.h" #include "../boot/string.h" -struct sha_region { - unsigned long start; - unsigned long len; -}; - -static unsigned long backup_dest; -static unsigned long backup_src; -static unsigned long backup_sz; +unsigned long purgatory_backup_dest __section(.kexec-purgatory); +unsigned long purgatory_backup_src __section(.kexec-purgatory); +unsigned long purgatory_backup_sz __section(.kexec-purgatory); -static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); -struct sha_region sha_regions[16] = {}; +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); /* * On x86, second kernel requries first 640K of memory to boot. Copy @@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {}; */ static int copy_backup_region(void) { - if (backup_dest) - memcpy((void *)backup_dest, (void *)backup_src, backup_sz); - + if (purgatory_backup_dest) { + memcpy((void *)purgatory_backup_dest, + (void *)purgatory_backup_src, purgatory_backup_sz); + } return 0; } static int verify_sha256_digest(void) { - struct sha_region *ptr, *end; + struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; struct sha256_state sctx; sha256_init(&sctx); - end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; - for (ptr = sha_regions; ptr < end; ptr++) + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); sha256_final(&sctx, digest); - if (memcmp(digest, sha256_digest, sizeof(digest))) + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) return 1; return 0; diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h deleted file mode 100644 index e2e365a6c192a1..00000000000000 --- a/arch/x86/purgatory/purgatory.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PURGATORY_H -#define PURGATORY_H - -#ifndef __ASSEMBLY__ -extern void purgatory(void); -#endif /* __ASSEMBLY__ */ - -#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index f90e9dfa90bb92..dfae9b9e60b5ba 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,7 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. 
*/ -#include "purgatory.h" +#include .text .globl purgatory_start diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h index bd15a4127735e5..2867d9825a57e5 100644 --- a/arch/x86/purgatory/sha256.h +++ b/arch/x86/purgatory/sha256.h @@ -10,7 +10,6 @@ #ifndef SHA256_H #define SHA256_H - #include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 00000000000000..d60d4e27860960 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include +#include +#include + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b56a558e406db6..b118735fea9da4 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image) ret = crypto_shash_final(desc, digest); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha_regions", - sha_regions, sha_region_sz, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); if (ret) goto out_free_digest; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 4cef7e4706b098..799a8a4521870a 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image, extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE -struct kexec_sha_region { - unsigned long start; - unsigned long len; -}; - +#include void kimage_file_post_load_cleanup(struct kimage *image); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } From 3fb632e40d7667d8bedfabc28850ac06d5493f54 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:27:23 +0800 Subject: [PATCH 1146/1911] md: fix super_offset endianness in super_1_rdev_size_change The sb->super_offset should be big-endian, but the rdev->sb_start is in host byte order, so fix this by adding cpu_to_le64. 
Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index cd89ad3c3a0d2c..6e76d97a8fc38b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1879,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); do { md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, From 1345921393ba23b60d3fcf15933e699232ad25ae Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:49:12 +0800 Subject: [PATCH 1147/1911] md: fix incorrect use of lexx_to_cpu in does_sb_need_changing The sb->layout is of type __le32, so we shoud use le32_to_cpu. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 6e76d97a8fc38b..f6ae1d67bcd02c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2287,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; From c952cd4e949ab3d07287efc2e80246e03727d15d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 09:52:20 +0800 Subject: [PATCH 1148/1911] nfsd: map the ENOKEY to nfserr_perm for avoiding warning Now that Ext4 and f2fs filesystems support encrypted directories and files, attempts to access those files may return ENOKEY, resulting in the following WARNING. Map ENOKEY to nfserr_perm instead of nfserr_io. [ 1295.411759] ------------[ cut here ]------------ [ 1295.411787] WARNING: CPU: 0 PID: 12786 at fs/nfsd/nfsproc.c:796 nfserrno+0x74/0x80 [nfsd] [ 1295.411806] nfsd: non-standard errno: -126 [ 1295.411816] Modules linked in: nfsd nfs_acl auth_rpcgss nfsv4 nfs lockd fscache tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event coretemp crct10dif_pclmul crc32_generic crc32_pclmul snd_ens1371 gameport ghash_clmulni_intel snd_ac97_codec f2fs intel_rapl_perf ac97_bus snd_seq ppdev snd_pcm snd_rawmidi snd_timer vmw_balloon snd_seq_device snd joydev soundcore parport_pc parport nfit acpi_cpufreq tpm_tis vmw_vmci tpm_tis_core tpm shpchp i2c_piix4 grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel e1000 mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs_acl] [ 1295.412522] CPU: 0 PID: 12786 Comm: nfsd Tainted: G W 4.11.0-rc1+ #521 [ 1295.412959] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1295.413814] Call Trace: [ 1295.414252] dump_stack+0x63/0x86 [ 1295.414666] __warn+0xcb/0xf0 [ 1295.415087] warn_slowpath_fmt+0x5f/0x80 [ 1295.415502] ? put_filp+0x42/0x50 [ 1295.415927] nfserrno+0x74/0x80 [nfsd] [ 1295.416339] nfsd_open+0xd7/0x180 [nfsd] [ 1295.416746] nfs4_get_vfs_file+0x367/0x3c0 [nfsd] [ 1295.417182] ? 
security_inode_permission+0x41/0x60 [ 1295.417591] nfsd4_process_open2+0x9b2/0x1200 [nfsd] [ 1295.418007] nfsd4_open+0x481/0x790 [nfsd] [ 1295.418409] nfsd4_proc_compound+0x395/0x680 [nfsd] [ 1295.418812] nfsd_dispatch+0xb8/0x1f0 [nfsd] [ 1295.419233] svc_process_common+0x4d9/0x830 [sunrpc] [ 1295.419631] svc_process+0xfe/0x1b0 [sunrpc] [ 1295.420033] nfsd+0xe9/0x150 [nfsd] [ 1295.420420] kthread+0x101/0x140 [ 1295.420802] ? nfsd_destroy+0x60/0x60 [nfsd] [ 1295.421199] ? kthread_park+0x90/0x90 [ 1295.421598] ret_from_fork+0x2c/0x40 [ 1295.421996] ---[ end trace 0d5a969cd7852e1f ]--- Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fa82b7707e8531..03a7e9da4da022 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -786,6 +786,7 @@ nfserrno (int errno) { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, }; int i; From abcb4dacb098a1baca746406a8775e9930f47f3f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: [PATCH 1149/1911] NFSD: further refinement of content of /proc/fs/nfsd/versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to e35659f1b03c ("NFSD: correctly range-check v4.x minor version when setting versions.") v4.0 could not be disabled without disabling all NFSv4 protocols. So the 'versions' file contained ±4 ±4.1 ±4.2. Writing "-4" would disable all v4 completely. Writing +4 would enabled those minor versions that are currently enabled, either by default or otherwise. After that commit, it was possible to disable v4.0 independently. To maximize backward compatibility with use cases which never disabled v4.0, the "versions" file would never contain "+4.0" - that was implied by "+4", unless explicitly negated by "-4.0". This introduced an inconsistency in that it was possible to disable all minor versions, but still have the major version advertised. e.g. "-4.0 -4.1 -4.2 +4" would result in NFSv4 support being advertised, but all attempts to use it rejected. Commit d3635ff07e8c ("nfsd: fix configuration of supported minor versions") and following removed this inconsistency. If all minor version were disabled, the major would be disabled too. If any minor was enabled, the major would be disabled. This patch also treated "+4" as equivalent to "+4.0" and "-4" as "-4.0". A consequence of this is that writing "-4" would only disable 4.0. This is a regression against the earlier behaviour, in a use case that rpc.nfsd actually uses. The command "rpc.nfsd -N 4" will write "+2 +3 -4" to the versions files. Previously, that would disable v4 completely. Now it will only disable v4.0. Also "4.0" never appears in the "versions" file when read. So if only v4.1 is available, the previous kernel would have reported "+4 -4.0 +4.1 -4.2" the current kernel reports "-4 +4.1 -4.2" which could easily confuse. This patch restores the implication that "+4" and "-4" apply more globals and do not imply "4.0". Specifically: writing "-4" will disable all 4.x minor versions. writing "+4" will enable all 4.1 minor version if none are currently enabled. rpc.nfsd will list minor versions before major versions, so rpc.nfsd -V 4.2 -N 4.1 will write "-4.1 +4.2 +2 +3 +4" so it would be a regression for "+4" to enable always all versions. 
reading "-4" implies that no v4.x are enabled reading "+4" implies that some v4.x are enabled, and that v4.0 is enabled unless "-4.0" is also present. All other minor versions will explicitly be listed. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 73e75ac905258c..8bf8f667a8cf2f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -538,13 +538,21 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) static ssize_t nfsd_print_version_support(char *buf, int remaining, const char *sep, - unsigned vers, unsigned minor) + unsigned vers, int minor) { - const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u"; + const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; bool supported = !!nfsd_vers(vers, NFSD_TEST); - if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST)) + if (vers == 4 && minor >= 0 && + !nfsd_minorversion(minor, NFSD_TEST)) supported = false; + if (minor == 0 && supported) + /* + * special case for backward compatability. + * +4.0 is never reported, it is implied by + * +4, unless -4.0 is present. + */ + return 0; return snprintf(buf, remaining, format, sep, supported ? '+' : '-', vers, minor); } @@ -554,7 +562,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) char *mesg = buf; char *vers, *minorp, sign; int len, num, remaining; - unsigned minor; ssize_t tlen = 0; char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); @@ -575,6 +582,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (len <= 0) return -EINVAL; do { enum vers_op cmd; + unsigned minor; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); @@ -585,8 +593,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; - } else - minor = 0; + } + cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { case 2: @@ -594,8 +602,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) nfsd_vers(num, cmd); break; case 4: - if (nfsd_minorversion(minor, cmd) >= 0) - break; + if (*minorp == '.') { + if (nfsd_minorversion(minor, cmd) < 0) + return -EINVAL; + } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) { + /* + * Either we have +4 and no minors are enabled, + * or we have -4 and at least one minor is enabled. + * In either case, propagate 'cmd' to all minors. 
+ */ + minor = 0; + while (nfsd_minorversion(minor, cmd) >= 0) + minor++; + } + break; default: return -EINVAL; } @@ -612,9 +632,11 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { + int minor; if (!nfsd_vers(num, NFSD_AVAIL)) continue; - minor = 0; + + minor = -1; do { len = nfsd_print_version_support(buf, remaining, sep, num, minor); @@ -624,7 +646,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) buf += len; tlen += len; minor++; - sep = " "; + if (len) + sep = " "; } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); } out: From 928c6fb3a9bfd6c5b287aa3465226add551c13c0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: [PATCH 1150/1911] NFSD: fix nfsd_minorversion(.., NFSD_AVAIL) Current code will return 1 if the version is supported, and -1 if it isn't. This is confusing and inconsistent with the one place where this is used. So change to return 1 if it is supported, and zero if not. i.e. an error is never returned. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 786a4a2cb2d7a9..892137b1e33030 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -167,7 +167,8 @@ nfsd_adjust_nfsd_versions4(void) int nfsd_minorversion(u32 minorversion, enum vers_op change) { - if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && + change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: From 800a938f0bf9130c8256116649c0cc5806bfb2fd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: [PATCH 1151/1911] NFSD: fix nfsd_reset_versions for NFSv4. If you write "-2 -3 -4" to the "versions" file, it will notice that no versions are enabled, and nfsd_reset_versions() is called. This enables all major versions, not no minor versions. So we lose the invariant that NFSv4 is only advertised when at least one minor is enabled. Fix the code to explicitly enable minor versions for v4, change it to use nfsd_vers() to test and set, and simplify the code. Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 892137b1e33030..31e1f959345715 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -416,23 +416,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) void nfsd_reset_versions(void) { - int found_one = 0; int i; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { - if (nfsd_program.pg_vers[i]) - found_one = 1; - } + for (i = 0; i < NFSD_NRVERS; i++) + if (nfsd_vers(i, NFSD_TEST)) + return; - if (!found_one) { - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) - nfsd_program.pg_vers[i] = nfsd_version[i]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) - nfsd_acl_program.pg_vers[i] = - nfsd_acl_version[i]; -#endif - } + for (i = 0; i < NFSD_NRVERS; i++) + if (i != 4) + nfsd_vers(i, NFSD_SET); + else { + int minor = 0; + while (nfsd_minorversion(minor, NFSD_SET) >= 0) + minor++; + } } /* From a4c2a13129f7c5bcf81704c06851601593303fd5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 17:14:41 -0800 Subject: [PATCH 1152/1911] Input: i8042 - add TUXEDO BU1406 (N24_25BU) to the nomux list TUXEDO BU1406 does not implement active multiplexing mode properly, and takes around 550 ms in i8042_set_mux_mode(). Given that the device does not have external AUX port, there is no downside in disabling the MUX mode. Reported-by: Paul Menzel Suggested-by: Vojtech Pavlik Reviewed-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index e856b073d53367..312bd6ca919806 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"), }, }, + { + /* TUXEDO BU1406 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), + }, + }, { } }; From 92ef6f97a66e580189a41a132d0f8a9f78d6ddce Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Fri, 10 Mar 2017 14:33:09 -0800 Subject: [PATCH 1153/1911] Input: elan_i2c - add ASUS EeeBook X205TA special touchpad fw EeeBook X205TA is yet another ASUS device with a special touchpad firmware that needs to be accounted for during initialization, or else the touchpad will go into an invalid state upon suspend/resume. Adding the appropriate ic_type and product_id check fixes the problem. Signed-off-by: Matjaz Hegedic Acked-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 352050e9031dc3..d5ab9ddef3e37e 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -218,17 +218,19 @@ static int elan_query_product(struct elan_tp_data *data) static int elan_check_ASUS_special_fw(struct elan_tp_data *data) { - if (data->ic_type != 0x0E) - return false; - - switch (data->product_id) { - case 0x05 ... 0x07: - case 0x09: - case 0x13: + if (data->ic_type == 0x0E) { + switch (data->product_id) { + case 0x05 ... 
0x07: + case 0x09: + case 0x13: + return true; + } + } else if (data->ic_type == 0x08 && data->product_id == 0x26) { + /* ASUS EeeBook X205TA */ return true; - default: - return false; } + + return false; } static int __elan_initialize(struct elan_tp_data *data) From 0134ed4fb9e78672ee9f7b18007114404c81e63f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:22 -0700 Subject: [PATCH 1154/1911] device-dax: fix pmd/pte fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60 dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Cc: Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 8d9829ff2a784d..a284dc532e4645 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) int rc = VM_FAULT_SIGBUS; phys_addr_t phys; pfn_t pfn; + unsigned int fault_size = PAGE_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -437,6 +438,9 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size != dax_region->align) + return VM_FAULT_SIGBUS; + phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, @@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PMD_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -480,6 +485,16 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pmd_addr < vmf->vma->vm_start || + (pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { From 4607ebf04b8190d2c7aa5504959e9fddfc0cd736 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Tue, 7 Mar 2017 15:48:23 -0800 Subject: [PATCH 1155/1911] kbuild: external module build warnings when KBUILD_OUTPUT set and W=1 Commit db547ef19064 ("Kbuild: don't add obj tree in additional includes") causes warnings (-Wmissing-include-dirs) when compiling external modules with KBUILD_OUTPUT set 
and W=1. This is because $src can be an absolute path to the external module source which when prefixed with -I$(srctree)/ generates an incorrect directory path. Signed-off-by: Bruce Allan Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 0a07f9014944ed..7234e61e7ce370 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -155,7 +155,7 @@ else # $(call addtree,-I$(obj)) locates .h files in srctree, from generated .c files # and locates generated .h files # FIXME: Replace both with specific CFLAGS* statements in the makefiles -__c_flags = $(if $(obj),-I$(srctree)/$(src) -I$(obj)) \ +__c_flags = $(if $(obj),$(call addtree,-I$(src)) -I$(obj)) \ $(call flags,_c_flags) __a_flags = $(call flags,_a_flags) __cpp_flags = $(call flags,_cpp_flags) From 70b085b06c4560a69e95607f77bb4c2b2e41943c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:27 -0700 Subject: [PATCH 1156/1911] device-dax: fix pud fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60) dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index a284dc532e4645..523fecec7bdaec 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -518,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PUD_SIZE; + if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -534,6 +536,16 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pud_addr < vmf->vma->vm_start || + (pud_addr + PUD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { From 52084f89b38cdd896b59627c629915ef1a7bf615 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Mar 2017 16:56:01 -0700 Subject: [PATCH 1157/1911] device-dax: fix debug output typo The debug output for return the return data of pgoff_to_phys() in the fault handlers has 'phys' and 'pgoff' incorrectly swapped. 
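Taken together, the two device-dax fault fixes above boil down to one policy for the huge-fault paths: service the fault only when its size matches the device alignment, hand anything larger back to the core with VM_FAULT_FALLBACK so it can retry with a smaller mapping, and SIGBUS anything smaller or anything that would spill past the VMA. A condensed sketch of that check for the PMD/PUD handlers (hypothetical function name; the real handlers go on to translate the pgoff and insert the mapping):

    #include <linux/mm.h>

    static int example_huge_fault_check(struct vm_fault *vmf,
                                        unsigned long addr,
                                        unsigned int fault_size,
                                        unsigned int align)
    {
            if (fault_size < align)
                    return VM_FAULT_SIGBUS;         /* device cannot back it */
            if (fault_size > align)
                    return VM_FAULT_FALLBACK;       /* retry with a smaller size */

            /* the aligned range must sit entirely inside the VMA */
            if (addr < vmf->vma->vm_start ||
                addr + fault_size > vmf->vma->vm_end)
                    return VM_FAULT_SIGBUS;

            return 0;                               /* proceed with the mapping */
    }

The PTE path is stricter still: with a PAGE_SIZE fault there is nothing to fall back to, so any mismatch with the alignment is a straight SIGBUS.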
Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 523fecec7bdaec..80c6db279ae10c 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -443,7 +443,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, vmf->pgoff); return VM_FAULT_SIGBUS; } @@ -498,7 +498,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } @@ -549,7 +549,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: [PATCH 1158/1911] Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. 
Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ----------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5fd2..f6b0e87d2388a8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4467a8089ab890..5d208a99d0c98d 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 80cb5eb75b633d..34fbe027e73a26 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1249,7 +1249,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 611a5585a9024a..a8438620465943 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 
1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af44..63a7519b00cc08 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif From 09c3f2bd5c7e5f18687663acb6adc6b167484ca5 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:24 +0800 Subject: [PATCH 1159/1911] Revert"x86/acpi: Enable MADT APIs to return disabled apicids" Revert: 8ad893faf2ea ("x86/acpi: Enable MADT APIs to return disabled apicids") Remove the leftovers of the boot time 'cpuid <-> nodeid' mapping approach. 
Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-3-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/processor_core.c | 60 +++++++++++++---------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a8438620465943..b933061b6b607c 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void) } static int map_lapic_id(struct acpi_subtable_header *entry, - u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled) + u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_apic *lapic = container_of(entry, struct acpi_madt_local_apic, header); - if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (lapic->processor_id != acpi_id) @@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry, } static int map_x2apic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_x2apic *apic = container_of(entry, struct acpi_madt_local_x2apic, header); - if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration && (apic->uid == acpi_id)) { @@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry, } static int map_lsapic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_sapic *lsapic = container_of(entry, struct acpi_madt_local_sapic, header); - if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration) { @@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, * Retrieve the ARM CPU physical identifier (MPIDR) */ static int map_gicc_mpidr(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr) { struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED)) + if (!(gicc->flags & ACPI_MADT_ENABLED)) return -ENODEV; /* device_declaration means Device object in DSDT, in the @@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, } static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, - int type, u32 acpi_id, bool ignore_disabled) + int type, u32 acpi_id) { unsigned long madt_end, entry; phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ @@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, struct acpi_subtable_header *header = (struct acpi_subtable_header *)entry; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { - if (!map_lapic_id(header, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lapic_id(header, acpi_id, &phys_id)) break; } else if (header->type == 
ACPI_MADT_TYPE_LOCAL_X2APIC) { - if (!map_x2apic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_x2apic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { - if (!map_lsapic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lsapic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { - if (!map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_gicc_mpidr(header, type, acpi_id, &phys_id)) break; } entry += header->length; @@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id) if (!madt) return PHYS_CPUID_INVALID; - rv = map_madt_entry(madt, 1, acpi_id, true); + rv = map_madt_entry(madt, 1, acpi_id); acpi_put_table((struct acpi_table_header *)madt); return rv; } -static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, - bool ignore_disabled) +static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; @@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, header = (struct acpi_subtable_header *)obj->buffer.pointer; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) - map_lapic_id(header, acpi_id, &phys_id, ignore_disabled); + map_lapic_id(header, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) - map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_lsapic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) - map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_x2apic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) - map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled); + map_gicc_mpidr(header, type, acpi_id, &phys_id); exit: kfree(buffer.pointer); return phys_id; } -static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type, - u32 acpi_id, bool ignore_disabled) +phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { phys_cpuid_t phys_id; - phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled); + phys_id = map_mat_entry(handle, type, acpi_id); if (invalid_phys_cpuid(phys_id)) - phys_id = map_madt_entry(get_madt_table(), type, acpi_id, - ignore_disabled); + phys_id = map_madt_entry(get_madt_table(), type, acpi_id); return phys_id; } -phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) -{ - return __acpi_get_phys_id(handle, type, acpi_id, true); -} - int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id) { #ifdef CONFIG_SMP From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:25 +0800 Subject: [PATCH 1160/1911] x86/acpi: Restore the order of CPU IDs The following commits: f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage for cpuid <-> apicid mapping") ... registered all the possible CPUs at boot time via ACPI tables to make the mapping of cpuid <-> apicid fixed. Both enabled and disabled CPUs could have a logical CPU ID after boot time. But, ACPI tables are unreliable. the number amd order of Local APIC entries which depends on the firmware is often inconsistent with the physical devices. 
Even if they are consistent, The disabled CPUs which take up some logical CPU IDs will also make the order discontinuous. Revert the part of disabled CPUs registration, keep the allocation logic of logical CPU IDs and also keep some code location changes. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 7 ++++++- arch/x86/kernel/apic/apic.c | 26 +++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f6b0e87d2388a8..b2879cc23db470 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aee7deddabd089..8ccb7ef512e05d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); From 8c8cb30f49b86333d8e036e1945cf1a78c03577e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:26 +0800 Subject: [PATCH 1161/1911] acpi/processor: Implement DEVICE operator for processor enumeration ACPI allows to declare processors either with the PROCESSOR or with the DEVICE operator. The current implementation handles only the PROCESSOR operator. On a system which uses the DEVICE operator for processor enumeration the evaluation fails. Check for the ACPI type of the ACPI handle and evaluate PROCESSOR and DEVICE types separately. 
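What the change has to cope with is that the processor identifier lives in different places depending on which operator the firmware used: a Processor() object carries it as the proc_id of the evaluated processor object, while a Device() declaration exposes it through its _UID method. A simplified sketch of that dispatch (hypothetical helper; the real walker in the diff below also records the ID and treats failures as "skip this object"):

    #include <linux/acpi.h>

    static acpi_status example_processor_uid(acpi_handle handle,
                                             unsigned long long *uid)
    {
            union acpi_object obj = { 0 };
            struct acpi_buffer buf = { sizeof(obj), &obj };
            acpi_object_type type;
            acpi_status status;

            status = acpi_get_type(handle, &type);
            if (ACPI_FAILURE(status))
                    return status;

            switch (type) {
            case ACPI_TYPE_PROCESSOR:       /* Processor(...) operator */
                    status = acpi_evaluate_object(handle, NULL, NULL, &buf);
                    if (ACPI_SUCCESS(status))
                            *uid = obj.processor.proc_id;
                    return status;
            case ACPI_TYPE_DEVICE:          /* Device(...) with a _UID */
                    return acpi_evaluate_integer(handle, "_UID", NULL, uid);
            default:
                    return AE_TYPE;
            }
    }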
Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-5-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 39 ++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 5d208a99d0c98d..9a98d7e0020041 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -633,25 +633,50 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, void **rv) { acpi_status status; + acpi_object_type acpi_type; + unsigned long long uid; union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + status = acpi_get_type(handle, &acpi_type); if (ACPI_FAILURE(status)) - acpi_handle_info(handle, "Not get the processor object\n"); - else - processor_validated_ids_update(object.processor.proc_id); + return false; + + switch (acpi_type) { + case ACPI_TYPE_PROCESSOR: + status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) + goto err; + uid = object.processor.proc_id; + break; + + case ACPI_TYPE_DEVICE: + status = acpi_evaluate_integer(handle, "_UID", NULL, &uid); + if (ACPI_FAILURE(status)) + goto err; + break; + default: + goto err; + } + + processor_validated_ids_update(uid); + return true; + +err: + acpi_handle_info(handle, "Invalid processor object\n"); + return false; - return AE_OK; } -static void __init acpi_processor_check_duplicates(void) +void __init acpi_processor_check_duplicates(void) { - /* Search all processor nodes in ACPI namespace */ + /* check the correctness for all processors in ACPI namespace */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, acpi_processor_ids_walk, NULL, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk, + NULL, NULL); } bool __init acpi_processor_validate_proc_id(int proc_id) From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:27 +0800 Subject: [PATCH 1162/1911] acpi/processor: Check for duplicate processor ids at hotplug time The check for duplicate processor ids happens at boot time based on the ACPI table contents, but the final sanity checks for a processor happen at hotplug time. At hotplug time, where the physical information is available, which might differ from the ACPI table information, a check for duplicate processor ids is missing. Add it to the hotplug checks and rename the function so it better reflects its purpose. 
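A detail that is easy to miss in the diff that follows: because acpi_duplicate_processor_id() is now called from the hotplug path, the duplicate-ID array and its counter can no longer be marked __initdata -- init data is freed once boot completes, so anything consulted later at hotplug time has to stay resident. Roughly (hypothetical names, mirroring the declarations in the hunk):

    #include <linux/init.h>
    #include <linux/threads.h>

    /* only used while the ACPI tables are parsed at boot,
     * so it may live in .init.data */
    static int unique_ids[NR_CPUS] __initdata;

    /* consulted again at CPU hotplug time, long after the init
     * sections are freed, so no __initdata here */
    static int duplicate_ids[NR_CPUS];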
Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 13 ++++++++++--- include/linux/acpi.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 9a98d7e0020041..0143135b3abe37 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -280,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device) pr->acpi_id = value; } + if (acpi_duplicate_processor_id(pr->acpi_id)) { + dev_err(&device->dev, + "Failed to get unique processor _UID (0x%x)\n", + pr->acpi_id); + return -ENODEV; + } + pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); if (invalid_phys_cpuid(pr->phys_id)) @@ -580,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = { static int nr_unique_ids __initdata; /* The number of the duplicate processor IDs */ -static int nr_duplicate_ids __initdata; +static int nr_duplicate_ids; /* Used to store the unique processor IDs */ static int unique_processor_ids[] __initdata = { @@ -588,7 +595,7 @@ static int unique_processor_ids[] __initdata = { }; /* Used to store the duplicate processor IDs */ -static int duplicate_processor_ids[] __initdata = { +static int duplicate_processor_ids[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -679,7 +686,7 @@ void __init acpi_processor_check_duplicates(void) NULL, NULL); } -bool __init acpi_processor_validate_proc_id(int proc_id) +bool acpi_duplicate_processor_id(int proc_id) { int i; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 63a7519b00cc08..9b05886f9773cd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) } /* Validate the processor object's proc_id */ -bool acpi_processor_validate_proc_id(int proc_id); +bool acpi_duplicate_processor_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ From 0acf611997d9d05dbfb559c3c6e379c861eb5957 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 22 Feb 2017 11:07:57 -0800 Subject: [PATCH 1163/1911] score: Fix implicit includes now failing build after extable change After changing from module.h to extable.h, score builds fail with: arch/score/kernel/traps.c: In function 'do_ri': arch/score/kernel/traps.c:248:4: error: implicit declaration of function 'user_disable_single_step' arch/score/mm/extable.c: In function 'fixup_exception': arch/score/mm/extable.c:32:38: error: dereferencing pointer to incomplete type arch/score/mm/extable.c:34:24: error: dereferencing pointer to incomplete type because extable.h doesn't drag in the same amount of headers as the module.h did. Add in the headers which were implicitly expected. Fixes: 90858794c960 ("module.h: remove extable.h include now users have migrated") Signed-off-by: Guenter Roeck [PG: tweak commit log; refresh for sched header refactoring.] 
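The underlying failure is the usual include-what-you-use problem: the file only compiled because <linux/module.h> happened to drag in, transitively, headers declaring the symbols it uses; the much leaner <linux/extable.h> does not, so the declarations disappear and the build breaks. A hypothetical illustration (not the score code itself):

    /* before the conversion: builds, but only by accident of what
     * <linux/module.h> pulls in indirectly */
    #include <linux/module.h>

    void example(struct task_struct *t)
    {
            user_disable_single_step(t);    /* declaration arrives indirectly */
    }

    /* after s/module.h/extable.h/ the declaration is gone and gcc reports
     * an implicit declaration; the cure is to #include directly whatever
     * declares the symbols in use (user_disable_single_step(), for one,
     * comes from <linux/ptrace.h>) instead of leaning on transitive
     * includes. */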
Signed-off-by: Paul Gortmaker --- arch/score/kernel/traps.c | 1 + arch/score/mm/extable.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index e359ec67586982..12daf45369b442 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c index ec871355fc2d60..6736a3ad628609 100644 --- a/arch/score/mm/extable.c +++ b/arch/score/mm/extable.c @@ -24,6 +24,8 @@ */ #include +#include +#include int fixup_exception(struct pt_regs *regs) { From 33d8c15559df4f0bce25d7e16ebb5879e249f2e7 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 9 Mar 2017 17:58:38 +0100 Subject: [PATCH 1164/1911] Revert "clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock" This reverts commit 7b9f1d16e6d1 ("clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock"). In the current state, the kernel warns against a late registration of the new sched_clock, the printk clock resets after only a few minutes, and it seems that scheduling can be affected as well. Signed-off-by: Romain Izard Signed-off-by: Daniel Lezcano --- drivers/clocksource/tcb_clksrc.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 745844ee973e1d..d4ca9962a7595a 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -10,7 +10,6 @@ #include #include #include -#include /* @@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs) return (upper << 16) | lower; } -static u32 tc_get_cv32(void) -{ - return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); -} - static u64 tc_get_cycles32(struct clocksource *cs) { - return tc_get_cv32(); + return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); } static struct clocksource clksrc = { @@ -75,11 +69,6 @@ static struct clocksource clksrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u64 notrace tc_read_sched_clock(void) -{ - return tc_get_cv32(); -} - #ifdef CONFIG_GENERIC_CLOCKEVENTS struct tc_clkevt_device { @@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void) clksrc.read = tc_get_cycles32; /* setup ony channel 0 */ tcb_setup_single_chan(tc, best_divisor_idx); - - /* register sched_clock on chips with single 32 bit counter */ - sched_clock_register(tc_read_sched_clock, 32, divided_rate); } else { /* tclib will give us three clocks no matter what the * underlying platform supports. From f5fe1b51905df7cfe4fdfd85c5fb7bc5b71a094f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 17:00:47 +1100 Subject: [PATCH 1165/1911] blk: Ensure users for current->bio_list can see the full list. Commit 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") changed current->bio_list so that it did not contain *all* of the queued bios, but only those submitted by the currently running make_request_fn. There are two places which walk the list and requeue selected bios, and others that check if the list is empty. These are no longer correct. So redefine current->bio_list to point to an array of two lists, which contain all queued bios, and adjust various code to test or walk both lists. 
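With current->bio_list now pointing at a two-element array instead of a single list, every site that used to test or drain one list has to look at both halves: element [0] holds bios queued by the currently running make_request_fn, element [1] those queued before it was entered. The recurring emptiness test could be wrapped in a helper like this (hypothetical -- the patch open-codes it in bio_alloc_bioset() and raid10's wait_barrier()):

    #include <linux/bio.h>
    #include <linux/sched.h>

    /* true if the current task still has bios parked for later submission,
     * in either half of the on-stack pair */
    static inline bool current_has_queued_bios(void)
    {
            return current->bio_list &&
                   (!bio_list_empty(&current->bio_list[0]) ||
                    !bio_list_empty(&current->bio_list[1]));
    }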
Signed-off-by: NeilBrown Fixes: 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") Signed-off-by: Jens Axboe --- block/bio.c | 12 +++++++++--- block/blk-core.c | 30 ++++++++++++++++++------------ drivers/md/dm.c | 29 ++++++++++++++++------------- drivers/md/raid10.c | 3 ++- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5eec5e08417f6f..e75878f8b14af8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index 0eeb99ef654f4a..d772c221cc178b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1973,7 +1973,14 @@ generic_make_request_checks(struct bio *bio) */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. 
*/ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { - struct bio_list hold; struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ - hold = bio_list_on_stack; - bio_list_init(&bio_list_on_stack); + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio) */ bio_list_init(&lower); bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack, &lower); - bio_list_merge(&bio_list_on_stack, &same); - bio_list_merge(&bio_list_on_stack, &hold); + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { bio_io_error(bio); } - bio = bio_list_pop(current->bio_list); + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f4ffd1eb8f44c3..dfb75979e4555d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) struct dm_offload *o = container_of(cb, struct dm_offload, cb); struct bio_list list; struct bio *bio; + int i; INIT_LIST_HEAD(&o->cb.list); if (unlikely(!current->bio_list)) return; - list = *current->bio_list; - bio_list_init(current->bio_list); - - while ((bio = bio_list_pop(&list))) { - struct bio_set *bs = bio->bi_pool; - if (unlikely(!bs) || bs == fs_bio_set) { - bio_list_add(current->bio_list, bio); - continue; + for (i = 0; i < 2; i++) { + list = current->bio_list[i]; + bio_list_init(¤t->bio_list[i]); + + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(¤t->bio_list[i], bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); } - - spin_lock(&bs->rescue_lock); - bio_list_add(&bs->rescue_list, bio); - queue_work(bs->rescue_workqueue, &bs->rescue_work); - spin_unlock(&bs->rescue_lock); } } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 063c43d83b72c2..0536658c9d40cd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf) !conf->barrier || (atomic_read(&conf->nr_pending) && current->bio_list && - !bio_list_empty(current->bio_list)), + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))), conf->resync_lock); conf->nr_waiting--; if (!conf->nr_waiting) From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 01:31:19 +0100 Subject: [PATCH 1166/1911] x86/tlb: Fix tlb flushing when lguest clears PGE Fengguang reported random corruptions from various locations on x86-32 after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and 
9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set") that uses the former. While x86-32 doesn't have a JIT like x86_64, the bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module support built in and therefore never had the DEBUG_SET_MODULE_RONX setting enabled. After investigating the crashes further, it turned out that using set_memory_ro() and set_memory_rw() didn't have the desired effect, for example, setting the pages as read-only on x86-32 would still let probe_kernel_write() succeed without error. This behavior would manifest itself in situations where the vmalloc'ed buffer was accessed prior to set_memory_*() such as in case of bpf_prog_alloc(). In cases where it wasn't, the page attribute changes seemed to have taken effect, leading to the conclusion that a TLB invalidate didn't happen. Moreover, it turned out that this issue reproduced with qemu in "-cpu kvm64" mode, but not for "-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(), though. There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush (depends on X86_FEATURE_PGE), and cr3 based flush. For "-cpu host" case in my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually worked fine, and further investigating the cr4 one turned out that X86_CR4_PGE bit was not set in cr4 register, meaning the __native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same value instead of clearing X86_CR4_PGE in the first write to trigger the flush. It turned out that X86_CR4_PGE was cleared from cr4 during init from lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also cleared from there due to concerns of using PGE in guest kernel that can lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V: Host") in init()). The CPU feature bits are cleared in dynamic boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB flushing to use, meaning they still used the old setting of the host kernel. Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate to static_cpu_has() checks is too late at this point as sections have been patched already, so for now, it seems reasonable to switch back to boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b ("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via cr3 as originally intended, properly makes the new page attributes visible and thus fixes the crashes seen by Fengguang. Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Cc: bp@suse.de Cc: Kees Cook Cc: "David S. 
Miller" Cc: netdev@vger.kernel.org Cc: Rusty Russell Cc: Alexei Starovoitov Cc: Linus Torvalds Cc: lkp@01.org Cc: Laura Abbott Cc: stable@vger.kernel.org Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83d8..fc5abff9b7fd63 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); From 4495c08e84729385774601b5146d51d9e5849f81 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Mar 2017 14:47:08 -0700 Subject: [PATCH 1167/1911] Linux 4.11-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 165cf9783a5dbb..b841fb36beb2b5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Fearless Coyote # *DOCUMENTATION* From d1c4e9bf73e739b937ddd9dc4cf0f6de2e6117da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Mon, 20 Feb 2017 14:50:23 -0500 Subject: [PATCH 1168/1911] platform/x86: asus-wmi: Remove quirk_no_rfkill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the detection introduced in the previous patches, we don't need these static DMI-based quirks anymore. 
This reverts the following commits: 56a37a72002b "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UA" a961a285b479 "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UF" 6b7ff2af5286 "asus-wmi: Add quirk_no_rfkill for the Asus Z550MA" 02db9ff7af18 "asus-wmi: Add quirk_no_rfkill for the Asus U303LB" 2d735244b798 "asus-wmi: Add quirk_no_rfkill for the Asus N552VW" a977e59c0c67 "asus-wmi: Create quirk for airplane_mode LED" Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko [dvhart: minor commit message corrections] Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/asus-nb-wmi.c | 49 ++---------------------------- drivers/platform/x86/asus-wmi.c | 5 +-- drivers/platform/x86/asus-wmi.h | 1 - 3 files changed, 3 insertions(+), 52 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 5be4783e40d4c9..dea98ffb6f606a 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = { .wapf = 2, }; -static struct quirk_entry quirk_no_rfkill = { - .no_rfkill = true, -}; - -static struct quirk_entry quirk_no_rfkill_wapf4 = { - .wapf = 4, - .no_rfkill = true, -}; - static struct quirk_entry quirk_asus_ux303ub = { .wmi_backlight_native = true, }; @@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), }, - .driver_data = &quirk_no_rfkill_wapf4, + .driver_data = &quirk_asus_wapf4, }, { .callback = dmi_matched, @@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), }, - .driver_data = &quirk_no_rfkill_wapf4, + .driver_data = &quirk_asus_wapf4, }, { .callback = dmi_matched, @@ -367,42 +358,6 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_x200ca, }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. X555UB", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. N552VW", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. U303LB", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. Z550MA", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"), - }, - .driver_data = &quirk_no_rfkill, - }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. 
UX303UB", diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 8499d3ae4257b6..8fe5890bf539f4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2111,10 +2111,7 @@ static int asus_wmi_add(struct platform_device *pdev) if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) asus->driver->wlan_ctrl_by_user = 1; - if (asus->driver->wlan_ctrl_by_user && ashs_present()) - asus->driver->quirks->no_rfkill = 1; - - if (!asus->driver->quirks->no_rfkill) { + if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) { err = asus_wmi_rfkill_init(asus); if (err) goto fail_rfkill; diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index fdff626c3b51b0..c9589d9342bbf8 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -39,7 +39,6 @@ struct key_entry; struct asus_wmi; struct quirk_entry { - bool no_rfkill; bool hotplug_wireless; bool scalar_panel_brightness; bool store_backlight_power; From 088db931e065bd3b159fa2e588eab859ef0d9d23 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Mar 2017 18:33:28 +0000 Subject: [PATCH 1169/1911] thermal: Fix potential deadlock in cpu_cooling cooling_list_lock is covering not just cpufreq_dev_count, but also the calls to cpufreq_register_notifier() and cpufreq_unregister_notifier(). Since cooling_list_lock is also used within cpufreq_thermal_notifier(), lockdep reports a potential deadlock. Fix it by testing the condition under cooling_list_lock and dropping the lock before calling cpufreq_register_notifier(). And variable cpufreq_dev_count is removed at the same time, because it's no longer needed after the fix. Fixes: ae606089621e ("thermal: convert cpu_cooling to use an IDA") Reported-and-Tested-by: Russell King Signed-off-by: Matthew Wilcox Acked-by: Rafael J. 
Wysocki --- drivers/thermal/cpu_cooling.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 91048eeca28b2d..11b5ea685518f8 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -107,8 +107,6 @@ struct cpufreq_cooling_device { }; static DEFINE_IDA(cpufreq_ida); -static unsigned int cpufreq_dev_count; - static DEFINE_MUTEX(cooling_list_lock); static LIST_HEAD(cpufreq_dev_list); @@ -771,6 +769,7 @@ __cpufreq_cooling_register(struct device_node *np, unsigned int freq, i, num_cpus; int ret; struct thermal_cooling_device_ops *cooling_ops; + bool first; if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL)) return ERR_PTR(-ENOMEM); @@ -874,13 +873,14 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_dev->cool_dev = cool_dev; mutex_lock(&cooling_list_lock); + /* Register the notifier for first cpufreq cooling device */ + first = list_empty(&cpufreq_dev_list); list_add(&cpufreq_dev->node, &cpufreq_dev_list); + mutex_unlock(&cooling_list_lock); - /* Register the notifier for first cpufreq cooling device */ - if (!cpufreq_dev_count++) + if (first) cpufreq_register_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - mutex_unlock(&cooling_list_lock); goto put_policy; @@ -1021,6 +1021,7 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register); void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { struct cpufreq_cooling_device *cpufreq_dev; + bool last; if (!cdev) return; @@ -1028,14 +1029,15 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) cpufreq_dev = cdev->devdata; mutex_lock(&cooling_list_lock); + list_del(&cpufreq_dev->node); /* Unregister the notifier for the last cpufreq cooling device */ - if (!--cpufreq_dev_count) + last = list_empty(&cpufreq_dev_list); + mutex_unlock(&cooling_list_lock); + + if (last) cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - list_del(&cpufreq_dev->node); - mutex_unlock(&cooling_list_lock); - thermal_cooling_device_unregister(cpufreq_dev->cool_dev); ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); kfree(cpufreq_dev->dyn_power_table); From a4e49c9bc98d099b2a19933b9da55cd0ad1da421 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:01 +0530 Subject: [PATCH 1170/1911] thermal: devfreq: Simplify expression There is no need to check for IS_ERR() as we are looking for a very particular error value here. Drop the first check. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 7743a78d472397..4793fc7b06dd3f 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -186,7 +186,7 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) return 0; opp = dev_pm_opp_find_freq_exact(dev, freq, true); - if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE)) + if (PTR_ERR(opp) == -ERANGE) opp = dev_pm_opp_find_freq_exact(dev, freq, false); voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ From 8327b830f293ff48eaade64b4fc2e247b7655615 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:02 +0530 Subject: [PATCH 1171/1911] thermal: devfreq_cooling: Replace dev_warn with dev_err There isn't much the user can do on seeing this warning, as the hardware is actually okay. 
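
To make the locking pattern of the cpu_cooling deadlock fix above easier to see in isolation, here is a minimal pthread sketch of the same idea: decide whether this is the first (or last) device while holding the list lock, then drop the lock before calling the notifier registration. The names are invented for the illustration and do not correspond to real kernel or cpufreq APIs.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t cooling_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int cooling_dev_count;

/* Placeholder for the notifier registration: in the kernel this call can take
 * locks that are also taken while cooling_list_lock is held, so it must run
 * only after cooling_list_lock has been dropped. */
static void register_notifier(void)
{
	puts("notifier registered");
}

static void cooling_device_add(void)
{
	bool first;

	pthread_mutex_lock(&cooling_list_lock);
	first = (cooling_dev_count++ == 0);	/* decide under the lock */
	pthread_mutex_unlock(&cooling_list_lock);

	if (first)				/* act outside the lock */
		register_notifier();
}

int main(void)
{
	cooling_device_add();	/* registers the notifier */
	cooling_device_add();	/* no further registration */
	return 0;
}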
dev_err suits much better here. Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 4793fc7b06dd3f..e99d0e026a53ca 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -193,9 +193,9 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) dev_pm_opp_put(opp); if (voltage == 0) { - dev_warn_ratelimited(dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + dev_err_ratelimited(dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); return 0; } From afd1f4e0a76f0c56422af7056942f28578f1643e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:03 +0530 Subject: [PATCH 1172/1911] thermal: devfreq: Check OPP for errors It is possible for dev_pm_opp_find_freq_exact() to return errors. It was all fine earlier as dev_pm_opp_get_voltage() had a check within it to check for invalid OPPs, but dev_pm_opp_put() doesn't have any similar checks and the callers need to make sure OPP is valid before calling them. Also update the later dev_warn_ratelimited() to not print the error message as the OPP is guaranteed to be valid now. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index e99d0e026a53ca..4bf4ad58cffda0 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -189,13 +189,19 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) if (PTR_ERR(opp) == -ERANGE) opp = dev_pm_opp_find_freq_exact(dev, freq, false); + if (IS_ERR(opp)) { + dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", + freq, PTR_ERR(opp)); + return 0; + } + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ dev_pm_opp_put(opp); if (voltage == 0) { dev_err_ratelimited(dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + "Failed to get voltage for frequency %lu\n", + freq); return 0; } From 9aec9082bf3c0fb83020a0f562c9cc8078ac91f1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:04 +0530 Subject: [PATCH 1173/1911] thermal: cpu_cooling: Replace dev_warn with dev_err There isn't much the user can do on seeing these warnings, as the hardware is actually okay. dev_err suits much better here. Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 11b5ea685518f8..59f0bd53f329e8 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -397,9 +397,9 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, dev_pm_opp_put(opp); if (voltage == 0) { - dev_warn_ratelimited(cpufreq_device->cpu_dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); + dev_err_ratelimited(cpufreq_device->cpu_dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq_hz, IS_ERR(opp) ? 
PTR_ERR(opp) : 0); return -EINVAL; } @@ -691,9 +691,9 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, *state = cpufreq_cooling_get_level(cpu, target_freq); if (*state == THERMAL_CSTATE_INVALID) { - dev_warn_ratelimited(&cdev->device, - "Failed to convert %dKHz for cpu %d into a cdev state\n", - target_freq, cpu); + dev_err_ratelimited(&cdev->device, + "Failed to convert %dKHz for cpu %d into a cdev state\n", + target_freq, cpu); return -EINVAL; } From 3ea3217cf91804be6ed9b368ef4ac7911eb1dadc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:05 +0530 Subject: [PATCH 1174/1911] thermal: cpu_cooling: Check OPP for errors It is possible for dev_pm_opp_find_freq_exact() to return errors. It was all fine earlier as dev_pm_opp_get_voltage() had a check within it to check for invalid OPPs, but dev_pm_opp_put() doesn't have any similar checks and the callers need to make sure OPP is valid before calling them. Also update the later dev_warn_ratelimited() to not print the error message as the OPP is guaranteed to be valid now. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 59f0bd53f329e8..69d0f430b2d190 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -393,13 +393,20 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, true); + if (IS_ERR(opp)) { + dev_warn_ratelimited(cpufreq_device->cpu_dev, + "Failed to find OPP for frequency %lu: %ld\n", + freq_hz, PTR_ERR(opp)); + return -EINVAL; + } + voltage = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); if (voltage == 0) { dev_err_ratelimited(cpufreq_device->cpu_dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); + "Failed to get voltage for frequency %lu\n", + freq_hz); return -EINVAL; } From bafa687dccbe16cbf9b1824409836d79d6dc6543 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Feb 2017 12:31:54 +0200 Subject: [PATCH 1175/1911] extcon: int3496: Propagate error code of gpiod_to_irq() gpiod_to_irq() doesn't return 0. Thus, we just adjust condition and replace -EINVAL by actual error code it returns. Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index a3131b036de681..38eb6cab938fff 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -100,9 +100,9 @@ static int int3496_probe(struct platform_device *pdev) } data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id); - if (data->usb_id_irq <= 0) { + if (data->usb_id_irq < 0) { dev_err(dev, "can't get USB ID IRQ: %d\n", data->usb_id_irq); - return -EINVAL; + return data->usb_id_irq; } data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en", From ecd052250b51c8cee4d9720adea05690dfc77c64 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 9 Mar 2017 13:28:33 -0500 Subject: [PATCH 1176/1911] net: ethernet: aquantia: call set_irq_affinity_hint before free_irq When a network interface controlled by the aquantia ethernet driver is brought down a warning is output in dmesg (see below). 
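
Both "Check OPP for errors" patches above rest on the kernel's ERR_PTR/IS_ERR convention, where an error code is encoded in the returned pointer itself and must be tested before the pointer is used or released. The sketch below re-creates that convention in plain userspace C to show the required check; the helper definitions are simplified copies, and find_opp_exact() is a made-up stand-in for dev_pm_opp_find_freq_exact().

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Simplified userspace copies of the kernel's error-pointer helpers. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct opp { unsigned long volt_uv; };

/* Made-up lookup that fails: the errno is encoded in the pointer value. */
static struct opp *find_opp_exact(unsigned long freq)
{
	(void)freq;
	return ERR_PTR(-ERANGE);
}

int main(void)
{
	struct opp *opp = find_opp_exact(1000000UL);

	if (IS_ERR(opp)) {
		fprintf(stderr, "failed to find OPP: %ld\n", PTR_ERR(opp));
		return 1;	/* bail out before dereferencing or "putting" it */
	}
	printf("voltage: %lu uV\n", opp->volt_uv);
	return 0;
}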
The problem is that aq_pci_func_free_irqs() is calling free_irq() before it is calling irq_set_affinity_hint(). WARNING: CPU: 4 PID: 10068 at kernel/irq/manage.c:1503 __free_irq+0x24d/0x2b0 Call Trace: dump_stack+0x63/0x87 __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 __free_irq+0x24d/0x2b0 free_irq+0x39/0x90 aq_pci_func_free_irqs+0x52/0xa0 [atlantic] aq_nic_stop+0xca/0xd0 [atlantic] aq_ndev_close+0x1d/0x40 [atlantic] __dev_close_many+0x99/0x100 __dev_close+0x67/0xb0 Fixes: 36a4a50f4048 ("net: ethernet: aquantia: switch to pci_alloc_irq_vectors") Cc: Christoph Hellwig Cc: Pavel Belous Signed-off-by: David Arcari Tested-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 581de71a958a35..4c6c882c6a1c42 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self) if (!((1U << i) & self->msix_entry_mask)) continue; - free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); if (pdev->msix_enabled) irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL); + free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); self->msix_entry_mask &= ~(1U << i); } } From 7ce101246655935b014b11d81f815342921f5654 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Mar 2017 14:58:29 -0800 Subject: [PATCH 1177/1911] netvsc: handle select_queue when device is being removed Move the send indirection table from the inner device (netvsc) to the network device context. It is possible that netvsc_device is not present (remove in progress). This solves potential use after free issues when packet is being created during MTU change, shutdown, or queue count changes. Fixes: d8e18ee0fa96 ("netvsc: enhance transmit select_queue") Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc.c | 8 ++------ drivers/net/hyperv/netvsc_drv.c | 11 +++-------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d3e73ac158aee6..f9f3dba7a58800 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -700,6 +700,8 @@ struct net_device_context { u32 tx_checksum_mask; + u32 tx_send_table[VRSS_SEND_TAB_SIZE]; + /* Ethtool settings */ u8 duplex; u32 speed; @@ -757,7 +759,6 @@ struct netvsc_device { struct nvsp_message revoke_packet; - u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d35ebd993b3852..4c1d8cca247b92 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev, static void netvsc_send_table(struct hv_device *hdev, struct nvsp_message *nvmsg) { - struct netvsc_device *nvscdev; struct net_device *ndev = hv_get_drvdata(hdev); + struct net_device_context *net_device_ctx = netdev_priv(ndev); int i; u32 count, *tab; - nvscdev = get_outbound_net_device(hdev); - if (!nvscdev) - return; - count = nvmsg->msg.v5_msg.send_table.count; if (count != VRSS_SEND_TAB_SIZE) { netdev_err(ndev, "Received wrong send-table size:%u\n", count); @@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev, nvmsg->msg.v5_msg.send_table.offset); for (i = 0; i < count; i++) - nvscdev->send_table[i] = tab[i]; + net_device_ctx->tx_send_table[i] = tab[i]; } static void netvsc_send_vf(struct net_device_context *net_device_ctx, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bc05c895d9589d..5ede87f30463e8 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *nvsc_dev = net_device_ctx->nvdev; + unsigned int num_tx_queues = ndev->real_num_tx_queues; struct sock *sk = skb->sk; int q_idx = sk_tx_queue_get(sk); - if (q_idx < 0 || skb->ooo_okay || - q_idx >= ndev->real_num_tx_queues) { + if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) { u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE); int new_idx; - new_idx = nvsc_dev->send_table[hash] - % nvsc_dev->num_chn; + new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues; if (q_idx != new_idx && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, q_idx = new_idx; } - if (unlikely(!nvsc_dev->chan_table[q_idx].channel)) - q_idx = 0; - return q_idx; } From 88a7cddce2506b0b6c06a9f6e51379d0d275353b Mon Sep 17 00:00:00 2001 From: Neil Jerram Date: Fri, 10 Mar 2017 12:24:57 +0000 Subject: [PATCH 1178/1911] Make IP 'forwarding' doc more precise It wasn't clear if the 'forwarding' setting needs to be enabled on the interface that packets are received from, or on the interface that packets are forwarded to, or both. In fact (according to my code reading) the setting is relevant on the interface that packets are received from, so this change updates the doc to say that. Signed-off-by: Neil Jerram Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fc73eeb7b3b8b1..ab02304613771b 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN FALSE (router) forwarding - BOOLEAN - Enable IP forwarding on this interface. + Enable IP forwarding on this interface. This controls whether packets + received _on_ this interface can be forwarded. mc_forwarding - BOOLEAN Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE From 52491c7607c5527138095edf44c53169dc1ddb82 Mon Sep 17 00:00:00 2001 From: Etienne Noss Date: Fri, 10 Mar 2017 16:55:32 +0100 Subject: [PATCH 1179/1911] act_connmark: avoid crashing on malformed nlattrs with null parms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcf_connmark_init does not check in its configuration if TCA_CONNMARK_PARMS is set, resulting in a null pointer dereference when trying to access it. [501099.043007] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [501099.043039] IP: [] tcf_connmark_init+0x8b/0x180 [act_connmark] ... [501099.044334] Call Trace: [501099.044345] [] ? tcf_action_init_1+0x198/0x1b0 [501099.044363] [] ? tcf_action_init+0xb0/0x120 [501099.044380] [] ? tcf_exts_validate+0xc4/0x110 [501099.044398] [] ? u32_set_parms+0xa7/0x270 [cls_u32] [501099.044417] [] ? u32_change+0x680/0x87b [cls_u32] [501099.044436] [] ? tc_ctl_tfilter+0x4dd/0x8a0 [501099.044454] [] ? security_capable+0x41/0x60 [501099.044471] [] ? rtnetlink_rcv_msg+0xe1/0x220 [501099.044490] [] ? rtnl_newlink+0x870/0x870 [501099.044507] [] ? netlink_rcv_skb+0xa1/0xc0 [501099.044524] [] ? rtnetlink_rcv+0x24/0x30 [501099.044541] [] ? netlink_unicast+0x184/0x230 [501099.044558] [] ? netlink_sendmsg+0x2f8/0x3b0 [501099.044576] [] ? sock_sendmsg+0x30/0x40 [501099.044592] [] ? SYSC_sendto+0xd3/0x150 [501099.044608] [] ? __do_page_fault+0x2d1/0x510 [501099.044626] [] ? system_call_fast_compare_end+0xc/0x9b Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") Signed-off-by: Étienne Noss Signed-off-by: Victorien Molle Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index ab80629099622c..f9bb43c25697e7 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + if (!tb[TCA_CONNMARK_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(tn, parm->index, a, bind)) { From e37791ec1ad785b59022ae211f63a16189bacebf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 09:46:15 -0800 Subject: [PATCH 1180/1911] mpls: Send route delete notifications when router module is unloaded When the mpls_router module is unloaded, mpls routes are deleted but notifications are not sent to userspace leaving userspace caches out of sync. Add the call to mpls_notify_route in mpls_net_exit as routes are freed. Fixes: 0189197f44160 ("mpls: Basic routing support") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 3818686182b210..a1477989ed0b7c 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2028,6 +2028,7 @@ static void mpls_net_exit(struct net *net) for (index = 0; index < platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); RCU_INIT_POINTER(platform_label[index], NULL); + mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } rtnl_unlock(); From b17075d5c1988b83f840d272c795ac17d57ce804 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Fri, 10 Mar 2017 21:36:22 +0000 Subject: [PATCH 1181/1911] xen-netback: fix race condition on XenBus disconnect In some cases during XenBus disconnect event handling and subsequent queue resource release there may be some TX handlers active on other processors. Use RCU in order to synchronize with them. Signed-off-by: Igor Druzhinin Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 26 +++++++++++++++++--------- drivers/net/xen-netback/netback.c | 2 +- drivers/net/xen-netback/xenbus.c | 20 ++++++++++---------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 829b26cd4549a4..8397f6c9245158 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned int num_queues = vif->num_queues; + unsigned int num_queues; u16 index; struct xenvif_rx_cb *cb; BUG_ON(skb->dev != dev); - /* Drop the packet if queues are not set up */ + /* Drop the packet if queues are not set up. + * This handler should be called inside an RCU read section + * so we don't need to enter it here explicitly. 
+ */ + num_queues = READ_ONCE(vif->num_queues); if (num_queues < 1) goto drop; @@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; + unsigned int num_queues; u64 rx_bytes = 0; u64 rx_packets = 0; u64 tx_bytes = 0; u64 tx_packets = 0; unsigned int index; - spin_lock(&vif->lock); - if (vif->queues == NULL) - goto out; + rcu_read_lock(); + num_queues = READ_ONCE(vif->num_queues); /* Aggregate tx and rx stats from each queue */ - for (index = 0; index < vif->num_queues; ++index) { + for (index = 0; index < num_queues; ++index) { queue = &vif->queues[index]; rx_bytes += queue->stats.rx_bytes; rx_packets += queue->stats.rx_packets; @@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) tx_packets += queue->stats.tx_packets; } -out: - spin_unlock(&vif->lock); + rcu_read_unlock(); vif->dev->stats.rx_bytes = rx_bytes; vif->dev->stats.rx_packets = rx_packets; @@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 * data) { struct xenvif *vif = netdev_priv(dev); - unsigned int num_queues = vif->num_queues; + unsigned int num_queues; int i; unsigned int queue_index; + rcu_read_lock(); + num_queues = READ_ONCE(vif->num_queues); + for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) { unsigned long accum = 0; for (queue_index = 0; queue_index < num_queues; ++queue_index) { @@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev, } data[i] = accum; } + + rcu_read_unlock(); } static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f9bcf4a665bcae..602d408fa25e98 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif) netdev_err(vif->dev, "fatal error; disabling device\n"); vif->disabled = true; /* Disable the vif from queue 0's kthread */ - if (vif->queues) + if (vif->num_queues) xenvif_kick_thread(&vif->queues[0]); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d2d7cd9145b1c2..a56d3eab35dd65 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be) struct xenvif *vif = be->vif; if (vif) { + unsigned int num_queues = vif->num_queues; unsigned int queue_index; - struct xenvif_queue *queues; xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ xenvif_disconnect_data(vif); - for (queue_index = 0; - queue_index < vif->num_queues; - ++queue_index) - xenvif_deinit_queue(&vif->queues[queue_index]); - spin_lock(&vif->lock); - queues = vif->queues; + /* At this point some of the handlers may still be active + * so we need to have additional synchronization here. 
+ */ vif->num_queues = 0; - vif->queues = NULL; - spin_unlock(&vif->lock); + synchronize_net(); - vfree(queues); + for (queue_index = 0; queue_index < num_queues; ++queue_index) + xenvif_deinit_queue(&vif->queues[queue_index]); + + vfree(vif->queues); + vif->queues = NULL; xenvif_disconnect_ctrl(vif); } From 79099aab38c8f5c746748b066ae74ba984fe2cc8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 14:11:39 -0800 Subject: [PATCH 1182/1911] mpls: Do not decrement alive counter for unregister events Multipath routes can be rendered usesless when a device in one of the paths is deleted. For example: $ ip -f mpls ro ls 100 nexthop as to 200 via inet 172.16.2.2 dev virt12 nexthop as to 300 via inet 172.16.3.2 dev br0 101 nexthop as to 201 via inet6 2000:2::2 dev virt12 nexthop as to 301 via inet6 2000:3::2 dev br0 $ ip li del br0 When br0 is deleted the other hop is not considered in mpls_select_multipath because of the alive check -- rt_nhn_alive is 0. rt_nhn_alive is decremented once in mpls_ifdown when the device is taken down (NETDEV_DOWN) and again when it is deleted (NETDEV_UNREGISTER). For a 2 hop route, deleting one device drops the alive count to 0. Since devices are taken down before unregistering, the decrement on NETDEV_UNREGISTER is redundant. Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a1477989ed0b7c..33211f9a265608 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1288,7 +1288,8 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + if (event != NETDEV_UNREGISTER) + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) From 1da8ac7c49fb2879ba95006d8bd1095e6870ea1a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2017 22:05:55 -0800 Subject: [PATCH 1183/1911] selftests/bpf: fix broken build Recent merge of 'linux-kselftest-4.11-rc1' tree broke bpf test build. None of the tests were building and test_verifier.c had tons of compiler errors. Fix it and add #ifdef CAP_IS_SUPPORTED to support old versions of libcap. Tested on centos 6.8 and 7 Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Tested-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf_perf_event.h | 18 ++++++++++++++++++ tools/testing/selftests/bpf/Makefile | 4 +++- tools/testing/selftests/bpf/test_verifier.c | 4 ++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tools/include/uapi/linux/bpf_perf_event.h diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 00000000000000..0674272598205c --- /dev/null +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include +#include + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4b498265dae6dc..67531f47781b40 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,12 +1,14 @@ LIBDIR := ../../../lib BPFOBJ := $(LIBDIR)/bpf/bpf.o -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) +CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ) TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := test_kmod.sh +all: $(TEST_GEN_PROGS) + .PHONY: all clean force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1f5b9eea1e874..d1555e4240c0fb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8,6 +8,8 @@ * License as published by the Free Software Foundation. */ +#include +#include #include #include #include @@ -4583,10 +4585,12 @@ static bool is_admin(void) cap_flag_value_t sysadmin = CAP_CLEAR; const cap_value_t cap_val = CAP_SYS_ADMIN; +#ifdef CAP_IS_SUPPORTED if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) { perror("cap_get_flag"); return false; } +#endif caps = cap_get_proc(); if (!caps) { perror("cap_get_proc"); From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 16:55:49 +0100 Subject: [PATCH 1184/1911] bpf: improve read-only handling Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a one-time warning in case of an error when the image couldn't be set read-only, and also mark struct bpf_prog as locked when bpf_prog_lock_ro() was called. Reason for the latter is that bpf_prog_unlock_ro() is called from various places including error paths, and we shouldn't mess with page attributes when really not needed. For bpf_jit_binary_unlock_ro() this is not needed as jited flag implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY we're guaranteed to have a previously locked image. Overall, this should also help us to identify any further potential issues with set_memory_*() helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c167fdee5f7d1..fbf7b39e810355 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -409,6 +409,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ + locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? 
*/ @@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_memory_ro((unsigned long)fp, fp->pages); + fp->locked = 1; + WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { - set_memory_rw((unsigned long)fp, fp->pages); + if (fp->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + fp->locked = 0; + } } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - set_memory_ro((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { - set_memory_rw((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); } #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 12 Mar 2017 17:07:44 +0200 Subject: [PATCH 1185/1911] x86/platform/intel-mid: Correct MSI IRQ line for watchdog device The interrupt line used for the watchdog is 12, according to the official Intel Edison BSP code. And indeed after fixing it we start getting an interrupt and thus the watchdog starts working again: [ 191.699951] Kernel panic - not syncing: Kernel Watchdog Signed-off-by: Andy Shevchenko Cc: Borislav Petkov Cc: David Cohen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield") Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 86edd1e941eb07..9e304e2ea4f55c 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -19,7 +19,7 @@ #include #include -#define TANGIER_EXT_TIMER0_MSI 15 +#define TANGIER_EXT_TIMER0_MSI 12 static struct platform_device wdt_dev = { .name = "intel_mid_wdt", From 9fa1d7537242bd580ffa99c4725a0407096aad26 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Feb 2017 10:11:45 +0200 Subject: [PATCH 1186/1911] drm/omap: fix dmabuf mmap for dma_alloc'ed buffers omap_gem_dmabuf_mmap() returns an error (with a WARN) when called for a buffer which is allocated with dma_alloc_*(). This prevents dmabuf mmap from working on SoCs without DMM, e.g. AM4 and OMAP3. I could not find any reason for omap_gem_dmabuf_mmap() rejecting such buffers, and just removing the if() fixes the limitation. 
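
A stripped-down view of the "locked" flag introduced by the bpf read-only patch above: the unlock path may run from error paths before the image was ever locked, so it only touches page attributes when the flag says a lock actually happened, and any failure is reported loudly. This is a userspace approximation; set_ro()/set_rw() merely stand in for set_memory_ro()/set_memory_rw().

#include <stdbool.h>
#include <stdio.h>

struct prog {
	bool locked;	/* is the image currently marked read-only? */
};

/* Stand-ins for the page-attribute calls; they return 0 on success. */
static int set_ro(struct prog *p) { (void)p; return 0; }
static int set_rw(struct prog *p) { (void)p; return 0; }

static void prog_lock_ro(struct prog *p)
{
	p->locked = true;
	if (set_ro(p))
		fprintf(stderr, "WARN: failed to set image read-only\n");
}

static void prog_unlock_ro(struct prog *p)
{
	/* Error paths call this before any lock happened: do nothing then. */
	if (!p->locked)
		return;
	if (set_rw(p))
		fprintf(stderr, "WARN: failed to set image read-write\n");
	p->locked = false;
}

int main(void)
{
	struct prog p = { .locked = false };

	prog_unlock_ro(&p);	/* early error path: no page-attribute work */
	prog_lock_ro(&p);
	prog_unlock_ro(&p);	/* normal teardown */
	return 0;
}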
Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813cc..ee5883f59be5a1 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; From 337ba7fbf0fbc12242359c4af114878618b90951 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 25 Feb 2017 16:29:50 +0300 Subject: [PATCH 1187/1911] uapi: fix drm/omap_drm.h userspace compilation errors Consistently use types from linux/types.h like in other uapi drm/*_drm.h header files to fix the following drm/omap_drm.h userspace compilation errors: /usr/include/drm/omap_drm.h:36:2: error: unknown type name 'uint64_t' uint64_t param; /* in */ /usr/include/drm/omap_drm.h:37:2: error: unknown type name 'uint64_t' uint64_t value; /* in (set_param), out (get_param) */ /usr/include/drm/omap_drm.h:56:2: error: unknown type name 'uint32_t' uint32_t bytes; /* (for non-tiled formats) */ /usr/include/drm/omap_drm.h:58:3: error: unknown type name 'uint16_t' uint16_t width; /usr/include/drm/omap_drm.h:59:3: error: unknown type name 'uint16_t' uint16_t height; /usr/include/drm/omap_drm.h:65:2: error: unknown type name 'uint32_t' uint32_t flags; /* in */ /usr/include/drm/omap_drm.h:66:2: error: unknown type name 'uint32_t' uint32_t handle; /* out */ /usr/include/drm/omap_drm.h:67:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:77:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:78:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:82:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:83:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:88:2: error: unknown type name 'uint32_t' uint32_t nregions; /usr/include/drm/omap_drm.h:89:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:93:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:94:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/drm/omap_drm.h:95:2: error: unknown type name 'uint64_t' uint64_t offset; /* mmap offset (out) */ /usr/include/drm/omap_drm.h:102:2: error: unknown type name 'uint32_t' uint32_t size; /* virtual size for mmap'ing (out) */ /usr/include/drm/omap_drm.h:103:2: error: unknown type name 'uint32_t' uint32_t __pad; Fixes: ef6503e89194 ("drm: Kbuild: add omap_drm.h to the installed headers") Signed-off-by: Dmitry V. Levin Signed-off-by: Tomi Valkeinen --- include/uapi/drm/omap_drm.h | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 407cb55df6ac17..7fb97863c94577 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -33,8 +33,8 @@ extern "C" { #define OMAP_PARAM_CHIPSET_ID 1 /* ie. 
0x3430, 0x4430, etc */ struct drm_omap_param { - uint64_t param; /* in */ - uint64_t value; /* in (set_param), out (get_param) */ + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ }; #define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ @@ -53,18 +53,18 @@ struct drm_omap_param { #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { - uint32_t bytes; /* (for non-tiled formats) */ + __u32 bytes; /* (for non-tiled formats) */ struct { - uint16_t width; - uint16_t height; + __u16 width; + __u16 height; } tiled; /* (for tiled formats) */ }; struct drm_omap_gem_new { union omap_gem_size size; /* in */ - uint32_t flags; /* in */ - uint32_t handle; /* out */ - uint32_t __pad; + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; }; /* mask of operations: */ @@ -74,33 +74,33 @@ enum omap_gem_op { }; struct drm_omap_gem_cpu_prep { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ }; struct drm_omap_gem_cpu_fini { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ /* TODO maybe here we pass down info about what regions are touched * by sw so we can be clever about cache ops? For now a placeholder, * set to zero and we just do full buffer flush.. */ - uint32_t nregions; - uint32_t __pad; + __u32 nregions; + __u32 __pad; }; struct drm_omap_gem_info { - uint32_t handle; /* buffer handle (in) */ - uint32_t pad; - uint64_t offset; /* mmap offset (out) */ + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ /* note: in case of tiled buffers, the user virtual size can be * different from the physical size (ie. how many pages are needed * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. * This size here is the one that should be used if you want to * mmap() the buffer: */ - uint32_t size; /* virtual size for mmap'ing (out) */ - uint32_t __pad; + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; }; #define DRM_OMAP_GET_PARAM 0x00 From fd89b23a4632d3cbdee398048497e026edadfb71 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 6 Mar 2017 00:02:52 +0800 Subject: [PATCH 1188/1911] netfilter: nft_set_bitmap: fetch the element key based on the set->klen Currently we just assume the element key as a u32 integer, regardless of the set key length. This is incorrect, for example, the tcp port number is only 16 bits. So when we use the nft_payload expr to get the tcp dport and store it to dreg, the dport will be stored at 0~15 bits, and 16~31 bits will be padded with zero. So the reg->data[dreg] will be looked like as below: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | tcp dport | 0 | +-+-+-+-+-+-+-+-+-+-+-+-+ But for these big-endian systems, if we treate this register as a u32 integer, the element key will be larger than 65535, so the following lookup in bitmap set will cause out of bound access. Another issue is that if we add element with comments in bitmap set(although the comments will be ignored eventually), the element will vanish strangely. 
Because we treate the element key as a u32 integer, so the comments will become the part of the element key, then the element key will also be larger than 65535 and out of bound access will happen: # nft add element t s { 1 comment test } Since set->klen is 1 or 2, it's fine to treate the element key as a u8 or u16 integer. Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_bitmap.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 152d226552c174..9b024e22717b80 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -45,9 +45,17 @@ struct nft_bitmap { u8 bitmap[]; }; -static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +static inline void nft_bitmap_location(const struct nft_set *set, + const void *key, + u32 *idx, u32 *off) { - u32 k = (key << 1); + u32 k; + + if (set->klen == 2) + k = *(u16 *)key; + else + k = *(u8 *)key; + k <<= 1; *idx = k / BITS_PER_BYTE; *off = k % BITS_PER_BYTE; @@ -69,7 +77,7 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_cur(net); u32 idx, off; - nft_bitmap_location(*key, &idx, &off); + nft_bitmap_location(set, key, &idx, &off); return nft_bitmap_active(priv->bitmap, idx, off, genmask); } @@ -83,7 +91,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) return -EEXIST; @@ -102,7 +110,7 @@ static void nft_bitmap_remove(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); } @@ -116,7 +124,7 @@ static void nft_bitmap_activate(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); } @@ -128,7 +136,7 @@ static bool nft_bitmap_flush(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); @@ -161,10 +169,9 @@ static void *nft_bitmap_deactivate(const struct net *net, struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); struct nft_set_ext *ext; - u32 idx, off, key = 0; + u32 idx, off; - memcpy(&key, elem->key.val.data, set->klen); - nft_bitmap_location(key, &idx, &off); + nft_bitmap_location(set, elem->key.val.data, &idx, &off); if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) return NULL; From 10596608c4d62cb8c1c2b806debcbd32fe657e71 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 8 Mar 2017 22:54:18 +0800 Subject: [PATCH 1189/1911] netfilter: nf_tables: fix mismatch in big-endian system Currently, there are two different methods to store an u16 integer to the u32 data register. 
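
To make the key-length point of the bitmap-set fix above concrete, the standalone sketch below mirrors the shape of the corrected nft_bitmap_location(): only set->klen bytes of the register are read, so the computed byte index stays inside a bitmap sized for an 8- or 16-bit key even when the remaining register bytes carry unrelated data (such as a packed comment). This is an illustration in userspace C, not the kernel function.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define BITS_PER_BYTE 8

/* Read the element key with the set's key length (1 or 2 bytes), never as a
 * full u32, and turn it into a byte index plus bit offset (two genmask bits
 * are kept per element, hence the shift by one). */
static void bitmap_location(unsigned int klen, const void *key,
			    uint32_t *idx, uint32_t *off)
{
	uint32_t k;

	if (klen == 2) {
		uint16_t k16;

		memcpy(&k16, key, sizeof(k16));
		k = k16;
	} else {
		uint8_t k8;

		memcpy(&k8, key, sizeof(k8));
		k = k8;
	}
	k <<= 1;

	*idx = k / BITS_PER_BYTE;
	*off = k % BITS_PER_BYTE;
}

int main(void)
{
	/* A 2-byte key sharing its 32-bit register with unrelated bytes. */
	const unsigned char reg[4] = { 0x01, 0xbb, 0xde, 0xad };
	uint32_t idx, off;

	bitmap_location(2, reg, &idx, &off);
	printf("idx=%u off=%u (bounded by the 16-bit key space)\n",
	       (unsigned int)idx, (unsigned int)off);
	return 0;
}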
For example: u32 *dest = &regs->data[priv->dreg]; 1. *dest = 0; *(u16 *) dest = val_u16; 2. *dest = val_u16; For method 1, the u16 value will be stored like this, either in big-endian or little-endian system: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | Value | 0 | +-+-+-+-+-+-+-+-+-+-+-+-+ For method 2, in little-endian system, the u16 value will be the same as listed above. But in big-endian system, the u16 value will be stored like this: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | 0 | Value | +-+-+-+-+-+-+-+-+-+-+-+-+ So later we use "memcmp(&regs->data[priv->sreg], data, 2);" to do compare in nft_cmp, nft_lookup expr ..., method 2 will get the wrong result in big-endian system, as 0~15 bits will always be zero. For the similar reason, when loading an u16 value from the u32 data register, we should use "*(u16 *) sreg;" instead of "(u16)*sreg;", the 2nd method will get the wrong value in the big-endian system. So introduce some wrapper functions to store/load an u8 or u16 integer to/from the u32 data register, and use them in the right place. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 29 +++++++++++++++++++++ net/ipv4/netfilter/nft_masq_ipv4.c | 8 +++--- net/ipv4/netfilter/nft_redir_ipv4.c | 8 +++--- net/ipv6/netfilter/nft_masq_ipv6.c | 8 +++--- net/ipv6/netfilter/nft_redir_ipv6.c | 8 +++--- net/netfilter/nft_ct.c | 18 +++++++------ net/netfilter/nft_meta.c | 40 +++++++++++++++-------------- net/netfilter/nft_nat.c | 8 +++--- 8 files changed, 80 insertions(+), 47 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2aa8a9d80fbe82..70c5ca0c60b187 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -103,6 +103,35 @@ struct nft_regs { }; }; +/* Store/load an u16 or u8 integer to/from the u32 data register. + * + * Note, when using concatenations, register allocation happens at 32-bit + * level. So for store instruction, pad the rest part with zero to avoid + * garbage values.
+ */ +static inline void nft_reg_store16(u32 *dreg, u16 val) +{ + *dreg = 0; + *(u16 *)dreg = val; +} + +static inline void nft_reg_store8(u32 *dreg, u8 val) +{ + *dreg = 0; + *(u8 *)dreg = val; +} + +static inline u16 nft_reg_load16(u32 *sreg) +{ + return *(u16 *)sreg; +} + +static inline u8 nft_reg_load8(u32 *sreg) +{ + return *(u8 *)sreg; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index a0ea8aad1bf150..f1867727711930 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), &range, nft_out(pkt)); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 1650ed23c15dd0..5120be1d31185d 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = - *(__be16 *)&regs->data[priv->sreg_proto_min]; - mr.range[0].max.all = - *(__be16 *)&regs->data[priv->sreg_proto_max]; + mr.range[0].min.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_max]); mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 6c5b5b1830a74f..4146536e9c1517 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, nft_out(pkt)); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index f5ac080fc0849b..a27e424f690d69 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index
bf548a7a71ec9b..91585b5e53070d 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - *dest = CTINFO2DIR(ctinfo); + nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; case NFT_CT_STATUS: *dest = ct->status; @@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); + nft_reg_store8(dest, nf_ct_l3num(ct)); return; case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); + nft_reg_store8(dest, nf_ct_protonum(ct)); return; #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + u16 zoneid; if (priv->dir < IP_CT_DIR_MAX) - *dest = nf_ct_zone_id(zone, priv->dir); + zoneid = nf_ct_zone_id(zone, priv->dir); else - *dest = zone->id; + zoneid = zone->id; + nft_reg_store16(dest, zoneid); return; } #endif @@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTO_SRC: - *dest = (__force __u16)tuple->src.u.all; + nft_reg_store16(dest, (__force u16)tuple->src.u.all); return; case NFT_CT_PROTO_DST: - *dest = (__force __u16)tuple->dst.u.all; + nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; default: break; @@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; enum ip_conntrack_info ctinfo; - u16 value = regs->data[priv->sreg]; + u16 value = nft_reg_load16(&regs->data[priv->sreg]); struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e1f5ca9b423b5f..7b60e01f38ff9f 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - *dest = 0; - *(__be16 *)dest = skb->protocol; + nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: - *dest = nft_pf(pkt); + nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) goto err; - *dest = pkt->tprot; + nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; @@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; - *dest = 0; - *(u16 *)dest = in->type; + nft_reg_store16(dest, in->type); break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *dest = 0; - *(u16 *)dest = out->type; + nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); @@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr, #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - *dest = skb->pkt_type; + nft_reg_store8(dest, skb->pkt_type); break; } switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { @@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; if (ipv4_is_multicast(iph->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest =
PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); @@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg]; + u32 *sreg = &regs->data[meta->sreg]; + u32 value = *sreg; + u8 pkt_type; switch (meta->key) { case NFT_META_MARK: @@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - if (skb->pkt_type != value && - skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = value; + pkt_type = nft_reg_load8(sreg); + + if (skb->pkt_type != pkt_type && + skb_pkt_type_ok(pkt_type) && + skb_pkt_type_ok(skb->pkt_type)) + skb->pkt_type = pkt_type; break; case NFT_META_NFTRACE: skb->nf_trace = !!value; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 19a7bf3236f968..439e0bd152a004 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)&regs->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + &regs->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } From 4ca60d08cbe65f501baad64af50fceba79c19fbb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Mar 2017 23:22:30 +0100 Subject: [PATCH 1190/1911] netfilter: bridge: honor frag_max_size when refragmenting consider a bridge with mtu 9000, but end host sending smaller packets to another host with mtu < 9000. In this case, after reassembly, bridge+defrag would refragment, and then attempt to send the reassembled packet as long as it was below 9k. Instead we have to cap by the largest fragment size seen. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 95087e6e825836..3c5185021c1c0d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -721,18 +721,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting.
*/ From 170a1fb9c01bc40b7e8fd57a32ac9a0e131ec5b6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sat, 11 Mar 2017 00:25:26 -0500 Subject: [PATCH 1191/1911] netfilter: Force fake conntrack entry to be at least 8 bytes aligned Since the nfct and nfctinfo have been combined, the nf_conn structure must be at least 8 bytes aligned, as the 3 LSB bits are used for the nfctinfo. But there's a fake nf_conn structure to denote untracked connections, which is created by a PER_CPU construct. This does not guarantee that it will be 8 bytes aligned and can break the logic in determining the correct nfctinfo. I triggered this on a 32bit machine with the following error: BUG: unable to handle kernel NULL pointer dereference at 00000af4 IP: nf_ct_deliver_cached_events+0x1b/0xfb *pdpt = 0000000031962001 *pde = 0000000000000000 Oops: 0000 [#1] SMP [Modules linked in: ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 crc_ccitt ppdev r8169 parport_pc parport OK ] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-test+ #75 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 task: c126ec00 task.stack: c1258000 EIP: nf_ct_deliver_cached_events+0x1b/0xfb EFLAGS: 00010202 CPU: 0 EAX: 0021cd01 EBX: 00000000 ECX: 27b0c767 EDX: 32bcb17a ESI: f34135c0 EDI: f34135c0 EBP: f2debd60 ESP: f2debd3c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 80050033 CR2: 00000af4 CR3: 309a0440 CR4: 001406f0 Call Trace: ? ipv6_skip_exthdr+0xac/0xcb ipv6_confirm+0x10c/0x119 [nf_conntrack_ipv6] nf_hook_slow+0x22/0xc7 nf_hook+0x9a/0xad [ipv6] ? ip6t_do_table+0x356/0x379 [ip6_tables] ? ip6_fragment+0x9e9/0x9e9 [ipv6] ip6_output+0xee/0x107 [ipv6] ? ip6_fragment+0x9e9/0x9e9 [ipv6] dst_output+0x36/0x4d [ipv6] NF_HOOK.constprop.37+0xb2/0xba [ipv6] ? icmp6_dst_alloc+0x2c/0xfd [ipv6] ? local_bh_enable+0x14/0x14 [ipv6] mld_sendpack+0x1c5/0x281 [ipv6] ? mark_held_locks+0x40/0x5c mld_ifc_timer_expire+0x1f6/0x21e [ipv6] call_timer_fn+0x135/0x283 ? detach_if_pending+0x55/0x55 ? mld_dad_timer_expire+0x3e/0x3e [ipv6] __run_timers+0x111/0x14b ? mld_dad_timer_expire+0x3e/0x3e [ipv6] run_timer_softirq+0x1c/0x36 __do_softirq+0x185/0x37c ? test_ti_thread_flag.constprop.19+0xd/0xd do_softirq_own_stack+0x22/0x28 irq_exit+0x5a/0xa4 smp_apic_timer_interrupt+0x2a/0x34 apic_timer_interrupt+0x37/0x3c By using DEFINE/DECLARE_PER_CPU_ALIGNED we can enforce at least 8 byte alignment as all cache line sizes are at least 8 bytes or more. 
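[Editor's illustration, not part of the original patch: a minimal sketch of the pointer packing that makes the alignment mandatory. The helper names below are invented; only the idea of keeping ctinfo in the low three bits of an 8-byte aligned nf_conn pointer reflects what the merged nfct field does.]

    /* Sketch only; assumes the usual conntrack headers for the types. */
    #define CTINFO_MASK 7UL    /* low 3 bits of the combined field */

    static unsigned long pack_nfct(struct nf_conn *ct, enum ip_conntrack_info ctinfo)
    {
            /* only valid when ct is at least 8-byte aligned */
            return (unsigned long)ct | ctinfo;
    }

    static struct nf_conn *unpack_nfct(unsigned long nfct, enum ip_conntrack_info *ctinfo)
    {
            *ctinfo = nfct & CTINFO_MASK;
            return (struct nf_conn *)(nfct & ~CTINFO_MASK);
    }

If the fake per-CPU entry is only 4-byte aligned, bit 2 of its address is misread as part of ctinfo and masked out of the pointer, so the unpacked nf_conn pointer no longer points at the real structure, which is how the bogus dereference above comes about.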
Fixes: a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area") Signed-off-by: Steven Rostedt (VMware) Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f540f9ad2af4f6..19605878da4739 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); static inline struct nf_conn *nf_ct_untracked_get(void) { return raw_cpu_ptr(&nf_conntrack_untracked); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb08..ffb78e5f7b7091 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; -DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used + * for the nfctinfo. We cheat by (ab)using the PER CPU cache line + * alignment to enforce this. + */ +DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static unsigned int nf_conntrack_hash_rnd __read_mostly; From e920dde5160887d07b738f5a7f593b1fa9b1e32e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Mar 2017 18:32:31 +0100 Subject: [PATCH 1192/1911] netfilter: nft_set_bitmap: keep a list of dummy elements Element comments may come without any prior set flag, so we have to keep a list of dummy struct nft_set_ext to keep this information around. This is only useful for set dumps to userspace. From the packet path, this set type relies on the bitmap representation. This patch simplifies the logic since we don't need to allocate the dummy nft_set_ext structure anymore on the fly at the cost of increasing memory consumption because of the list of dummy struct nft_set_ext. Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_bitmap.c | 146 +++++++++++++++------------------ 1 file changed, 66 insertions(+), 80 deletions(-) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 9b024e22717b80..8ebbc2940f4c59 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -15,6 +15,11 @@ #include #include +struct nft_bitmap_elem { + struct list_head head; + struct nft_set_ext ext; +}; + /* This bitmap uses two bits to represent one element. These two bits determine * the element state in the current and the future generation. * @@ -41,8 +46,9 @@ * restore its previous state. 
*/ struct nft_bitmap { - u16 bitmap_size; - u8 bitmap[]; + struct list_head list; + u16 bitmap_size; + u8 bitmap[]; }; static inline void nft_bitmap_location(const struct nft_set *set, @@ -82,21 +88,43 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, return nft_bitmap_active(priv->bitmap, idx, off, genmask); } +static struct nft_bitmap_elem * +nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, + u8 genmask) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), + nft_set_ext_key(&this->ext), set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return NULL; +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **_ext) + struct nft_set_ext **ext) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); - if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + be = nft_bitmap_elem_find(set, new, genmask); + if (be) { + *ext = &be->ext; return -EEXIST; + } + nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off); /* Enter 01 state. */ priv->bitmap[idx] |= (genmask << off); + list_add_tail_rcu(&new->head, &priv->list); return 0; } @@ -106,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); + list_del_rcu(&be->head); } static void nft_bitmap_activate(const struct net *net, @@ -120,73 +149,52 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); + nft_set_elem_change_active(net, set, &be->ext); } static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *ext) + const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); + struct nft_bitmap_elem *be = _be; u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. 
*/ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); return true; } -static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) - return NULL; - - nft_set_ext_init(ext, &tmpl); - memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); - - return ext; -} - static void *nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); - struct nft_set_ext *ext; u32 idx, off; nft_bitmap_location(set, elem->key.val.data, &idx, &off); - if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) - return NULL; - - /* We have no real set extension since this is a bitmap, allocate this - * dummy object that is released from the commit/abort path. - */ - ext = nft_bitmap_ext_alloc(set, elem); - if (!ext) + be = nft_bitmap_elem_find(set, this, genmask); + if (!be) return NULL; /* Enter 10 state. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); - return ext; + return be; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -194,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext_tmpl tmpl; + struct nft_bitmap_elem *be; struct nft_set_elem elem; - struct nft_set_ext *ext; - int idx, off; - u16 key; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - for (idx = 0; idx < priv->bitmap_size; idx++) { - for (off = 0; off < BITS_PER_BYTE; off += 2) { - if (iter->count < iter->skip) - goto cont; - - if (!nft_bitmap_active(priv->bitmap, idx, off, - iter->genmask)) - goto cont; - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) { - iter->err = -ENOMEM; - return; - } - nft_set_ext_init(ext, &tmpl); - key = ((idx * BITS_PER_BYTE) + off) >> 1; - memcpy(nft_set_ext_key(ext), &key, set->klen); - - elem.priv = ext; - iter->err = iter->fn(ctx, set, iter, &elem); - - /* On set flush, this dummy extension object is released - * from the commit/abort path. 
- */ - if (!iter->flush) - kfree(ext); - - if (iter->err < 0) - return; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&be->ext, iter->genmask)) + goto cont; + + elem.priv = be; + + iter->err = iter->fn(ctx, set, iter, &elem); + + if (iter->err < 0) + return; cont: - iter->count++; - } + iter->count++; } } @@ -265,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; @@ -290,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_bitmap_ops __read_mostly = { .privsize = nft_bitmap_privsize, + .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, .init = nft_bitmap_init, .destroy = nft_bitmap_destroy, From a1c2ff53726907aff5feb37e4cfd45c1ff626431 Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Wed, 1 Mar 2017 03:51:00 +0000 Subject: [PATCH 1193/1911] ASoC: rsnd: fix sound route path when using SRC6/SRC9 This patch fixes the problem that the missing value of the route path setting table and incorrect values are set in the CMD_ROUTE_SELECT register. Signed-off-by: Hiroyuki Yokoyama [Kuninori: shared data on MIX and non-MIX case] Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/cmd.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c index abb5eaac854a9b..7d92a24b7cfa55 100644 --- a/sound/soc/sh/rcar/cmd.c +++ b/sound/soc/sh/rcar/cmd.c @@ -31,23 +31,24 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, struct rsnd_mod *mix = rsnd_io_to_mod_mix(io); struct device *dev = rsnd_priv_to_dev(priv); u32 data; + u32 path[] = { + [1] = 1 << 0, + [5] = 1 << 8, + [6] = 1 << 12, + [9] = 1 << 15, + }; if (!mix && !dvc) return 0; + if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1) + return -ENXIO; + if (mix) { struct rsnd_dai *rdai; struct rsnd_mod *src; struct rsnd_dai_stream *tio; int i; - u32 path[] = { - [0] = 0, - [1] = 1 << 0, - [2] = 0, - [3] = 0, - [4] = 0, - [5] = 1 << 8 - }; /* * it is assuming that integrater is well understanding about @@ -70,16 +71,19 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, } else { struct rsnd_mod *src = rsnd_io_to_mod_src(io); - u32 path[] = { - [0] = 0x30000, - [1] = 0x30001, - [2] = 0x40000, - [3] = 0x10000, - [4] = 0x20000, - [5] = 0x40100 + u8 cmd_case[] = { + [0] = 0x3, + [1] = 0x3, + [2] = 0x4, + [3] = 0x1, + [4] = 0x2, + [5] = 0x4, + [6] = 0x1, + [9] = 0x2, }; - data = path[rsnd_mod_id(src)]; + data = path[rsnd_mod_id(src)] | + cmd_case[rsnd_mod_id(src)] << 16; } dev_dbg(dev, "ctu/mix path = 0x%08x", data); From 0067d4b020ea07a58540acb2c5fcd3364bf326e0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 16:10:11 +0200 Subject: [PATCH 1194/1911] blk-mq: Fix tagset reinit in the presence of cpu hot-unplug In case cpu was unplugged, we need to make sure not to assume that the tags for that cpu are still allocated. so check for null tags when reinitializing a tagset. 
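[Editor's note, not part of the original patch: the rule generalizes to any walk over a tag set. A hardware queue whose CPUs all went away can be left with a NULL set->tags[] slot after the queues are remapped, so a tagset-wide loop has to tolerate holes. Sketch below, with only the fields visible in the diff assumed:]

    int i;

    for (i = 0; i < set->nr_hw_queues; i++) {
            struct blk_mq_tags *tags = set->tags[i];

            /* slot may be NULL once the unplugged CPU's tags were freed */
            if (!tags)
                    continue;

            /* only now is it safe to touch tags->nr_tags, tags->static_rqs[], ... */
    }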
Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e48bc2c72615de..9d97bfc4d4657b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->static_rqs[j]) continue; From 4a3a485b1ed0e109718cc8c9d094fa0f552de9b2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Fri, 10 Mar 2017 12:09:49 -0800 Subject: [PATCH 1195/1911] writeback: fix memory leak in wb_queue_work() When WB_registered flag is not set, wb_queue_work() skips queuing the work, but does not perform the necessary clean up. In particular, if work->auto_free is true, it should free the memory. The leak condition can be reprouced by following these steps: mount /dev/sdb /mnt/sdb /* In qemu console: device_del sdb */ umount /dev/sdb Above will result in a wb_queue_work() call on an unregistered wb and thus leak memory. Reported-by: John Sperbeck Signed-off-by: Tahsin Erdogan Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ef600591d96f9a..63ee2940775ce9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb) spin_unlock_bh(&wb->work_lock); } +static void finish_writeback_work(struct bdi_writeback *wb, + struct wb_writeback_work *work) +{ + struct wb_completion *done = work->done; + + if (work->auto_free) + kfree(work); + if (done && atomic_dec_and_test(&done->cnt)) + wake_up_all(&wb->bdi->wb_waitq); +} + static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); - spin_lock_bh(&wb->work_lock); - if (!test_bit(WB_registered, &wb->state)) - goto out_unlock; if (work->done) atomic_inc(&work->done->cnt); - list_add_tail(&work->list, &wb->work_list); - mod_delayed_work(bdi_wq, &wb->dwork, 0); -out_unlock: + + spin_lock_bh(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); + mod_delayed_work(bdi_wq, &wb->dwork, 0); + } else + finish_writeback_work(wb, work); + spin_unlock_bh(&wb->work_lock); } @@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb) set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { - struct wb_completion *done = work->done; - trace_writeback_exec(wb, work); - wrote += wb_writeback(wb, work); - - if (work->auto_free) - kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(&wb->bdi->wb_waitq); + finish_writeback_work(wb, work); } /* From 6f1f622019f95d79d6e2f8bb3781144ad0aff75f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 17:49:42 +0800 Subject: [PATCH 1196/1911] nfs4: fix a typo of NFS_ATTR_FATTR_GROUP_NAME This typo cause a memory leak, and a bad client's group id. unreferenced object 0xffff96d8073998d0 (size 8): comm "kworker/0:3", pid 34224, jiffies 4295361338 (age 761.752s) hex dump (first 8 bytes): 30 00 39 07 d8 96 ff ff 0.9..... 
backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc+0x140/0x220 [] xdr_stream_decode_string_dup+0x7c/0x110 [sunrpc] [] decode_getfattr_attrs+0x940/0x1630 [nfsv4] [] decode_getfattr_generic.constprop.108+0x9b/0x100 [nfsv4] [] nfs4_xdr_dec_open+0xcf/0x100 [nfsv4] [] rpcauth_unwrap_resp+0xa7/0xe0 [sunrpc] [] call_decode+0x1e0/0x810 [sunrpc] [] __rpc_execute+0x8d/0x420 [sunrpc] [] rpc_async_schedule+0x12/0x20 [sunrpc] [] process_one_work+0x197/0x430 [] worker_thread+0x4e/0x4a0 [] kthread+0x101/0x140 [] ret_from_fork+0x2c/0x40 [] 0xffffffffffffffff Fixes: 686a816ab6 ("NFSv4: Clean up owner/group attribute decode") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f0369e36275341..80ce289eea0532 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, group_name->data); - return NFS_ATTR_FATTR_OWNER_NAME; + return NFS_ATTR_FATTR_GROUP_NAME; } else { len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, XDR_MAX_NETOBJ); From 366a1569bff3fe14abfdf9285e31e05e091745f5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 22:29:14 +0800 Subject: [PATCH 1197/1911] NFSv4: fix a reference leak caused WARNING messages Because nfs4_opendata_access() has close the state when access is denied, so the state isn't leak. Rather than revert the commit a974deee47, I'd like clean the strange state close. [ 1615.094218] ------------[ cut here ]------------ [ 1615.094607] WARNING: CPU: 0 PID: 23702 at lib/list_debug.c:31 __list_add_valid+0x8e/0xa0 [ 1615.094913] list_add double add: new=ffff9d7901d9f608, prev=ffff9d7901d9f608, next=ffff9d7901ee8dd0. [ 1615.095458] Modules linked in: nfsv4(E) nfs(E) nfsd(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock f2fs snd_seq_midi snd_seq_midi_event fscrypto coretemp ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_rapl_perf vmw_balloon snd_ens1371 joydev gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs] [ 1615.097663] CPU: 0 PID: 23702 Comm: fstest Tainted: G W E 4.11.0-rc1+ #517 [ 1615.098015] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1615.098807] Call Trace: [ 1615.099183] dump_stack+0x63/0x86 [ 1615.099578] __warn+0xcb/0xf0 [ 1615.099967] warn_slowpath_fmt+0x5f/0x80 [ 1615.100370] __list_add_valid+0x8e/0xa0 [ 1615.100760] nfs4_put_state_owner+0x75/0xc0 [nfsv4] [ 1615.101136] __nfs4_close+0x109/0x140 [nfsv4] [ 1615.101524] nfs4_close_state+0x15/0x20 [nfsv4] [ 1615.101949] nfs4_close_context+0x21/0x30 [nfsv4] [ 1615.102691] __put_nfs_open_context+0xb8/0x110 [nfs] [ 1615.103155] put_nfs_open_context+0x10/0x20 [nfs] [ 1615.103586] nfs4_file_open+0x13b/0x260 [nfsv4] [ 1615.103978] do_dentry_open+0x20a/0x2f0 [ 1615.104369] ? nfs4_copy_file_range+0x30/0x30 [nfsv4] [ 1615.104739] vfs_open+0x4c/0x70 [ 1615.105106] ? may_open+0x5a/0x100 [ 1615.105469] path_openat+0x623/0x1420 [ 1615.105823] do_filp_open+0x91/0x100 [ 1615.106174] ? 
__alloc_fd+0x3f/0x170 [ 1615.106568] do_sys_open+0x130/0x220 [ 1615.106920] ? __put_cred+0x3d/0x50 [ 1615.107256] SyS_open+0x1e/0x20 [ 1615.107588] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 1615.107922] RIP: 0033:0x7fab599069b0 [ 1615.108247] RSP: 002b:00007ffcf0600d78 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 1615.108575] RAX: ffffffffffffffda RBX: 00007fab59bcfae0 RCX: 00007fab599069b0 [ 1615.108896] RDX: 0000000000000200 RSI: 0000000000000200 RDI: 00007ffcf060255e [ 1615.109211] RBP: 0000000000040010 R08: 0000000000000000 R09: 0000000000000016 [ 1615.109515] R10: 00000000000006a1 R11: 0000000000000246 R12: 0000000000041000 [ 1615.109806] R13: 0000000000040010 R14: 0000000000001000 R15: 0000000000002710 [ 1615.110152] ---[ end trace 96ed63b1306bf2f3 ]--- Fixes: a974deee47 ("NFSv4: Fix memory and state leak in...") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1b183686c6d4f0..c1f5369cd33988 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } From aac66bf5f916f645bd57029490a72c3f91f2c274 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 6 Mar 2017 23:54:01 +0000 Subject: [PATCH 1198/1911] drm/i915: use correct node for handling cache domain eviction It looks like we were incorrectly comparing vma->node against itself instead of the target node, when evicting for a node on systems where we need guard pages between regions with different cache domains. As a consequence we can end up trying to needlessly evict neighbouring nodes, even if they have the same cache domain, and if they were pinned we would fail the eviction. Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170306235414.23407-3-matthew.auld@intel.com Signed-off-by: Chris Wilson (cherry picked from commit fe65cbdbc97929e4a522716ed279a36783656142) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_evict.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c9e..3be2503aa042c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * those as well to make room for our guard pages. 
*/ if (check_color) { - if (vma->node.start + vma->node.size == node->start) { - if (vma->node.color == node->color) + if (node->start + node->size == target->start) { + if (node->color == target->color) continue; } - if (vma->node.start == node->start + node->size) { - if (vma->node.color == node->color) + if (node->start == target->start + target->size) { + if (node->color == target->color) continue; } } From 3a0d137de035cc8c70194d9988ded61825b5ff8a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 8 Mar 2017 13:00:07 +0100 Subject: [PATCH 1199/1911] drm/i915: Nuke skl_update_plane debug message from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Signed-off-by: Maarten Lankhorst Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1488974407-25175-1-git-send-email-maarten.lankhorst@linux.intel.com Fixes: c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers") Cc: # v4.2+ Reviewed-by: Ville Syrjälä [mlankhorst: Add missing tags, point to the correct offending commit] (cherry picked from commit d38146b9ee16264ff9a88bf3391ab9f2f5af3646) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a86..9481ca9a3ae7e0 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane, int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler; - DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n", - plane_id, PS_PLANE_SEL(plane_id)); - scaler = &crtc_state->scaler_state.scalers[scaler_id]; I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), From 6aef660370a9c246956ba6d01eebd8063c4214cb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Mar 2017 09:32:49 +0000 Subject: [PATCH 1200/1911] drm/i915: Fix forcewake active domain tracking In commit 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") I forgot to adjust the newly introduce fw_domains_active state across reset. This caused the assert_forcewakes_inactive to trigger during suspend and resume if there were user held forcewakes. v2: Bitmask checks are required since vfuncs are not always present. v3: Move bitmask tracking to get/put vfunc for simplicity. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") Testcase: igt/drv_suspend/forcewake Cc: Tvrtko Ursulin Cc: "Paneri, Praveen" Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: v4.10+ Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170310093249.4484-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit b8473050805f35add97f3ff57570d55a01808df5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_uncore.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba4e..b7ff592b14f5e0 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma for_each_fw_domain_masked(d, fw_domains, dev_priv) fw_domain_wait_ack(d); + + dev_priv->uncore.fw_domains_active |= fw_domains; } static void @@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma fw_domain_put(d); fw_domain_posting_read(d); } + + dev_priv->uncore.fw_domains_active &= ~fw_domains; } static void @@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; - if (--domain->wake_count == 0) { + if (--domain->wake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask); - dev_priv->uncore.fw_domains_active &= ~domain->mask; - } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, fw_domains &= ~domain->mask; } - if (fw_domains) { + if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; - } } /** @@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; } static inline void __force_wake_auto(struct drm_i915_private *dev_priv, From ce70df089143c49385b4f32f39d41fb50fbf6a7c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Mar 2017 08:22:13 +0300 Subject: [PATCH 1201/1911] mm, gup: fix typo in gup_p4d_range() gup_p4d_range() should call gup_pud_range(), not itself. [ This was not noticed on x86: this is the HAVE_GENERIC_RCU_GUP code used by arm[64] and powerpc - Linus ] Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging") Signed-off-by: Kirill A. 
Shutemov Reported-by: Chris Packham Reported-by: Anton Blanchard Acked-by: Michal Hocko Acked-by: Mark Rutland Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index c74bad1bf6e8f2..04aa405350dce8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1455,7 +1455,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, write, pages, nr)) return 0; - } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + } else if (!gup_pud_range(p4d, addr, next, write, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); From 04166f48d9593af4513ae06c0f966c0cee300a20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Mar 2017 13:24:03 +0100 Subject: [PATCH 1202/1911] Revert "netfilter: nf_tables: add flush field to struct nft_set_iter" This reverts commit 1f48ff6c5393aa7fe290faf5d633164f105b0aa7. This patch is not required anymore now that we keep a dummy list of set elements in the bitmap set implementation, so revert this before we forget this code has no clients. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - net/netfilter/nf_tables_api.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 70c5ca0c60b187..0136028652bdb8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -232,7 +232,6 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { u8 genmask; - bool flush; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e0ccfd5bb37d1..434c739dfecaa8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, - .flush = true, }; set->ops->walk(&ctx, set, &iter); @@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) From c5f7c5a9a0f84c511a8a336491f9b8a3060b6517 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 6 Mar 2017 16:21:16 +0200 Subject: [PATCH 1203/1911] drivers, xen: convert grant_map.users from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
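[Editor's aside: the general shape of an atomic_t to refcount_t conversion, shown as a generic sketch rather than the gntdev-specific hunks below; struct foo and its helpers are made up.]

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct foo {
            refcount_t users;
    };

    static struct foo *foo_create(void)
    {
            struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

            if (f)
                    refcount_set(&f->users, 1);    /* creator holds the first reference */
            return f;
    }

    static void foo_get(struct foo *f)
    {
            refcount_inc(&f->users);               /* take an additional reference */
    }

    static void foo_put(struct foo *f)
    {
            if (refcount_dec_and_test(&f->users))  /* free only on the final put */
                    kfree(f);
    }

Unlike a raw atomic_t counter, refcount_t saturates instead of wrapping on overflow and warns on suspicious transitions such as incrementing from zero, so a refcounting bug tends to degrade into a detectable leak instead of a use-after-free.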
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index c77a0751a31173..f3bf8f4e2d6cef 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ struct grant_map { int index; int count; int flags; - atomic_t users; + refcount_t users; struct unmap_notify notify; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; @@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->index = 0; add->count = count; - atomic_set(&add->users, 1); + refcount_set(&add->users, 1); return add; @@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) if (!map) return; - if (!atomic_dec_and_test(&map->users)) + if (!refcount_dec_and_test(&map->users)) return; atomic_sub(map->count, &pages_mapped); @@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma) struct grant_map *map = vma->vm_private_data; pr_debug("gntdev_vma_open %p\n", vma); - atomic_inc(&map->users); + refcount_inc(&map->users); } static void gntdev_vma_close(struct vm_area_struct *vma) @@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } - atomic_inc(&map->users); + refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 15:57:12 +0100 Subject: [PATCH 1204/1911] x86/tsc: Fix ART for TSC_KNOWN_FREQ Subhransu reported that convert_art_to_tsc() isn't working for him. The ART to TSC relation is only set up for systems which use the refined TSC calibration. Systems with known TSC frequency (available via CPUID 15) are not using the refined calibration and therefor the ART to TSC relation is never established. Add the setup to the known frequency init path which skips ART calibration. The init code needs to be duplicated as for systems which use refined calibration the ART setup must be delayed until calibration has been done. The problem has been there since the ART support was introdduced, but only detected now because Subhransu tested the first time on hardware which has TSC frequency enumerated via CPUID 15. Note for stable: The conditional has changed from TSC_RELIABLE to TSC_KNOWN_FREQUENCY. [ tglx: Rewrote changelog and identified the proper 'Fixes' commit ] Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource") Reported-by: "Prusty, Subhransu S" Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Cc: christopher.s.hall@intel.com Cc: kevin.b.stanton@intel.com Cc: john.stultz@linaro.org Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4f7a9833d8e51f..c73a7f9e881aa2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. 
*/ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } From 67e194007be08d071294456274dd53e0a04fdf90 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 13 Mar 2017 13:28:09 +0100 Subject: [PATCH 1205/1911] ipv6: make ECMP route replacement less greedy Commit 27596472473a ("ipv6: fix ECMP route replacement") introduced a loop that removes all siblings of an ECMP route that is being replaced. However, this loop doesn't stop when it has replaced siblings, and keeps removing other routes with a higher metric. We also end up triggering the WARN_ON after the loop, because after this nsiblings < 0. Instead, stop the loop when we have taken care of all routes with the same metric as the route being replaced. Reproducer: =========== #!/bin/sh ip netns add ns1 ip netns add ns2 ip -net ns1 link set lo up for x in 0 1 2 ; do ip link add veth$x netns ns2 type veth peer name eth$x netns ns1 ip -net ns1 link set eth$x up ip -net ns2 link set veth$x up done ip -net ns1 -6 r a 2000::/64 nexthop via fe80::0 dev eth0 \ nexthop via fe80::1 dev eth1 nexthop via fe80::2 dev eth2 ip -net ns1 -6 r a 2000::/64 via fe80::42 dev eth0 metric 256 ip -net ns1 -6 r a 2000::/64 via fe80::43 dev eth0 metric 2048 echo "before replace, 3 routes" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' echo ip -net ns1 -6 r c 2000::/64 nexthop via fe80::4 dev eth0 \ nexthop via fe80::5 dev eth1 nexthop via fe80::6 dev eth2 echo "after replace, only 2 routes, metric 2048 is gone" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Sabrina Dubroca Acked-by: Nicolas Dichtel Reviewed-by: Xin Long Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e4266746e4a2af..d4bf2c68a545b4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -923,6 +923,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); From 68c32f9c2a36d410aa242e661506e5b2c2764179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:39:01 +0100 Subject: [PATCH 1206/1911] isdn/gigaset: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: cf7776dc05b8 ("[PATCH] isdn4linux: Siemens Gigaset drivers - direct USB connection") Cc: stable # 2.6.17 Cc: Hansjoerg Lipp Signed-off-by: Johan Hovold Signed-off-by: David S. 
Miller --- drivers/isdn/gigaset/bas-gigaset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 11e13c56126fba..2da3ff650e1d55 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 1) + return -ENODEV; + dev_info(&udev->dev, "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", __func__, le16_to_cpu(udev->descriptor.idVendor), From 6e526fdff7be4f13b24f929a04c0e9ae6761291e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:42:03 +0100 Subject: [PATCH 1207/1911] net: wimax/i2400m: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. The endpoints are specifically dereferenced in the i2400m_bootrom_init path during probe (e.g. in i2400mu_tx_bulk_out). Fixes: f398e4240fce ("i2400m/USB: probe/disconnect, dev init/shutdown and reset backends") Cc: Inaky Perez-Gonzalez Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index e7f5910a65191f..f8eb66ef2944ea 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface, struct i2400mu *i2400mu; struct usb_device *usb_dev = interface_to_usbdev(iface); + if (iface->cur_altsetting->desc.bNumEndpoints < 4) + return -ENODEV; + if (usb_dev->speed != USB_SPEED_HIGH) dev_err(dev, "device not connected as high speed\n"); From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 7 Mar 2017 15:08:42 -0600 Subject: [PATCH 1208/1911] x86/platform: Remove warning message for duplicate NMI handlers Remove the WARNING message associated with multiple NMI handlers as there are at least two that are legitimate. These are the KGDB and the UV handlers and both want to be called if the NMI has not been claimed by any other NMI handler. Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate when high frequency NMI tools are in use, notably the perf tools. It is required on systems that cannot sustain a high NMI call rate without adversely affecting the system operation. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Cc: Don Zickus Cc: Peter Zijlstra Cc: Russ Anderson Cc: Frank Ramsay Cc: Tony Ernst Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e72e..a723ae9440ab25 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) spin_lock_irqsave(&desc->lock, flags); /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations + * Indicate if there are multiple registrations on the + * internal NMI handler call chains (SERR and IO_CHECK). 
*/ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); From 79e49503efe53a8c51d8b695bedc8a346c5e4a87 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 16:24:28 +0100 Subject: [PATCH 1209/1911] ipv6: avoid write to a possibly cloned skb ip6_fragment, in case skb has a fraglist, checks if the skb is cloned. If it is, it will move to the 'slow path' and allocates new skbs for each fragment. However, right before entering the slowpath loop, it updates the nexthdr value of the last ipv6 extension header to NEXTHDR_FRAGMENT, to account for the fragment header that will be inserted in the new ipv6-fragment skbs. In case original skb is cloned this munges nexthdr value of another skb. Avoid this by doing the nexthdr update for each of the new fragment skbs separately. This was observed with tcpdump on a bridge device where netfilter ipv6 reassembly is active: tcpdump shows malformed fragment headers as the l4 header (icmpv6, tcp, etc). is decoded as a fragment header. Cc: Hannes Frederic Sowa Reported-by: Andreas Karis Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index df42096e1f0453..58f6288e9ba53e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -768,13 +768,14 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -819,6 +820,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. */ From a13b2082ece95247779b9995c4e91b4246bed023 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 17:38:17 +0100 Subject: [PATCH 1210/1911] bridge: drop netfilter fake rtable unconditionally Andreas reports kernel oops during rmmod of the br_netfilter module. Hannes debugged the oops down to a NULL rt6info->rt6i_indev. Problem is that br_netfilter has the nasty concept of adding a fake rtable to skb->dst; this happens in a br_netfilter prerouting hook. A second hook (in bridge LOCAL_IN) is supposed to remove these again before the skb is handed up the stack. However, on module unload hooks get unregistered which means an skb could traverse the prerouting hook that attaches the fake_rtable, while the 'fake rtable remove' hook gets removed from the hooklist immediately after. Fixes: 34666d467cbf1e2e3c7 ("netfilter: bridge: move br_netfilter out of the core") Reported-by: Andreas Karis Debugged-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- net/bridge/br_input.c | 1 + net/bridge/br_netfilter_hooks.c | 21 --------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 236f34244dbe1f..013f2290bfa56d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 95087e6e825836..fa87fbd62bb788 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -907,12 +892,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, - { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 9 Mar 2017 17:20:04 +0100 Subject: [PATCH 1211/1911] iio: sw-device: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software device creation will result in undefined behaviour when configfs is built as a module since the configfs group for the device not properly initialized. Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group initialization"). Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") Reported-by: Miguel Robles Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index 23ca4151552796..fa793193306798 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&d->group, name, type); #endif } From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:21 +0200 Subject: [PATCH 1212/1911] x86/platform/intel-mid: Use common power off sequence Intel Medfield may use common for Intel MID devices power sequence. Remove unneded custom power off stub. While here, remove function forward declaration. 
Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/mfld.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e793fe509971f4..e42978d4deafeb 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -17,16 +17,6 @@ #include "intel_mid_weak_decls.h" -static void penwell_arch_setup(void); -/* penwell arch ops */ -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -static void mfld_power_off(void) -{ -} - static unsigned long __init mfld_calibrate_tsc(void) { unsigned long fast_calibrate; @@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void) static void __init penwell_arch_setup(void) { x86_platform.calibrate_tsc = mfld_calibrate_tsc; - pm_power_off = mfld_power_off; } +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + void *get_penwell_ops(void) { return &penwell_ops; From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:22 +0200 Subject: [PATCH 1213/1911] x86/platform/intel-mid: Add power button support for Merrifield Intel Merrifield platform has a Basin Cove PMIC to handle in particular power button events. Add necessary bits to enable it. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- .../platform/intel-mid/device_libs/Makefile | 1 + .../device_libs/platform_mrfld_power_btn.c | 82 +++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index a7dbec4dce2758..3dbde04febdcca 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c new file mode 100644 index 00000000000000..a6c3705a28ad49 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c @@ -0,0 +1,82 @@ +/* + * Intel Merrifield power button support + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include +#include +#include +#include + +#include +#include + +static struct resource mrfld_power_btn_resources[] = { + { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mrfld_power_btn_dev = { + .name = "msic_power_btn", + .id = PLATFORM_DEVID_NONE, + .num_resources = ARRAY_SIZE(mrfld_power_btn_resources), + .resource = mrfld_power_btn_resources, +}; + +static int mrfld_power_btn_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&mrfld_power_btn_dev); + return 0; + } + + return platform_device_register(&mrfld_power_btn_dev); +} + +static struct notifier_block mrfld_power_btn_scu_notifier = { + .notifier_call = mrfld_power_btn_scu_status_change, +}; + +static int __init register_mrfld_power_btn(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + /* + * We need to be sure that the SCU IPC is ready before + * PMIC power button device can be registered: + */ + intel_scu_notifier_add(&mrfld_power_btn_scu_notifier); + + return 0; +} +arch_initcall(register_mrfld_power_btn); + +static void __init *mrfld_power_btn_platform_data(void *info) +{ + struct resource *res = mrfld_power_btn_resources; + struct sfi_device_table_entry *pentry = info; + + res->start = res->end = pentry->irq; + return NULL; +} + +static const struct devs_id mrfld_power_btn_dev_id __initconst = { + .name = "bcove_power_btn", + .type = SFI_DEV_TYPE_IPC, + .delay = 1, + .msic = 1, + .get_platform_data = &mrfld_power_btn_platform_data, +}; + +sfi_device(mrfld_power_btn_dev_id); From 9aea151f282df2b82a44fd7058a239334437c266 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 26 Feb 2017 01:02:09 +0100 Subject: [PATCH 1214/1911] ARM: dts: add the AB8500 clocks to the device tree This adds the AB8500 clocks to the device tree using the new bindings from the clk subsystem, making audio work again. Cc: Lee Jones Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-dbx5x0.dtsi | 19 +++++++++++++++++++ arch/arm/boot/dts/ste-href.dtsi | 9 --------- arch/arm/boot/dts/ste-snowball.dts | 9 --------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 82d8c477129359..162e1eb5373d34 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -14,6 +14,7 @@ #include #include #include +#include #include "skeleton.dtsi" / { @@ -603,6 +604,11 @@ interrupt-controller; #interrupt-cells = <2>; + ab8500_clock: clock-controller { + compatible = "stericsson,ab8500-clk"; + #clock-cells = <1>; + }; + ab8500_gpio: ab8500-gpio { compatible = "stericsson,ab8500-gpio"; gpio-controller; @@ -686,6 +692,8 @@ ab8500-pwm { compatible = "stericsson,ab8500-pwm"; + clocks = <&ab8500_clock AB8500_SYSCLK_INT>; + clock-names = "intclk"; }; ab8500-debugfs { @@ -700,6 +708,9 @@ V-AMIC2-supply = <&ab8500_ldo_anamic2_reg>; V-DMIC-supply = <&ab8500_ldo_dmic_reg>; + clocks = <&ab8500_clock AB8500_SYSCLK_AUDIO>; + clock-names = "audioclk"; + stericsson,earpeice-cmv = <950>; /* Units in mV. 
*/ }; @@ -1095,6 +1106,14 @@ status = "disabled"; }; + sound { + compatible = "stericsson,snd-soc-mop500"; + stericsson,cpu-dai = <&msp1 &msp3>; + stericsson,audio-codec = <&codec>; + clocks = <&prcmu_clk PRCMU_SYSCLK>, <&ab8500_clock AB8500_SYSCLK_ULP>, <&ab8500_clock AB8500_SYSCLK_INT>; + clock-names = "sysclk", "ulpclk", "intclk"; + }; + msp0: msp@80123000 { compatible = "stericsson,ux500-msp-i2s"; reg = <0x80123000 0x1000>; diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index f37f9e10713cc8..9e359e4f342e76 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -186,15 +186,6 @@ status = "okay"; }; - sound { - compatible = "stericsson,snd-soc-mop500"; - - stericsson,cpu-dai = <&msp1 &msp3>; - stericsson,audio-codec = <&codec>; - clocks = <&prcmu_clk PRCMU_SYSCLK>; - clock-names = "sysclk"; - }; - msp0: msp@80123000 { pinctrl-names = "default"; pinctrl-0 = <&msp0_default_mode>; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index dd5514def60424..ade1d0d4e5f45c 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -159,15 +159,6 @@ "", "", "", "", "", "", "", ""; }; - sound { - compatible = "stericsson,snd-soc-mop500"; - - stericsson,cpu-dai = <&msp1 &msp3>; - stericsson,audio-codec = <&codec>; - clocks = <&prcmu_clk PRCMU_SYSCLK>; - clock-names = "sysclk"; - }; - msp0: msp@80123000 { pinctrl-names = "default"; pinctrl-0 = <&msp0_default_mode>; From 6e7408acd04d06c04981c0c0fb5a2462b16fae4f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 12 Mar 2017 18:12:56 +0100 Subject: [PATCH 1215/1911] cpufreq: intel_pstate: Update pid_params.sample_rate_ns in pid_param_set() Fix the debugfs interface for PID tuning to actually update pid_params.sample_rate_ns on PID parameters updates, as changing pid_params.sample_rate_ms via debugfs has no effect now. Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3d37219a0dd7af..f9fe910f3b838c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -989,6 +989,7 @@ static void intel_pstate_update_policies(void) static int pid_param_set(void *data, u64 val) { *(u32 *)data = val; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; intel_pstate_reset_all_pid(); return 0; } From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 13 Mar 2017 19:33:37 +0300 Subject: [PATCH 1216/1911] x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y options selected. With branch profiling enabled we end up calling ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is built with KASAN instrumentation, so calling it before kasan has been initialized leads to crash. Use DISABLE_BRANCH_PROFILING define to make sure that we don't call ftrace_likely_update() from early code before kasan_early_init(). 
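A minimal sketch of the mechanism (simplified from how <linux/compiler.h> behaves with branch profiling enabled; not quoted from the patch): defining DISABLE_BRANCH_PROFILING before the first include keeps likely()/unlikely() as plain __builtin_expect() wrappers instead of the instrumented variants that call ftrace_likely_update(), so no KASAN-instrumented code runs before kasan_early_init().

	#define DISABLE_BRANCH_PROFILING	/* must precede the first #include */
	#include <linux/compiler.h>
	#include <linux/types.h>

	static bool early_check(int x)
	{
		/* stays a plain __builtin_expect(), safe before kasan_early_init() */
		if (unlikely(x < 0))
			return false;
		return true;
	}
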
Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support") Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: lkp@01.org Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb1e..b5785c197e5347 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8d63d7a104c3c4..4c90cfdc128b83 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include #include From 91864f5852f9996210fad400cf70fb85af091243 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Sun, 12 Mar 2017 21:36:18 -0700 Subject: [PATCH 1217/1911] net: use net->count to check whether a netns is alive or not MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous idea was to check whether a net namespace is in net_exit_list or not. It doesn't work, because net->exit_list is used in __register_pernet_operations and __unregister_pernet_operations where all namespaces are added to a temporary list to make cleanup in a error case, so list_empty(&net->exit_list) always returns false. Reported-by: Mantas Mikulėnas Fixes: 002d8a1a6c11 ("net: skip genenerating uevents for network namespaces that are exiting") Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3945821e9c1f8f..65ea0ff4017c16 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct kobject *kobj = &dev->_rx[i].kobj; - if (!list_empty(&dev_net(dev)->exit_list)) + if (!atomic_read(&dev_net(dev)->count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); @@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; - if (!list_empty(&dev_net(dev)->exit_list)) + if (!atomic_read(&dev_net(dev)->count)) queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); @@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &(ndev->dev); - if (!list_empty(&dev_net(ndev)->exit_list)) + if (!atomic_read(&dev_net(ndev)->count)) dev_set_uevent_suppress(dev, 1); kobject_get(&dev->kobj); From c80498e36d4ef3e24599d363c622fbf22a1293cc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Mar 2017 16:24:03 +0100 Subject: [PATCH 1218/1911] vxlan: fix ovs support The required changes in the function vxlan_dev_create() were missing in commit 8bcdc4f3a20b. 
The vxlan device is not registered anymore after this patch and the error path causes an stack dump: WARNING: CPU: 3 PID: 1498 at net/core/dev.c:6713 rollback_registered_many+0x9d/0x3f0 Fixes: 8bcdc4f3a20b ("vxlan: add changelink support") CC: Roopa Prabhu Signed-off-by: Nicolas Dichtel Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 73 +++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e375560cc74e5f..bdb6ae16d4a85b 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, return 0; } +static int __vxlan_dev_create(struct net *net, struct net_device *dev, + struct vxlan_config *conf) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + + err = vxlan_dev_configure(net, dev, conf, false); + if (err) + return err; + + dev->ethtool_ops = &vxlan_ethtool_ops; + + /* create an fdb entry for a valid default destination */ + if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { + err = vxlan_fdb_create(vxlan, all_zeros_mac, + &vxlan->default_dst.remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_EXCL | NLM_F_CREATE, + vxlan->cfg.dst_port, + vxlan->default_dst.remote_vni, + vxlan->default_dst.remote_vni, + vxlan->default_dst.remote_ifindex, + NTF_SELF); + if (err) + return err; + } + + err = register_netdevice(dev); + if (err) { + vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); + return err; + } + + list_add(&vxlan->next, &vn->vxlan_list); + return 0; +} + static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], struct net_device *dev, struct vxlan_config *conf, bool changelink) @@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_config conf; int err; @@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (err) return err; - err = vxlan_dev_configure(src_net, dev, &conf, false); - if (err) - return err; - - dev->ethtool_ops = &vxlan_ethtool_ops; - - /* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, - &vxlan->default_dst.remote_ip, - NUD_REACHABLE | NUD_PERMANENT, - NLM_F_EXCL | NLM_F_CREATE, - vxlan->cfg.dst_port, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_ifindex, - NTF_SELF); - if (err) - return err; - } - - err = register_netdevice(dev); - if (err) { - vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); - return err; - } - - list_add(&vxlan->next, &vn->vxlan_list); - - return 0; + return __vxlan_dev_create(src_net, dev, &conf); } static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], @@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name, if (IS_ERR(dev)) return dev; - err = vxlan_dev_configure(net, dev, conf, false); + err = __vxlan_dev_create(net, dev, conf); if (err < 0) { free_netdev(dev); return ERR_PTR(err); From 09498087f922bb623b66db9e89c6fd11a3799867 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 9 Mar 2017 07:41:16 
+0100 Subject: [PATCH 1219/1911] serial: 8250_dw: Honor clk_round_rate errors in dw8250_set_termios clk_round_rate returns a signed long and may possibly return errors in it, for example if there is no possible rate. Till now dw8250_set_termios ignored any error, the signednes and would just use the value as input to clk_set_rate. This of course falls apart if there is an actual error, so check for errors and only try to set a rate if the value is actually valid. This turned up on some Rockchip platforms after commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") enabled set_termios callback in all cases, not only ACPI. Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") Signed-off-by: Heiko Stuebner Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 6ee55a2d47bb42..223ac234ddb28a 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -257,7 +257,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, { unsigned int baud = tty_termios_baud_rate(termios); struct dw8250_data *d = p->private_data; - unsigned int rate; + long rate; int ret; if (IS_ERR(d->clk) || !old) @@ -265,7 +265,10 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, clk_disable_unprepare(d->clk); rate = clk_round_rate(d->clk, baud * 16); - ret = clk_set_rate(d->clk, rate); + if (rate < 0) + ret = rate; + else + ret = clk_set_rate(d->clk, rate); clk_prepare_enable(d->clk); if (!ret) From b15bfbe6427712d1b992bf806a6df9c05002a0a4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 13:09:58 +0000 Subject: [PATCH 1220/1911] serial: 8250_dw: Fix breakage when HAVE_CLK=n Commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") recently broke the 8250_dw driver on platforms which don't select HAVE_CLK, as dw8250_set_termios() gets confused by the behaviour of the fallback HAVE_CLK=n clock API in linux/clk.h which pretends everything is fine but returns (valid) NULL clocks and 0 HZ clock rates. That 0 rate is written into the uartclk resulting in a crash at boot, e.g. on Cavium Octeon III based UTM-8 we get something like this: 1180000000800.serial: ttyS0 at MMIO 0x1180000000800 (irq = 41, base_baud = 25000000) is a OCTEON ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at drivers/tty/serial/serial_core.c:441 uart_get_baud_rate+0xfc/0x1f0 ... Call Trace: ... [] uart_get_baud_rate+0xfc/0x1f0 [] serial8250_do_set_termios+0xb0/0x440 [] uart_set_options+0xe8/0x190 [] serial8250_console_setup+0x84/0x158 [] univ8250_console_setup+0x54/0x70 [] register_console+0x1c8/0x418 [] uart_add_one_port+0x434/0x4b0 [] serial8250_register_8250_port+0x2d8/0x440 [] dw8250_probe+0x388/0x5e8 ... The clock API is defined such that NULL is a valid clock handle so it wouldn't be right to check explicitly for NULL. Instead treat a clk_round_rate() return value of 0 as an error which prevents uartclk being overwritten. 
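Combined with the previous 8250_dw patch, the resulting rate handling in dw8250_set_termios() looks roughly like this (condensed sketch, not the complete function; d and p are the driver's dw8250_data and uart_port as in the hunks):

	long rate;
	int ret;

	clk_disable_unprepare(d->clk);
	rate = clk_round_rate(d->clk, baud * 16);
	if (rate < 0)
		ret = rate;		/* clock framework reported an error */
	else if (rate == 0)
		ret = -ENOENT;		/* HAVE_CLK=n stub: no usable rate */
	else
		ret = clk_set_rate(d->clk, rate);
	clk_prepare_enable(d->clk);
	/* the driver only updates p->uartclk further down when ret == 0 */
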
Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") Signed-off-by: James Hogan Cc: Kefeng Wang Cc: David Daney Cc: Russell King Cc: linux-serial@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: bcm-kernel-feedback-list@broadcom.com Reviewed-by: Andy Shevchenko Reviewed-by: Jason Uy Reviewed-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 223ac234ddb28a..e65808c482f184 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -267,6 +267,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, rate = clk_round_rate(d->clk, baud * 16); if (rate < 0) ret = rate; + else if (rate == 0) + ret = -ENOENT; else ret = clk_set_rate(d->clk, rate); clk_prepare_enable(d->clk); From 542ed784671d4678406c77ed6dd01593a0cdbea1 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 28 Feb 2017 14:30:33 -0600 Subject: [PATCH 1221/1911] tty: acpi/spcr: QDF2400 E44 checks for wrong OEM revision For Qualcomm Technologies QDF2400 SOCs that are affected by erratum E44, the ACPI oem_revision field is actually set to 1, not 0. Fixes: d8a4995bcea1 ("tty: pl011: Work around QDF2400 E44 stuck BUSY bit") Tested-by: Manoj Iyer Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/spcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 01c94669a2b0ad..3afa8c1fa12702 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -30,7 +30,7 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h) return true; if (!memcmp(h->oem_table_id, "QDF2400 ", ACPI_OEM_TABLE_ID_SIZE) && - h->oem_revision == 0) + h->oem_revision == 1) return true; return false; From 3f8ed54aee491bbb83656592c2d0ad7b78d045ca Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 13 Mar 2017 18:30:12 -0700 Subject: [PATCH 1222/1911] cpufreq: intel_pstate: Correct frequency setting in the HWP mode In the functions intel_pstate_hwp_set(), min/max range from HWP capability MSR along with max_perf_pct and min_perf_pct, is used to set the HWP request MSR. In some cases this doesn't result in the correct HWP max/min in HWP request. For example: In the following case: HWP capabilities from MSR 0x771 0x70a1220 Here cpufreq min/max frequencies from above MSR dump are 700MHz and 3.2GHz respectively. This will result in hwp_min = 0x07 hwp_max = 0x20 To limit max frequency to 2GHz: perf_limits->max_perf_pct = 63 (2GHz as a percent of 3.2GHz rounded up) With the current calculation: adj_range = max_perf_pct * range / 100; adj_range = 63 * (32 - 7) / 100 adj_range = 15 max = hw_min + adj_range; max = 7 + 15 = 22 This will result in HWP request of 0x160f, which will result in a frequency cap of 2.2GHz not 2GHz. The problem with the above calculation is that hwp_min of 7 is treated as 0% in the range. But max_perf_pct is calculated with respect to minimum as 0 and max as 3.2GHz or hwp_max, so adding hwp_min to it will result in more than the desired. Since the min_perf_pct and max_perf_pct is already a percent of max frequency or hwp_max, this min/max HWP request value can be calculated directly applying these percentage to hwp_max. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f9fe910f3b838c..ee12641ee010c9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -845,7 +845,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { - int min, hw_min, max, hw_max, cpu, range, adj_range; + int min, hw_min, max, hw_max, cpu; struct perf_limits *perf_limits = limits; u64 value, cap; @@ -863,20 +863,17 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - range = hw_max - hw_min; max_perf_pct = perf_limits->max_perf_pct; min_perf_pct = perf_limits->min_perf_pct; + min = hw_max * min_perf_pct / 100; rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - adj_range = min_perf_pct * range / 100; - min = hw_min + adj_range; + value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - adj_range = max_perf_pct * range / 100; - max = hw_min + adj_range; - + max = hw_max * max_perf_pct / 100; value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); From 64ff64b90e62c860772fd0be50b7cfcef1d8a9b2 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:12 -0800 Subject: [PATCH 1223/1911] scsi: megaraid_sas: enable intx only if msix request fails Without this fix, driver will enable INTx Interrupt pin even though MSI-x vectors are enabled. See below lspci output. DisINTx is unset for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- After applying this fix, driver will enable INTx Interrupt pin only if Legacy interrupt method is required. See below lspci output. DisINTx is set for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx+ Signed-off-by: Kashyap Desai Reviewed-by: Tomas Henzl Signed-off-by: Martin K. 
Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7ac9a9ee9bd473..016ffcebaf66bd 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5034,10 +5034,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) &instance->irq_context[j]); /* Retry irq register for IO_APIC*/ instance->msix_vectors = 0; - if (is_probe) + if (is_probe) { + pci_free_irq_vectors(instance->pdev); return megasas_setup_irqs_ioapic(instance); - else + } else { return -1; + } } } return 0; @@ -5277,9 +5279,11 @@ static int megasas_init_fw(struct megasas_instance *instance) MPI2_REPLY_POST_HOST_INDEX_OFFSET); } - i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); - if (i < 0) - goto fail_setup_irqs; + if (!instance->msix_vectors) { + i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); + if (i < 0) + goto fail_setup_irqs; + } dev_info(&instance->pdev->dev, "firmware supports msix\t: (%d)", fw_msix_count); From 49524b3c6e12375627ddd870613fcc6b24909898 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:13 -0800 Subject: [PATCH 1224/1911] scsi: megaraid_sas: add correct return type check for ldio hint logic for raid1 Return value check of atomic_dec_if_positive is required as it returns old value minus one. Without this fix, driver will send small ios to firmware path and that will be a performance issue. Not critical, but good to have r1_ldio_hint as default value in sdev private. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 016ffcebaf66bd..0016f12cc563e7 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1963,6 +1963,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) if (!mr_device_priv_data) return -ENOMEM; sdev->hostdata = mr_device_priv_data; + + atomic_set(&mr_device_priv_data->r1_ldio_hint, + instance->r1_ldio_hint_default); return 0; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 29650ba669da58..ebd746e2d97cb0 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = false; atomic_dec(&instance->fw_outstanding); } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || - atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) { + (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) { fp_possible = false; atomic_dec(&instance->fw_outstanding); if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) From 874d025da667d19b728141437ccbefe9dbaf9e7b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:14 -0800 Subject: [PATCH 1225/1911] scsi: megaraid_sas: raid6 also require cpuSel check same as raid5 Without this fix, raid6 performance will not be optimal. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. 
Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ebd746e2d97cb0..f990ab4d45e1bf 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context, cpu_sel = MR_RAID_CTX_CPUSEL_1; if (is_stream_detected(rctx_g35) && - (raid->level == 5) && + ((raid->level == 5) || (raid->level == 6)) && (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) cpu_sel = MR_RAID_CTX_CPUSEL_0; From 22487b66594f18f2a7c211f3ab8bb02dec74d37b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:15 -0800 Subject: [PATCH 1226/1911] scsi: megaraid_sas: Driver version upgrade Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index e7e5974e1a2c43..2b209bbb4c9165 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -35,8 +35,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.701.16.00-rc1" -#define MEGASAS_RELDATE "February 2, 2017" +#define MEGASAS_VERSION "07.701.17.00-rc1" +#define MEGASAS_RELDATE "March 2, 2017" /* * Device IDs From 23f963e91fd81f44f6b316b1c24db563354c6be8 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 13 Mar 2017 14:30:29 -0700 Subject: [PATCH 1227/1911] dmaengine: Fix array index out of bounds warning in __get_unmap_pool() This fixes the following warning when building with clang and CONFIG_DMA_ENGINE_RAID=n : drivers/dma/dmaengine.c:1102:11: error: array index 2 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[2]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { ^ drivers/dma/dmaengine.c:1104:11: error: array index 3 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[3]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { Signed-off-by: Matthias Kaehlcke Reviewed-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 24e0221fd66d1f..d9118ec2302541 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1108,12 +1108,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) switch (order) { case 0 ... 1: return &unmap_pool[0]; +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) case 2 ... 4: return &unmap_pool[1]; case 5 ... 7: return &unmap_pool[2]; case 8: return &unmap_pool[3]; +#endif default: BUG(); return NULL; From 02bb56ddc6711639c549d81c7b9f6d845da243a9 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 14 Mar 2017 09:38:33 +0800 Subject: [PATCH 1228/1911] ucc/hdlc: fix two little issue 1. modify bd_status from u32 to u16 in function hdlc_rx_done, because bd_status register is 16bits 2. write bd_length register before writing bd_status register Signed-off-by: Zhao Qiang Signed-off-by: David S. 
Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index a5045b5279d70a..6742ae60566045 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev) /* set bd status and length */ bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S; - iowrite16be(bd_status, &bd->status); iowrite16be(skb->len, &bd->length); + iowrite16be(bd_status, &bd->status); /* Move to next BD in the ring */ if (!(bd_status & T_W_S)) @@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit) struct sk_buff *skb; hdlc_device *hdlc = dev_to_hdlc(dev); struct qe_bd *bd; - u32 bd_status; + u16 bd_status; u16 length, howmany = 0; u8 *bdbuffer; int i; From 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Fri, 10 Mar 2017 16:40:33 +1100 Subject: [PATCH 1229/1911] dccp/tcp: fix routing redirect race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Eric Dumazet pointed out this also needs to be fixed in IPv6. v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. We have seen a few incidents lately where a dst_enty has been freed with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that dst_entry. If the conditions/timings are right a crash then ensues when the freed dst_entry is referenced later on. A Common crashing back trace is: #8 [] page_fault at ffffffff8163e648 [exception RIP: __tcp_ack_snd_check+74] . . #9 [] tcp_rcv_established at ffffffff81580b64 #10 [] tcp_v4_do_rcv at ffffffff8158b54a #11 [] tcp_v4_rcv at ffffffff8158cd02 #12 [] ip_local_deliver_finish at ffffffff815668f4 #13 [] ip_local_deliver at ffffffff81566bd9 #14 [] ip_rcv_finish at ffffffff8156656d #15 [] ip_rcv at ffffffff81566f06 #16 [] __netif_receive_skb_core at ffffffff8152b3a2 #17 [] __netif_receive_skb at ffffffff8152b608 #18 [] netif_receive_skb at ffffffff8152b690 #19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] #20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] #21 [] net_rx_action at ffffffff8152bac2 #22 [] __do_softirq at ffffffff81084b4f #23 [] call_softirq at ffffffff8164845c #24 [] do_softirq at ffffffff81016fc5 #25 [] irq_exit at ffffffff81084ee5 #26 [] do_IRQ at ffffffff81648ff8 Of course it may happen with other NIC drivers as well. It's found the freed dst_entry here: 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ 225 {↩ 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ 228 ↩ 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ 231 }↩ But there are other backtraces attributed to the same freed dst_entry in netfilter code as well. All the vmcores showed 2 significant clues: - Remote hosts behind the default gateway had always been redirected to a different gateway. A rtable/dst_entry will be added for that host. Making more dst_entrys with lower reference counts. Making this more probable. - All vmcores showed a postitive LockDroppedIcmps value, e.g: LockDroppedIcmps 267 A closer look at the tcp_v4_err() handler revealed that do_redirect() will run regardless of whether user space has the socket locked. 
This can result in a race condition where the same dst_entry cached in sk->sk_dst_entry can be decremented twice for the same socket via: do_redirect()->__sk_dst_check()-> dst_release(). Which leads to the dst_entry being prematurely freed with another socket pointing to it via sk->sk_dst_cache and a subsequent crash. To fix this skip do_redirect() if usespace has the socket locked. Instead let the redirect take place later when user space does not have the socket locked. The dccp/IPv6 code is very similar in this respect, so fixing it there too. As Eric Garver pointed out the following commit now invalidates routes. Which can set the dst->obsolete flag so that ipv4_dst_check() returns null and triggers the dst_release(). Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") Cc: Eric Garver Cc: Hannes Sowa Signed-off-by: Jon Maxwell Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 409d0cfd344748..b99168b0fabf2a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 233b57367758c6..d9b6a4e403e701 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f3ec1365497a5..575e19dcc01763 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -431,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60a5295a7de6e8..49fa2e8c3fa921 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } From b20e2d54789c6acbf6bd0efdbec2cf5fa4d90ef1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:00:26 +0100 Subject: [PATCH 1230/1911] tun: fix premature POLLOUT notification on tun devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit aszlig observed failing ssh tunnels (-w) during initialization since commit cc9da6cc4f56e0 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE"). We already had reports that the mentioned commit breaks Juniper VPN connections. 
I can't clearly say that the Juniper VPN client has the same problem, but it is worth a try to hint to this patch. Because of the early generation of link local addresses, the kernel now can start asking for routers on the local subnet much earlier than usual. Those router solicitation packets arrive inside the ssh channels and should be transmitted to the tun fd before the configuration scripts might have upped the interface and made it ready for transmission. ssh polls on the interface and receives back a POLL_OUT. It tries to send the earily router solicitation packet to the tun interface. Unfortunately it hasn't been up'ed yet by config scripts, thus failing with -EIO. ssh doesn't retry again and considers the tun interface broken forever. Link: https://bugzilla.kernel.org/show_bug.cgi?id=121131 Fixes: cc9da6cc4f56 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE") Cc: Bjørn Mork Reported-by: Valdis Kletnieks Cc: Valdis Kletnieks Reported-by: Jonas Lippuner Cc: Jonas Lippuner Reported-by: aszlig Cc: aszlig Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/tun.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f58b7d850114b0..34cc3c590aa5c5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev) /* Net device open. */ static int tun_net_open(struct net_device *dev) { + struct tun_struct *tun = netdev_priv(dev); + int i; + netif_tx_start_all_queues(dev); + + for (i = 0; i < tun->numqueues; i++) { + struct tun_file *tfile; + + tfile = rtnl_dereference(tun->tfiles[i]); + tfile->socket.sk->sk_write_space(tfile->socket.sk); + } + return 0; } @@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk))) + if (tun->dev->flags & IFF_UP && + (sock_writeable(sk) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + sock_writeable(sk)))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) From 72ef9c4125c7b257e3a714d62d778ab46583d6a3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:01:30 +0100 Subject: [PATCH 1231/1911] dccp: fix memory leak during tear-down of unsuccessful connection request This patch fixes a memory leak, which happens if the connection request is not fulfilled between parsing the DCCP options and handling the SYN (because e.g. the backlog is full), because we forgot to free the list of ack vectors. Reported-by: Jianwen Ji Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c48..5e3a7302f7747e 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) From b0addd3fa6bcd119be9428996d5d4522479ab240 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:48 +0100 Subject: [PATCH 1232/1911] USB: idmouse: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/idmouse.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 8b9fd7534f698b..502bfe30a077a2 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -347,6 +347,9 @@ static int idmouse_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) From 1dc56c52d2484be09c7398a5207d6b11a4256be9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:49 +0100 Subject: [PATCH 1233/1911] USB: lvtest: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should the probed device lack endpoints. Note that this driver does not bind to any devices by default. Fixes: ce21bfe603b3 ("USB: Add LVS Test device driver") Cc: stable # 3.17 Cc: Pratyush Anand Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/lvstest.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index 77176511658f33..d3d12475326663 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -366,6 +366,10 @@ static int lvs_rh_probe(struct usb_interface *intf, hdev = interface_to_usbdev(intf); desc = intf->cur_altsetting; + + if (desc->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &desc->endpoint[0].desc; /* valid only for SS root hub */ From f259ca3eed6e4b79ac3d5c5c9fb259fb46e86217 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:50 +0100 Subject: [PATCH 1234/1911] USB: uss720: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. Note that the endpoint access that causes the NULL-deref is currently only used for debugging purposes during probe so the oops only happens when dynamic debugging is enabled. This means the driver could be rewritten to continue to accept device with only two endpoints, should such devices exist. 
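These NULL-deref-at-probe fixes all apply the same defensive check; a generic sketch follows, with the driver name, endpoint count and debug print being illustrative only, not any one driver's real code:

	#include <linux/usb.h>

	static int example_probe(struct usb_interface *intf,
				 const struct usb_device_id *id)
	{
		struct usb_host_interface *alt = intf->cur_altsetting;
		struct usb_endpoint_descriptor *epd;

		/*
		 * A malicious or broken device may expose fewer endpoints than
		 * the driver expects, so check before indexing alt->endpoint[].
		 */
		if (alt->desc.bNumEndpoints < 3)
			return -ENODEV;

		epd = &alt->endpoint[0].desc;	/* now guaranteed to exist */
		dev_dbg(&intf->dev, "first endpoint address 0x%02x\n",
			epd->bEndpointAddress);

		return 0;
	}
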
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/uss720.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index e45a3a680db8f6..07014cad6dbe35 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -709,6 +709,11 @@ static int uss720_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 3) { + usb_put_dev(usbdev); + return -ENODEV; + } + /* * Allocate parport interface */ From 03ace948a4eb89d1cf51c06afdfc41ebca5fdb27 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:51 +0100 Subject: [PATCH 1235/1911] USB: wusbcore: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. This specifically fixes the NULL-pointer dereference when probing HWA HC devices. Fixes: df3654236e31 ("wusb: add the Wire Adapter (WA) core") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/wa-hc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c index 252c7bd9218afd..d01496fd27fe88 100644 --- a/drivers/usb/wusbcore/wa-hc.c +++ b/drivers/usb/wusbcore/wa-hc.c @@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface, int result; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + result = wa_rpipes_create(wa); if (result < 0) goto error_rpipes_create; From daf229b15907fbfdb6ee183aac8ca428cb57e361 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:52 +0100 Subject: [PATCH 1236/1911] uwb: hwa-rc: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the start callback which is called during probe. Fixes: de520b8bd552 ("uwb: add HWA radio controller driver") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/hwa-rc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 0aa6c3c29d1726..35a1e777b4497a 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -823,6 +823,9 @@ static int hwarc_probe(struct usb_interface *iface, struct hwarc *hwarc; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; uwb_rc = uwb_rc_alloc(); if (uwb_rc == NULL) { From 4ce362711d78a4999011add3115b8f4b0bc25e8c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:53 +0100 Subject: [PATCH 1237/1911] uwb: i1480-dfu: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the cmd and wait_init_done callbacks which are called during probe. 
Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/i1480/dfu/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 2bfc846ac07134..6345e85822a424 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) result); } + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL); if (i1480_usb == NULL) { From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 13 Mar 2017 20:50:08 +0100 Subject: [PATCH 1238/1911] usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk Some USB 2.0 devices erroneously report millisecond values in bInterval. The generic config code manages to catch most of them, but in some cases it's not completely enough. The case at stake here is a USB 2.0 braille device, which wants to announce 10ms and thus sets bInterval to 10, but with the USB 2.0 computation that yields to 64ms. It happens that one can type fast enough to reach this interval and get the device buffers overflown, leading to problematic latencies. The generic config code does not catch this case because the 64ms is considered a sane enough value. This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk to mark devices which actually report milliseconds in bInterval, and marks Vario Ultra devices as needing it. Signed-off-by: Samuel Thibault Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 10 ++++++++++ drivers/usb/core/quirks.c | 8 ++++++++ include/linux/usb/quirks.h | 6 ++++++ 3 files changed, 24 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 25dbd8c7aec733..4be52c602e9b7a 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -280,6 +280,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* * Adjust bInterval for quirked devices. + */ + /* + * This quirk fixes bIntervals reported in ms. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval) + 3, i, j); + i = j = n; + } + /* * This quirk fixes bIntervals reported in * linear microframes. 
*/ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 24f9f98968a5d8..96b21b0dac1e8c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = { /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Baum Vario Ultra */ + { USB_DEVICE(0x0904, 0x6101), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6102), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e427..de2a722fe3cf7c 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ From 0090114d336a9604aa2d90bc83f20f7cd121b76c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:35 -0600 Subject: [PATCH 1239/1911] usb: musb: cppi41: don't check early-TX-interrupt for Isoch transfer The CPPI 4.1 driver polls register to workaround the premature TX interrupt issue, but it causes audio playback underrun when triggered in Isoch transfers. Isoch doesn't do back-to-back transfers, the TX should be done by the time the next transfer is scheduled. So skip this polling workaround for Isoch transfer. Fixes: a655f481d83d6 ("usb: musb: musb_cppi41: handle pre-mature TX complete interrupt") Cc: #4.1+ Reported-by: Alexandre Bailon Acked-by: Sebastian Andrzej Siewior Tested-by: Alexandre Bailon Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 00e272bfee39a9..355655f8a3fbc9 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -238,8 +238,27 @@ static void cppi41_dma_callback(void *private_data, transferred < cppi41_channel->packet_sz) cppi41_channel->prog_len = 0; - if (cppi41_channel->is_tx) - empty = musb_is_tx_fifo_empty(hw_ep); + if (cppi41_channel->is_tx) { + u8 type; + + if (is_host_active(musb)) + type = hw_ep->out_qh->type; + else + type = hw_ep->ep_in.type; + + if (type == USB_ENDPOINT_XFER_ISOC) + /* + * Don't use the early-TX-interrupt workaround below + * for Isoch transfter. Since Isoch are periodic + * transfer, by the time the next transfer is + * scheduled, the current one should be done already. + * + * This avoids audio playback underrun issue. + */ + empty = true; + else + empty = musb_is_tx_fifo_empty(hw_ep); + } if (!cppi41_channel->is_tx || empty) { cppi41_trans_done(cppi41_channel); From 6b7ad496084e3d8ffa844a02e0f7f76f3eb82203 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:36 -0600 Subject: [PATCH 1240/1911] usb: musb: dsps: fix iounmap in error and exit paths Cleanly iounmap the pointer in error and exit paths. 
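A minimal sketch of the unwind pattern the dsps fix follows; the glue struct and example_setup() step are placeholders for illustration, not the driver's real call chain:

	#include <linux/io.h>
	#include <linux/platform_device.h>

	struct example_glue {
		void __iomem *base;
	};

	static int example_setup(struct platform_device *pdev)
	{
		return 0;	/* placeholder for the remaining init steps */
	}

	static int example_probe(struct platform_device *pdev)
	{
		struct example_glue *glue;
		struct resource *res;
		int ret;

		glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
		if (!glue)
			return -ENOMEM;

		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		if (!res)
			return -EINVAL;

		glue->base = ioremap(res->start, resource_size(res));
		if (!glue->base)
			return -ENOMEM;

		ret = example_setup(pdev);
		if (ret)
			goto err_iounmap;	/* every failure after ioremap() unwinds it */

		platform_set_drvdata(pdev, glue);
		return 0;

	err_iounmap:
		iounmap(glue->base);
		return ret;
	}

	static int example_remove(struct platform_device *pdev)
	{
		struct example_glue *glue = platform_get_drvdata(pdev);

		iounmap(glue->base);	/* exit path mirrors the error path */
		return 0;
	}
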
Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 7c047c4a2565cc..9c7ee26ef38806 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -933,7 +933,7 @@ static int dsps_probe(struct platform_device *pdev) if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) { ret = dsps_setup_optional_vbus_irq(pdev, glue); if (ret) - return ret; + goto err_iounmap; } platform_set_drvdata(pdev, glue); @@ -946,6 +946,8 @@ static int dsps_probe(struct platform_device *pdev) err: pm_runtime_disable(&pdev->dev); +err_iounmap: + iounmap(glue->usbss_base); return ret; } @@ -956,6 +958,7 @@ static int dsps_remove(struct platform_device *pdev) platform_device_unregister(glue->musb); pm_runtime_disable(&pdev->dev); + iounmap(glue->usbss_base); return 0; } From bc1e2154542071e3cfe1734b143af9b8bdacf8bd Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:37 -0600 Subject: [PATCH 1241/1911] usb: musb: fix possible spinlock deadlock The DSPS glue calls del_timer_sync() in its musb_platform_disable() implementation, which requires the caller to not hold a lock. But musb_remove() calls musb_platform_disable() will musb->lock held. This could causes spinlock deadlock. So change musb_remove() to call musb_platform_disable() without holds musb->lock. This doesn't impact the musb_platform_disable implementation in other glue drivers. root@am335x-evm:~# modprobe -r musb-dsps [ 126.134879] musb-hdrc musb-hdrc.1: remove, state 1 [ 126.140465] usb usb2: USB disconnect, device number 1 [ 126.146178] usb 2-1: USB disconnect, device number 2 [ 126.416985] musb-hdrc musb-hdrc.1: USB bus 2 deregistered [ 126.423943] [ 126.425525] ====================================================== [ 126.431997] [ INFO: possible circular locking dependency detected ] [ 126.438564] 4.11.0-rc1-00003-g1557f13bca04-dirty #77 Not tainted [ 126.444852] ------------------------------------------------------- [ 126.451414] modprobe/778 is trying to acquire lock: [ 126.456523] (((&glue->timer))){+.-...}, at: [] del_timer_sync+0x0/0xd0 [ 126.464403] [ 126.464403] but task is already holding lock: [ 126.470511] (&(&musb->lock)->rlock){-.-...}, at: [] musb_remove+0x50/0x1 30 [musb_hdrc] [ 126.479965] [ 126.479965] which lock already depends on the new lock. 
[ 126.479965] [ 126.488531] [ 126.488531] the existing dependency chain (in reverse order) is: [ 126.496368] [ 126.496368] -> #1 (&(&musb->lock)->rlock){-.-...}: [ 126.502968] otg_timer+0x80/0xec [musb_dsps] [ 126.507990] call_timer_fn+0xb4/0x390 [ 126.512372] expire_timers+0xf0/0x1fc [ 126.516754] run_timer_softirq+0x80/0x178 [ 126.521511] __do_softirq+0xc4/0x554 [ 126.525802] irq_exit+0xe8/0x158 [ 126.529735] __handle_domain_irq+0x58/0xb8 [ 126.534583] __irq_usr+0x54/0x80 [ 126.538507] [ 126.538507] -> #0 (((&glue->timer))){+.-...}: [ 126.544636] del_timer_sync+0x40/0xd0 [ 126.549066] musb_remove+0x6c/0x130 [musb_hdrc] [ 126.554370] platform_drv_remove+0x24/0x3c [ 126.559206] device_release_driver_internal+0x14c/0x1e0 [ 126.565225] bus_remove_device+0xd8/0x108 [ 126.569970] device_del+0x1e4/0x308 [ 126.574170] platform_device_del+0x24/0x8c [ 126.579006] platform_device_unregister+0xc/0x20 [ 126.584394] dsps_remove+0x14/0x30 [musb_dsps] [ 126.589595] platform_drv_remove+0x24/0x3c [ 126.594432] device_release_driver_internal+0x14c/0x1e0 [ 126.600450] driver_detach+0x38/0x6c [ 126.604740] bus_remove_driver+0x4c/0xa0 [ 126.609407] SyS_delete_module+0x11c/0x1e4 [ 126.614252] __sys_trace_return+0x0/0x10 Fixes: ea2f35c01d5ea ("usb: musb: Fix sleeping function called from invalid context for hdrc glue") Cc: #4.9+ Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index d8bae6ca890475..0c3664ab705eed 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2490,8 +2490,8 @@ static int musb_remove(struct platform_device *pdev) musb_host_cleanup(musb); musb_gadget_cleanup(musb); - spin_lock_irqsave(&musb->lock, flags); musb_platform_disable(musb); + spin_lock_irqsave(&musb->lock, flags); musb_disable_interrupts(musb); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); spin_unlock_irqrestore(&musb->lock, flags); From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 8 Feb 2017 09:24:25 +0000 Subject: [PATCH 1242/1911] serial: st-asc: Use new GPIOD API to obtain RTS pin The commits mentioned below adapt the GPIO API to allow more information to be passed directly through devm_get_gpiod_from_child() in the first instance. This facilitates the removal of subsequent calls, such as gpiod_direction_output(). This patch firstly moves to utilise the new API and secondly removes the now superfluous call do set the direction. 
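Condensed from the st-asc hunk below, the shape of the GPIO API change (error handling trimmed):

	/* before: request the descriptor, then set the direction separately */
	gpiod = devm_get_gpiod_from_child(port->dev, "rts", &np->fwnode);
	if (!IS_ERR(gpiod))
		gpiod_direction_output(gpiod, 0);

	/* after: direction/initial value (GPIOD_OUT_LOW) and a label at request time */
	gpiod = devm_fwnode_get_gpiod_from_child(port->dev, "rts", &np->fwnode,
						 GPIOD_OUT_LOW, np->name);
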
Reported-by: Stephen Rothwell Suggested-by: Boris Brezillon Signed-off-by: Lee Jones [Also drop the header file dummies that only this driver was using] Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- drivers/tty/serial/st-asc.c | 11 ++++++----- include/linux/gpio/consumer.h | 16 ---------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index bcf1d33e6ffe0b..c334bcc59c649e 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, pinctrl_select_state(ascport->pinctrl, ascport->states[NO_HW_FLOWCTRL]); - gpiod = devm_get_gpiod_from_child(port->dev, "rts", - &np->fwnode); - if (!IS_ERR(gpiod)) { - gpiod_direction_output(gpiod, 0); + gpiod = devm_fwnode_get_gpiod_from_child(port->dev, + "rts", + &np->fwnode, + GPIOD_OUT_LOW, + np->name); + if (!IS_ERR(gpiod)) ascport->rts = gpiod; - } } } diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb58..933d936566055d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); -/* FIXME: delete this helper when users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, - 0, child, - GPIOD_ASIS, - "?"); -} #else /* CONFIG_GPIOLIB */ @@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } -/* FIXME: delete this when all users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB */ static inline From 6e9f44eaaef0df7b846e9316fa9ca72a02025d44 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 11:32:28 -0600 Subject: [PATCH 1243/1911] USB: serial: option: add Quectel UC15, UC20, EC21, and EC25 modems Add Quectel UC15, UC20, EC21, and EC25. The EC20 is handled by qcserial due to a USB VID/PID conflict with an existing Acer device. 
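As background on the driver_info values used below (hedged: this describes how option.c's blacklist tables generally work and is not part of the patch), a net_intfN_blacklist entry marks interface N as reserved so the serial driver leaves it for the QMI/network driver to claim, roughly:

	static const struct option_blacklist_info net_intf4_blacklist = {
		.reserved = BIT(4),	/* interface 4 carries the network function */
	};
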
Signed-off-by: Dan Williams Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42cc72e54c051b..af67a0de6b5d47 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,14 @@ static void option_instat_callback(struct urb *urb); #define BANDRICH_PRODUCT_1012 0x1012 #define QUALCOMM_VENDOR_ID 0x05C6 +/* These Quectel products use Qualcomm's vendor ID */ +#define QUECTEL_PRODUCT_UC20 0x9003 +#define QUECTEL_PRODUCT_UC15 0x9090 + +#define QUECTEL_VENDOR_ID 0x2c7c +/* These Quectel products use Quectel's vendor ID */ +#define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_EC25 0x0125 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1161,7 +1169,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + /* Quectel products using Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Quectel products using Quectel vendor ID */ + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 60b89f1928af80b546b5c3fd8714a62f6f4b8844 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 14 Mar 2017 09:38:04 +0100 Subject: [PATCH 1244/1911] ARM: at91: pm: cpu_idle: switch DDR to power-down mode On some DDR controllers, compatible with the sama5d3 one, the sequence to enter/exit/re-enter the self-refresh mode adds more constrains than what is currently written in the at91_idle driver. An actual access to the DDR chip is needed between exit and re-enter of this mode which is somehow difficult to implement. This sequence can completely hang the SoC. It is particularly experienced on parts which embed a L2 cache if the code run between IDLE calls fits in it... Moreover, as the intention is to enter and exit pretty rapidly from IDLE, the power-down mode is a good candidate. So now we use power-down instead of self-refresh. As we can simplify the code for sama5d3 compatible DDR controllers, we instantiate a new sama5d3_ddr_standby() function. 
Signed-off-by: Nicolas Ferre Cc: # v4.1+ Fixes: 017b5522d5e3 ("ARM: at91: Add new binding for sama5d3-ddramc") Signed-off-by: Alexandre Belloni --- arch/arm/mach-at91/pm.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 3d89b7905bd903..a277981f414d8d 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -289,6 +289,22 @@ static void at91_ddr_standby(void) at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1); } +static void sama5d3_ddr_standby(void) +{ + u32 lpr0; + u32 saved_lpr0; + + saved_lpr0 = at91_ramc_read(0, AT91_DDRSDRC_LPR); + lpr0 = saved_lpr0 & ~AT91_DDRSDRC_LPCB; + lpr0 |= AT91_DDRSDRC_LPCB_POWER_DOWN; + + at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0); + + cpu_do_idle(); + + at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0); +} + /* We manage both DDRAM/SDRAM controllers, we need more than one value to * remember. */ @@ -323,7 +339,7 @@ static const struct of_device_id const ramc_ids[] __initconst = { { .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby }, { .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby }, { .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby }, - { .compatible = "atmel,sama5d3-ddramc", .data = at91_ddr_standby }, + { .compatible = "atmel,sama5d3-ddramc", .data = sama5d3_ddr_standby }, { /*sentinel*/ } }; From da9a796f5475b4d3a339083af719982b7ab4a12b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 16:59:57 +0000 Subject: [PATCH 1245/1911] drm/i915: Split GEM resetting into 3 phases Currently we do a reset prepare/finish around the call to reset the GPU, but it looks like we need a later stage after the hw has been reinitialised to allow GEM to restart itself. Start by splitting the 2 GEM phases into 3: prepare - before the reset, check if GEM recovered, then stop GEM reset - after the reset, update GEM bookkeeping finish - after the re-initialisation following the reset, restart GEM Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-2-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-1-chris@chris-wilson.co.uk (cherry picked from commit d80270931314a88d79d9bd5e0a5df93c12196375) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e703556eba999a..2093d203665d0d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1788,7 +1788,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... 
*/ @@ -1811,6 +1811,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } + i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7febe6eecf722a..1d20c2d00f4285 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3342,6 +3342,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 10777da730394f..27fe5a9315f03d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2834,7 +2834,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2856,6 +2856,11 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->drm.struct_mutex); +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); From 4565bf58d45b8aded94e446666839955cdb5030c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 16:59:58 +0000 Subject: [PATCH 1246/1911] drm/i915: Disable engine->irq_tasklet around resets When we restart the engines, and we have active requests, a request on the first engine may complete and queue a request to the second engine before we try to restart the second engine. That queueing of the request may race with the engine to restart, and so may corrupt the current state. Disabling the engine->irq_tasklet prevents the two paths from writing into ELSP simultaneously (and modifying the execlists_port[] at the same time). Include fixup 1d309634bcf4 ("drm/i915: Kill the tasklet then disable") Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Testcase: igt/gem_exec_fence/await-hang Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-3-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-2-chris@chris-wilson.co.uk (cherry picked from commit 1f7b847d72c3583df5048d83bd945d0c2c524c28) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27fe5a9315f03d..1051fdf37d205b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2719,7 +2719,16 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish().
If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); @@ -2858,7 +2867,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish(struct drm_i915_private *dev_priv) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + tasklet_enable(&engine->irq_tasklet); } static void nop_submit_request(struct drm_i915_gem_request *request) From 35a3abfd198e6c69a6644784bb09a2d951fc6b21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 17:02:31 +0000 Subject: [PATCH 1247/1911] drm/i915: Only enable hotplug interrupts if the display interrupts are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prevent accessing the hpd registers outside of the display power wells, we should refrain from writing to the registers before the display interrupts are enabled. [ 4.740136] WARNING: CPU: 1 PID: 221 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740155] Unclaimed read from register 0x1e1110 [ 4.740168] Modules linked in: i915(+) intel_gtt drm_kms_helper prime_numbers [ 4.740190] CPU: 1 PID: 221 Comm: systemd-udevd Not tainted 4.10.0-rc6+ #384 [ 4.740203] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 4.740220] Call Trace: [ 4.740236] dump_stack+0x4d/0x6f [ 4.740251] __warn+0xc1/0xe0 [ 4.740265] warn_slowpath_fmt+0x4a/0x50 [ 4.740281] ? insert_work+0x77/0xc0 [ 4.740355] ? fwtable_write32+0x90/0x130 [i915] [ 4.740431] __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740507] fwtable_read32+0xd8/0x130 [i915] [ 4.740575] i915_hpd_irq_setup+0xa5/0x100 [i915] [ 4.740649] intel_hpd_init+0x68/0x80 [i915] [ 4.740716] i915_driver_load+0xe19/0x1380 [i915] [ 4.740784] i915_pci_probe+0x32/0x90 [i915] [ 4.740799] pci_device_probe+0x8b/0xf0 [ 4.740815] driver_probe_device+0x2b6/0x450 [ 4.740828] __driver_attach+0xda/0xe0 [ 4.740841] ? driver_probe_device+0x450/0x450 [ 4.740853] bus_for_each_dev+0x5b/0x90 [ 4.740865] driver_attach+0x19/0x20 [ 4.740878] bus_add_driver+0x166/0x260 [ 4.740892] driver_register+0x5b/0xd0 [ 4.740906] ? 0xffffffffa0166000 [ 4.740920] __pci_register_driver+0x47/0x50 [ 4.740985] i915_init+0x5c/0x5e [i915] [ 4.740999] do_one_initcall+0x3e/0x160 [ 4.741015] ? __vunmap+0x7c/0xc0 [ 4.741029] ? kmem_cache_alloc+0xcf/0x120 [ 4.741045] do_init_module+0x55/0x1c4 [ 4.741060] load_module+0x1f3f/0x25b0 [ 4.741073] ? __symbol_put+0x40/0x40 [ 4.741086] ? 
kernel_read_file+0x100/0x190 [ 4.741100] SYSC_finit_module+0xbc/0xf0 [ 4.741112] SyS_finit_module+0x9/0x10 [ 4.741125] entry_SYSCALL_64_fastpath+0x17/0x98 [ 4.741135] RIP: 0033:0x7f8559a140f9 [ 4.741145] RSP: 002b:00007fff7509a3e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.741161] RAX: ffffffffffffffda RBX: 00007f855aba02d1 RCX: 00007f8559a140f9 [ 4.741172] RDX: 0000000000000000 RSI: 000055b6db0914f0 RDI: 0000000000000011 [ 4.741183] RBP: 0000000000020000 R08: 0000000000000000 R09: 000000000000000e [ 4.741193] R10: 0000000000000011 R11: 0000000000000246 R12: 000055b6db0854d0 [ 4.741204] R13: 000055b6db091150 R14: 0000000000000000 R15: 000055b6db035924 v2: Set dev_priv->display_irqs_enabled to true for all platforms other than vlv/chv that manually control the display power domain. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97798 Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Hans de Goede Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170215131547.5064-1-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313170231.18633-1-chris@chris-wilson.co.uk (cherry picked from commit 262fd485ac6b476479f41f00bb104f6a1766ae66) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_hotplug.c | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e6ffef2f707a01..4fc8973744b427 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4266,6 +4266,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415f6..54208bef7a8356 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -219,7 +219,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -425,7 +425,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -471,10 +471,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. 
*/ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) From 0f5418e564ac6452b9086295646e602a9addc4bf Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 13 Mar 2017 17:04:33 +0000 Subject: [PATCH 1248/1911] drm/i915: Drop support for I915_EXEC_CONSTANTS_* execbuf parameters. This patch makes the I915_PARAM_HAS_EXEC_CONSTANTS getparam return 0 (indicating the optional feature is not supported), and makes execbuf always return -EINVAL if the flags are used. Apparently, no userspace ever shipped which used this optional feature: I checked the git history of Mesa, xf86-video-intel, libva, and Beignet, and there were zero commits showing a use of these flags. Kernel commit 72bfa19c8deb4 apparently introduced the feature prematurely. According to Chris, the intention was to use this in cairo-drm, but "the use was broken for gen6", so I don't think it ever happened. 'relative_constants_mode' has always been tracked per-device, but this has actually been wrong ever since hardware contexts were introduced, as the INSTPM register is saved (and automatically restored) as part of the render ring context. The software per-device value could therefore get out of sync with the hardware per-context value. This meant that using them is actually unsafe: a client which tried to use them could damage the state of other clients, causing the GPU to interpret their BO offsets as absolute pointers, leading to bogus memory reads. These flags were also never ported to execlist mode, making them no-ops on Gen9+ (which requires execlists), and Gen8 in the default mode. On Gen8+, userspace can write these registers directly, achieving the same effect. On Gen6-7.5, it likely makes sense to extend the command parser to support them. I don't think anyone wants this on Gen4-5. Based on a patch by Dave Gordon. v3: Return -ENODEV for the getparam, as this is what we do for other obsolete features. Suggested by Chris Wilson. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92448 Signed-off-by: Kenneth Graunke Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215093446.21291-1-kenneth@whitecape.org Acked-by: Daniel Vetter Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170313170433.26843-1-chris@chris-wilson.co.uk (cherry picked from commit ef0f411f51475f4eebf9fc1b19a85be698af19ff) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem.c | 2 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 +--------------------- 4 files changed, 3 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2093d203665d0d..6cd78bb2064d83 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -248,6 +248,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. 
*/ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -274,9 +275,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d20c2d00f4285..80be09831a520f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2064,8 +2064,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1051fdf37d205b..67b1fc5a03313b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4694,8 +4694,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d02cfaefe1c84e..30e0675fd7dab7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1408,10 +1408,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1422,56 +1419,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; - - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = 
i915_reset_gen7_sol_offsets(params->request); if (ret) From 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 17:06:17 +0000 Subject: [PATCH 1249/1911] drm/i915: Stop using RP_DOWN_EI on Baytrail On Baytrail, we manually calculate busyness over the evaluation interval to avoid issues with miscalculations with RC6 enabled. However, it turns out that the DOWN_EI interrupt generator is completely bust - it operates in two modes, continuous or never, neither of which is conducive to good behaviour. Stop unmasking the DOWN_EI interrupt and just compute everything from the UP_EI, which does seem to correspond to the desired interval. v2: Fixup gen6_rps_pm_mask() as well v3: Inline vlv_c0_above() to combine the now identical elapsed calculation for up/down and simplify the threshold testing Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v4.1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk (cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 73 +++++++++++++-------------------- drivers/gpu/drm/i915/intel_pm.c | 5 ++- 3 files changed, 32 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 80be09831a520f..1e53c31b6826ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1325,7 +1325,7 @@ struct intel_gen6_power_mgmt { unsigned boosts; /* manual wa residency calculations */ - struct intel_rps_ei up_ei, down_ei; + struct intel_rps_ei ei; /* * Protects RPS/RC6 register access and PCU communication. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4fc8973744b427..b6c886ac901bd7 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); } -static bool vlv_c0_above(struct drm_i915_private *dev_priv, - const struct intel_rps_ei *old, - const struct intel_rps_ei *now, - int threshold) -{ - u64 time, c0; - unsigned int mul = 100; - - if (old->cz_clock == 0) - return false; - - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) - mul <<= 8; - - time = now->cz_clock - old->cz_clock; - time *= threshold * dev_priv->czclk_freq; - - /* Workload can be split between render + media, e.g. SwapBuffers - * being blitted in X after being rendered in mesa. To account for - * this we need to combine both engines into our activity counter.
- */ - c0 = now->render_c0 - old->render_c0; - c0 += now->media_c0 - old->media_c0; - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC; - - return c0 >= time; -} - void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); - dev_priv->rps.up_ei = dev_priv->rps.down_ei; + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { + const struct intel_rps_ei *prev = &dev_priv->rps.ei; struct intel_rps_ei now; u32 events = 0; - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) return 0; vlv_c0_read(dev_priv, &now); if (now.cz_clock == 0) return 0; - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { - if (!vlv_c0_above(dev_priv, - &dev_priv->rps.down_ei, &now, - dev_priv->rps.down_threshold)) - events |= GEN6_PM_RP_DOWN_THRESHOLD; - dev_priv->rps.down_ei = now; - } + if (prev->cz_clock) { + u64 time, c0; + unsigned int mul; - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { - if (vlv_c0_above(dev_priv, - &dev_priv->rps.up_ei, &now, - dev_priv->rps.up_threshold)) - events |= GEN6_PM_RP_UP_THRESHOLD; - dev_priv->rps.up_ei = now; + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */ + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) + mul <<= 8; + + time = now.cz_clock - prev->cz_clock; + time *= dev_priv->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + c0 = now.render_c0 - prev->render_c0; + c0 += now.media_c0 - prev->media_c0; + c0 *= mul; + + if (c0 > time * dev_priv->rps.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * dev_priv->rps.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; } + dev_priv->rps.ei = now; return events; } @@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) /* Let's track the enabled rps events */ if (IS_VALLEYVIEW(dev_priv)) /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 940bab22d4649b..6a29784d2b4137 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4928,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { u32 mask = 0; + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ if (val > dev_priv->rps.min_freq_softlimit) - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; if (val < dev_priv->rps.max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; @@ -5039,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); From abf8315f71dc5a2ee56fb60830dcb2861982dc91 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 31 Jan 2017 16:18:42 +0200 
Subject: [PATCH 1250/1911] drm/tilcdc: Fix hardcoded fail-return value in tilcdc_crtc_create() Fix the badly hardcoded return value under the fail label. All goto branches to the label set the "ret" variable accordingly. Signed-off-by: Jyri Sarha Reviewed-by: Gabriel Krisman Bertazi --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index f80bf9385e412d..abcbcd9f5851f9 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -1036,5 +1036,5 @@ int tilcdc_crtc_create(struct drm_device *dev) fail: tilcdc_crtc_destroy(crtc); - return -ENOMEM; + return ret; } From 11abbc9f39e002a2b25657e00abac8056cb39e93 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 1 Mar 2017 10:30:28 +0200 Subject: [PATCH 1251/1911] drm/tilcdc: Set framebuffer DMA address to HW only if CRTC is enabled Touching HW while clocks are off is a serious error and for instance breaks suspend functionality. After this patch tilcdc_crtc_update_fb() always updates the primary plane's framebuffer pointer, increases fb's reference count and stores vblank event. tilcdc_crtc_update_fb() only writes the fb's DMA address to HW if the crtc is enabled, as tilcdc_crtc_enable() takes care of writing the address on enable. This patch also refactors tilcdc_crtc_update_fb() a bit. A number of subsequent small changes had made it almost unreadable. There should be no functional changes other than checking the CRTC's enable state. However, the locking goes a bit differently and some of the redundant checks have been removed in this new version. The enable_lock should be enough to protect the access to tilcdc_crtc->enabled. The irq_lock protects the access to last_vblank and next_fb. The check for vrefresh and last_vblank being valid is redundant, as the vrefresh should always be valid if the CRTC is enabled and now last_vblank should be too, because it is initialized to the current time when the CRTC raster is enabled. If for some reason the values are not correctly initialized, the division-by-zero warning is quite appropriate. Signed-off-by: Jyri Sarha Reviewed-by: Tomi Valkeinen --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 35 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index abcbcd9f5851f9..d745e8b50fb864 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); mutex_lock(&tilcdc_crtc->enable_lock); @@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY), LCDC_PALETTE_LOAD_MODE_MASK); + + /* There is no real chance for a race here as the time stamp + * is taken before the raster DMA is started. The spin-lock is + * taken to have a memory barrier after taking the time-stamp + * and to avoid a context switch between taking the stamp and + * enabling the raster.
+ */ + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + tilcdc_crtc->last_vblank = ktime_get(); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); drm_crtc_vblank_on(crtc); @@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown) } drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq); - tilcdc_crtc->last_vblank = 0; tilcdc_crtc->enabled = false; mutex_unlock(&tilcdc_crtc->enable_lock); @@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); @@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, drm_framebuffer_reference(fb); crtc->primary->fb = fb; + tilcdc_crtc->event = event; - spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + mutex_lock(&tilcdc_crtc->enable_lock); - if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) { + if (tilcdc_crtc->enabled) { + unsigned long flags; ktime_t next_vblank; s64 tdiff; - next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, + 1000000 / crtc->hwmode.vrefresh); tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) tilcdc_crtc->next_fb = fb; - } - - if (tilcdc_crtc->next_fb != fb) - set_scanout(crtc, fb); + else + set_scanout(crtc, fb); - tilcdc_crtc->event = event; + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + } - spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + mutex_unlock(&tilcdc_crtc->enable_lock); return 0; } From 1b2e5ea0b7061be3ffdcd85918c2f428edace4ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:59 +0000 Subject: [PATCH 1252/1911] drm/i915: Always call i915_gem_reset_finish() following i915_gem_reset_prepare() As i915_gem_reset_finish() undoes the steps from i915_gem_reset_prepare() to leave the system in a fully-working state, e.g. to be able to free the breadcrumb signal threads, make sure that we always call it even on the error path. 
Fixes: da9a796f5475 ("drm/i915: Split GEM resetting into 3 phases") Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit 8d613c539c74fa9055f88f4116196d7c820bd98f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6cd78bb2064d83..1c75402a59c137 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1809,10 +1809,10 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; From fa23b9d1b89fdc34f296f02e496a20aeff5736be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:54:12 +0100 Subject: [PATCH 1253/1911] irqchip/mvebu-odmi: Select GENERIC_MSI_IRQ_DOMAIN This driver uses the MSI domain but has no strict dependency on PCI_MSI, so we may run into a build failure when CONFIG_GENERIC_MSI_IRQ_DOMAIN is disabled: drivers/irqchip/irq-mvebu-odmi.c:152:15: error: variable 'odmi_msi_ops' has initializer but incomplete type static struct msi_domain_ops odmi_msi_ops = { ^~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:155:15: error: variable 'odmi_msi_domain_info' has initializer but incomplete type static struct msi_domain_info odmi_msi_domain_info = { ^~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:3: error: 'struct msi_domain_info' has no member named 'flags' .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:12: error: 'MSI_FLAG_USE_DEF_DOM_OPS' undeclared here (not in a function) .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:39: error: 'MSI_FLAG_USE_DEF_CHIP_OPS' undeclared here (not in a function); did you mean 'MSI_FLAG_USE_DEF_DOM_OPS'? Selecting the option from this driver seems to solve this nicely, though I could not find any other instance of this in irqchip drivers. Signed-off-by: Arnd Bergmann Acked-by: Thomas Petazzoni Signed-off-by: Marc Zyngier --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 125528f39e92c2..8162121bb1bcd7 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -262,6 +262,7 @@ config IRQ_MXS config MVEBU_ODMI bool + select GENERIC_MSI_IRQ_DOMAIN config MVEBU_PIC bool From 318465adace471387cfd5f768daa64bb4c623695 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 13 Feb 2017 19:02:39 +0800 Subject: [PATCH 1254/1911] dt-bindings: rockchip-dw-mshc: rename RK1108 to RV1108 Rockchip finally named the SOC as RV1108, so change it. 
Signed-off-by: Andy Yan Acked-by: Rob Herring Reviewed-by: Heiko Stuebner Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt index ea9c1c9607f612..520d61dad6dd7f 100644 --- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt @@ -13,7 +13,7 @@ Required Properties: - "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following, before RK3288 - "rockchip,rk3288-dw-mshc": for Rockchip RK3288 - - "rockchip,rk1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK1108 + - "rockchip,rv1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RV1108 - "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036 - "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368 - "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399 From 16681037e75ce08f2980ac5dbb03414429c7a55d Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 13 Feb 2017 14:06:10 +0200 Subject: [PATCH 1255/1911] mmc: sdhci-of-arasan: fix incorrect timeout clock sdhci_arasan_get_timeout_clock() divides the frequency it has with (1 << (13 + divisor)). However, the divisor is not some Arasan-specific value, but instead is just the Data Timeout Counter Value from the SDHCI Timeout Control Register. Applying it here like this is wrong as the sdhci driver already takes that value into account when calculating timeouts, and in fact it *sets* that register value based on how long a timeout is wanted. Additionally, sdhci core interprets the .get_timeout_clock callback return value as if it were read from hardware registers, i.e. the unit should be kHz or MHz depending on SDHCI_TIMEOUT_CLK_UNIT capability bit. This bit is set at least on the tested Zynq-7000 SoC. With the tested hardware (SDHCI_TIMEOUT_CLK_UNIT set) this results in too high a timeout clock rate being reported, causing the core to use longer-than-needed timeouts. Additionally, on a partitioned MMC (therefore having erase_group_def bit set) mmc_calc_max_discard() disables discard support as it looks like controller does not support the long timeouts needed for that. Do not apply the extra divisor and return the timeout clock in the expected unit. Tested with a Zynq-7000 SoC and a partitioned Toshiba THGBMAG5A1JBAWR eMMC card. 
Signed-off-by: Anssi Hannula Fixes: e3ec3a3d11ad ("mmc: arasan: Add driver for Arasan SDHCI") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 410a55b1c25fe5..1cfd7f90033944 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -28,13 +28,9 @@ #include "sdhci-pltfm.h" #include -#define SDHCI_ARASAN_CLK_CTRL_OFFSET 0x2c #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 #define VENDOR_ENHANCED_STROBE BIT(0) -#define CLK_CTRL_TIMEOUT_SHIFT 16 -#define CLK_CTRL_TIMEOUT_MASK (0xf << CLK_CTRL_TIMEOUT_SHIFT) -#define CLK_CTRL_TIMEOUT_MIN_EXP 13 #define PHY_CLK_TOO_SLOW_HZ 400000 @@ -163,15 +159,15 @@ static int sdhci_arasan_syscon_write(struct sdhci_host *host, static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host) { - u32 div; unsigned long freq; struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - div = readl(host->ioaddr + SDHCI_ARASAN_CLK_CTRL_OFFSET); - div = (div & CLK_CTRL_TIMEOUT_MASK) >> CLK_CTRL_TIMEOUT_SHIFT; + /* SDHCI timeout clock is in kHz */ + freq = DIV_ROUND_UP(clk_get_rate(pltfm_host->clk), 1000); - freq = clk_get_rate(pltfm_host->clk); - freq /= 1 << (CLK_CTRL_TIMEOUT_MIN_EXP + div); + /* or in MHz */ + if (host->caps & SDHCI_TIMEOUT_CLK_UNIT) + freq = DIV_ROUND_UP(freq, 1000); return freq; } From 9c31b087348cb2b5e668261f2eee2f224b3780b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 13 Feb 2017 19:58:18 +0200 Subject: [PATCH 1256/1911] drm/i915: Reject HDMI 12bpc if the sink doesn't indicate support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the sink really declared 12bpc support before we enable it. This should never actually happen since it's mandatory for HDMI sinks to support 12bpc if they support any deep color modes. But reality disagrees with the theory and there are actually sinks in the wild that violate the spec. v2: Fix the output_types check Update commit message to state that these things are in fact real Cc: stable@vger.kernel.org Cc: Nicholas Sielicki Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99250 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170213175818.24958-1-ville.syrjala@linux.intel.com Reviewed-by: Shashank Sharma (cherry picked from commit c750bdd3e7e204cc88b32806c3864487a03cd84b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_hdmi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ebae2bd839189c..24b2fa5b62824d 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1298,16 +1298,34 @@ intel_hdmi_mode_valid(struct drm_connector *connector, static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + struct drm_atomic_state *state = crtc_state->base.state; + struct drm_connector_state *connector_state; + struct drm_connector *connector; + int i; - if (HAS_GMCH_DISPLAY(to_i915(dev))) + if (HAS_GMCH_DISPLAY(dev_priv)) return false; /* * HDMI 12bpc affects the clocks, so it's only possible * when not cloning with other encoder types.
*/ - return crtc_state->output_types == 1 << INTEL_OUTPUT_HDMI; + if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI) + return false; + + for_each_connector_in_state(state, connector, connector_state, i) { + const struct drm_display_info *info = &connector->display_info; + + if (connector_state->crtc != crtc_state->base.crtc) + continue; + + if ((info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36) == 0) + return false; + } + + return true; } bool intel_hdmi_compute_config(struct intel_encoder *encoder, From 773dc118756b1f38766063e90e582016be868f09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 1 Mar 2017 14:11:47 -0800 Subject: [PATCH 1257/1911] mmc: core: Fix access to HS400-ES devices HS400-ES devices fail to initialize with the following error messages. mmc1: power class selection to bus width 8 ddr 0 failed mmc1: error -110 whilst initialising MMC card This was seen on Samsung Chromebook Plus. Code analysis points to commit 3d4ef329757c ("mmc: core: fix multi-bit bus width without high-speed mode"), which attempts to set the bus width for all but HS200 devices unconditionally. However, for HS400-ES, the bus width is already selected. Cc: Anssi Hannula Cc: Douglas Anderson Cc: Brian Norris Fixes: 3d4ef329757c ("mmc: core: fix multi-bit bus width ...") Signed-off-by: Guenter Roeck Reviewed-by: Douglas Anderson Reviewed-by: Shawn Lin Tested-by: Heiko Stuebner Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 7fd722868875f3..b502601df22815 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1730,7 +1730,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, err = mmc_select_hs400(card); if (err) goto free_card; - } else { + } else if (!mmc_card_hs400es(card)) { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); if (err > 0 && mmc_card_hs(card)) { From 2602b740e45cc64feb55d5a9ee8db744ab3becbb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:32 +0200 Subject: [PATCH 1258/1911] mmc: block: Fix is_waiting_last_req set incorrectly Commit 15520111500c ("mmc: core: Further fix thread wake-up") allowed a queue to release the host with is_waiting_last_req set to true. A queue waiting to claim the host will not reset it, which can result in the queue getting stuck in a loop. Fixes: 15520111500c ("mmc: core: Further fix thread wake-up") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 1621fa08e20692..e59107ca512a5e 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1817,6 +1817,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) mmc_blk_issue_flush(mq, req); } else { mmc_blk_issue_rw_rq(mq, req); + card->host->context_info.is_waiting_last_req = false; } out: From 8ecc34448e24e9e8a8f3a9b37be70b43c6af5288 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:33 +0200 Subject: [PATCH 1259/1911] mmc: block: Fix cmd error reset failure path Commit 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()") assumed the request had not completed, but in one case it had. Fix that. 
Fixes: 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e59107ca512a5e..05afefcfb61114 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1701,7 +1701,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) case MMC_BLK_CMD_ERR: req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending); if (mmc_blk_reset(md, card->host, type)) { - mmc_blk_rw_cmd_abort(card, old_req); + if (req_pending) + mmc_blk_rw_cmd_abort(card, old_req); mmc_blk_rw_try_restart(mq, new_req); return; } From 0977762f6d15f13caccc20d71a5dec47d098907d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 13 Mar 2017 13:44:35 -0700 Subject: [PATCH 1260/1911] md/r5cache: fix set_syndrome_sources() for data in cache Before this patch, device InJournal will be included in prexor (SYNDROME_SRC_WANT_DRAIN) but not in reconstruct (SYNDROME_SRC_WRITTEN). So it will break parity calculation. With srctype == SYNDROME_SRC_WRITTEN, we need include both dev with non-null ->written and dev with R5_InJournal. This fixes logic in 1e6d690(md/r5cache: caching phase of r5cache) Cc: stable@vger.kernel.org (v4.10+) Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6bfedfcf41c17a..ed5cd705b985f1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs, (test_bit(R5_Wantdrain, &dev->flags) || test_bit(R5_InJournal, &dev->flags))) || (srctype == SYNDROME_SRC_WRITTEN && - dev->written)) { + (dev->written || + test_bit(R5_InJournal, &dev->flags)))) { if (test_bit(R5_InJournal, &dev->flags)) srcs[slot] = sh->dev[i].orig_page; else From 9c62110454b088b4914ffe375c2dbc19643eac34 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Mar 2017 11:51:59 -0600 Subject: [PATCH 1261/1911] blk-mq-sched: don't run the queue async from blk_mq_try_issue_directly() If we have scheduling enabled, we jump directly to insert-and-run. That's fine, but we run the queue async and we don't pass in information on whether we can block from this context or not. Fixup both these cases. 
Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 159187a28d6652..a4546f060e8093 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; From 8c53ad2139137dd4bf506a2c2b888de3816e8f75 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 13 Mar 2017 15:14:08 +0800 Subject: [PATCH 1262/1911] drm/amd/powerplay: fix copy error in smu7_clockpoweragting.c Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 8cf71f3c6d0ea4..261b828ad59086 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) if (bgate) { cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_PG_STATE_GATE); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); From 11353b9d10392e79e32603d2178e75feb25eaf0d Mon Sep 17 00:00:00 2001 From: Zhilong Liu Date: Tue, 14 Mar 2017 15:52:26 +0800 Subject: [PATCH 1263/1911] md/raid1: fix a trivial typo in comments raid1.c: fix a trivial typo in comments of freeze_array(). Cc: Jack Wang Cc: Guoqing Jiang Cc: John Stoffel Acked-by: Coly Li Signed-off-by: Zhilong Liu Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c33e96e33b8e2b..a34f58772022c9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf) static void freeze_array(struct r1conf *conf, int extra) { /* Stop sync I/O and normal I/O and wait for everything to - * go quite. + * go quiet. * This is called in two situations: * 1) management command handlers (reshape, remove disk, quiesce). * 2) one normal I/O request failed. 
From dc434e056fe1dada20df7ba07f32739d3a701adf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 14 Mar 2017 16:06:45 +0100 Subject: [PATCH 1264/1911] cpu/hotplug: Serialize callback invocations proper The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themself. As a consequence a concurrent invocation of these function results in corruption of the callback machinery because two instances try to invoke callbacks on remote cpus at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_cpu_online() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpu() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Reported-and-tested-by: Bart Van Assche Signed-off-by: Sebastian Andrzej Siewior Cc: hpa@zytor.com Cc: mingo@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index f7c063239fa5c7..37b223e4fc05b7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, struct cpuhp_step *sp; int ret = 0; - mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) - goto out; + return ret; state = ret; } sp = cpuhp_get_step(state); - if (name && sp->name) { - ret = -EBUSY; - goto out; - } + if (name && sp->name) + return -EBUSY; + sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); -out: - mutex_unlock(&cpuhp_state_mutex); return ret; } @@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); - goto err; + goto unlock; } } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); +unlock: mutex_unlock(&cpuhp_state_mutex); - -err: put_online_cpus(); return ret; } @@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); @@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, } } out: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the @@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state 
state, } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1571,6 +1568,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); + /** * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state * @state: The state to remove @@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", @@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state); From 37c343b4f4e70e9dc328ab04903c0ec8d154c1a4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 14 Mar 2017 08:58:08 -0400 Subject: [PATCH 1265/1911] net: Resend IGMP memberships upon peer notification. When we notify peers of potential changes, it's also good to update IGMP memberships. For example, during VM migration, updating IGMP memberships will redirect existing multicast streams to the VM at the new location. Signed-off-by: Vladislav Yasevich Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 8637b2b71f3d47..7869ae3837ca74 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); From f004ec065b4879d6bc9ba0211af2169b3ce3097f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:00 +0100 Subject: [PATCH 1266/1911] mlxsw: reg: Fix SPVM max record count The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans not being enabled for wider ranges than 255. Fixes: b2e345f9a454 ("mlxsw: reg: Add Switch Port VID and Switch Port VLAN Membership registers definitions") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0899e2d310e262..65e19422b80a4d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) #define MLXSW_REG_SPVM_ID 0x200F #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVM_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVM_REC_MAX_COUNT 255 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) From e9093b1183bbac462d2caef3eac165778c0b1bf1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:01 +0100 Subject: [PATCH 1267/1911] mlxsw: reg: Fix SPVMLR max record count The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans learning not being enabled for wider ranges than 255. 
Fixes: a4feea74cd7a ("mlxsw: reg: Add Switch Port VLAN MAC Learning register definition") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 65e19422b80a4d..d9616daf8a7056 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload, #define MLXSW_REG_SPVMLR_ID 0x2020 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ MLXSW_REG_SPVMLR_REC_LEN * \ MLXSW_REG_SPVMLR_REC_MAX_COUNT) From f3e48119b97f56fb09310c95d49da122a27003d7 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:25:58 +0200 Subject: [PATCH 1268/1911] qed: Align CIDs according to DORQ requirement The Doorbell HW block can be configured at a granularity of 16 x CIDs, so we need to make sure that the actual number of CIDs configured would be a multiplication of 16. Today, when RoCE is enabled - given that the number is unaligned, doorbelling the higher CIDs would fail to reach the firmware and would eventually timeout. Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index d42d03df751acb..7e3a6fed3da6d9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val; u32 cxt_size = CONN_CXT_SIZE(p_hwfn); u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + u32 align = elems_per_page * DQ_RANGE_ALIGN; - p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page); + p_conn->cid_count = roundup(p_conn->cid_count, align); } } From 3ef310a7d99216e0fbdff29f0cb13bc54180373a Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 14 Mar 2017 15:25:59 +0200 Subject: [PATCH 1269/1911] qed: Prevent creation of too-big u32-chains Current Logic would allow the creation of a chain with U32_MAX + 1 elements, when the actual maximum supported by the driver infrastructure is U32_MAX. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index e2a081ceaf520c..e518f914eab13f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev, * size/capacity fields are of a u32 type. 
*/ if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 && - chain_size > 0x10000) || - (cnt_type == QED_CHAIN_CNT_TYPE_U32 && - chain_size > 0x100000000ULL)) { + chain_size > ((u32)U16_MAX + 1)) || + (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) { DP_NOTICE(cdev, "The actual chain size (0x%llx) is larger than the maximal possible value\n", chain_size); From 752ecb2da11124a948567076b60767dc8034cfa5 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:00 +0200 Subject: [PATCH 1270/1911] qed: Fix mapping leak on LL2 rx flow When receiving an Rx LL2 packet, qed fails to unmap the previous buffer. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 5fb34db377c8c3..29ae5ec2ac2db6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, /* If need to reuse or there's no replacement buffer, repost this */ if (rc) goto out_post; + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); skb = build_skb(buffer->data, 0); if (!skb) { From 4621ceb279d065151eb940ce8a4728b10c0646c7 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:01 +0200 Subject: [PATCH 1271/1911] qed: Free previous connections when releasing iSCSI Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 3a44d6b395fac9..7e73b05eeab862 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -786,6 +786,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn, spin_unlock_bh(&p_hwfn->p_iscsi_info->lock); } +void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn, + struct qed_iscsi_conn *p_conn) +{ + qed_chain_free(p_hwfn->cdev, &p_conn->xhq); + qed_chain_free(p_hwfn->cdev, &p_conn->uhq); + qed_chain_free(p_hwfn->cdev, &p_conn->r2tq); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct tcp_upload_params), + p_conn->tcp_upload_params_virt_addr, + p_conn->tcp_upload_params_phys_addr); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct scsi_terminate_extra_params), + p_conn->queue_cnts_virt_addr, + p_conn->queue_cnts_phys_addr); + kfree(p_conn); +} + struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn) { struct qed_iscsi_info *p_iscsi_info; @@ -807,6 +824,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn, void qed_iscsi_free(struct qed_hwfn *p_hwfn, struct qed_iscsi_info *p_iscsi_info) { + struct qed_iscsi_conn *p_conn = NULL; + + while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) { + p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list, + struct qed_iscsi_conn, list_entry); + if (p_conn) { + list_del(&p_conn->list_entry); + qed_iscsi_free_connection(p_hwfn, p_conn); + } + } + kfree(p_iscsi_info); } From 1df2adedcce17ad4a39fba74f0e2b611f797fe10 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:26:02 +0200 Subject: [PATCH 1272/1911] qed: Fix interrupt flags on Rx LL2 Before 
iterating over the the LL2 Rx ring, the ring's spinlock is taken via spin_lock_irqsave(). The actual processing of the packet [including handling by the protocol driver] is done without said lock, so qed releases the spinlock and re-claims it afterwards. Problem is that the final spin_lock_irqrestore() at the end of the iteration uses the original flags saved from the initial irqsave() instead of the flags from the most recent irqsave(). So it's possible that the interrupt status would be incorrect at the end of the processing. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); CC: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 29ae5ec2ac2db6..0d3cef409c96d0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -476,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, - unsigned long lock_flags, + unsigned long *p_lock_flags, bool b_last_cqe) { struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; @@ -497,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, "Mismatch between active_descq and the LL2 Rx chain\n"); list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); - spin_unlock_irqrestore(&p_rx->lock, lock_flags); + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); - spin_lock_irqsave(&p_rx->lock, lock_flags); + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); return 0; } @@ -540,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, - cqe, flags, b_last_cqe); + cqe, &flags, + b_last_cqe); break; default: rc = -EIO; From db31d330e8b0ad8926725eb2af6f6422c07ca7ab Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:03 +0200 Subject: [PATCH 1273/1911] qed: Correct out-of-bound access in OOO history Need to set the number of entries in database, otherwise the logic would quickly surpass the array. Fixes: 1d6cff4fca43 ("qed: Add iSCSI out of order packet handling") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ooo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index 7d731c6cb8923d..378afce58b3f0a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn) if (!p_ooo_info->ooo_history.p_cqes) goto no_history_mem; + p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES; + return p_ooo_info; no_history_mem: From 6b116b1d6a521a1907b3c18cb7a8592a655f660c Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:04 +0200 Subject: [PATCH 1274/1911] qed: Enable iSCSI Out-of-Order Missing in the initial submission, qed fails to propagate qedi's request to enable OOO to firmware. Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI") Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 7e73b05eeab862..098766f7fe88a6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn, p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring; p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring; p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring; + p_init->ooo_enable = p_params->ooo_enable; + p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + + p_params->ll2_ooo_queue_id; p_init->func_params.log_page_size = p_params->log_page_size; val = p_params->num_tasks; p_init->func_params.num_tasks = cpu_to_le16(val); From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:12:53 +0100 Subject: [PATCH 1275/1911] mm, x86: Fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Arnd Bergmann Acked-by: Dave Jiang Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Andrew Morton Cc: Borislav Petkov Cc: Thomas Garnier Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5f5..50d35e3185f553 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024ab..585ee0d42d18fc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif From c236c8e95a3d395b0494e7108f0d41cf36ec107c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:18 +0100 Subject: [PATCH 1276/1911] futex: Fix potential use-after-free in FUTEX_REQUEUE_PI While working on the futex code, I stumbled over this potential use-after-free scenario. Dmitry triggered it later with syzkaller. 
pi_mutex is a pointer into pi_state, which we drop the reference on in unqueue_me_pi(). So any access to that pointer after that is bad. Since other sites already do rt_mutex_unlock() with hb->lock held, see for example futex_lock_pi(), simply move the unlock before unqueue_me_pi(). Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170304093558.801744246@infradead.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 229a744b1781be..3a4775fd7468a2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2907,6 +2906,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2930,18 +2931,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but From 9bbb25afeb182502ca4f2c4f3f88af0681b34cae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:19 +0100 Subject: [PATCH 1277/1911] futex: Add missing error handling to FUTEX_REQUEUE_PI Thomas spotted that fixup_pi_state_owner() can return errors and we fail to unlock the rt_mutex in that case. 
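Both futex fixes guard the same invariant: a lock embedded in a reference-counted object must be released while a reference still pins the object. A minimal sketch with userspace stand-ins (simplified, non-atomic refcount; not the kernel's futex code):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj {
            int             refcount;   /* stands in for the pi_state reference */
            pthread_mutex_t lock;       /* stands in for the embedded rt_mutex */
    };

    static void put_obj(struct obj *o)
    {
            if (--o->refcount == 0) {   /* last reference: object is freed */
                    pthread_mutex_destroy(&o->lock);
                    free(o);
            }
    }

    /* Buggy ordering: put_obj() may free *o, so the unlock afterwards is a
     * potential use-after-free -- the hazard removed by moving
     * rt_mutex_unlock() before unqueue_me_pi(). */
    static void stop_buggy(struct obj *o)
    {
            put_obj(o);
            pthread_mutex_unlock(&o->lock);
    }

    /* Fixed ordering: drop the embedded lock first, then the reference. */
    static void stop_fixed(struct obj *o)
    {
            pthread_mutex_unlock(&o->lock);
            put_obj(o);
    }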
Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170304093558.867401760@infradead.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 3a4775fd7468a2..45858ec739411f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2898,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Mar 2017 23:27:47 -0500 Subject: [PATCH 1278/1911] x86/unwind: Fix last frame check for aligned function stacks Pavel Machek reported the following warning on x86-32: WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value (null) The warning is caused by the unwinder not realizing that it reached the end of the stack, due to an unusual prologue which gcc sometimes generates for aligned stacks. The prologue is based on a gcc feature called the Dynamic Realign Argument Pointer (DRAP). It's almost always enabled for aligned stacks when -maccumulate-outgoing-args isn't set. This issue is similar to the one fixed by the following commit: 8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks") ... but that fix was specific to x86-64. Make the fix more generic to cover x86-32 as well, and also ensure that the return address referred to by the frame pointer is a copy of the original return address. Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers") Reported-by: Pavel Machek Signed-off-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee41b..08339262b666e5 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs) return sizeof(*regs); } +#ifdef CONFIG_X86_32 +#define GCC_REALIGN_WORDS 3 +#else +#define GCC_REALIGN_WORDS 1 +#endif + static bool is_last_task_frame(struct unwind_state *state) { - unsigned long bp = (unsigned long)state->bp; - unsigned long regs = (unsigned long)task_pt_regs(state->task); + unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; + unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; /* * We have to check for the last task frame at two different locations * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame by a word in the prologue of a - * function called by head/entry code. 
+ * change the offset of the stack frame in the prologue of a function + * called by head/entry code. Examples: + * + * : + * push %edi + * lea 0x8(%esp),%edi + * and $0xfffffff8,%esp + * pushl -0x4(%edi) + * push %ebp + * mov %esp,%ebp + * + * : + * lea 0x8(%rsp),%r10 + * and $0xfffffffffffffff0,%rsp + * pushq -0x8(%r10) + * push %rbp + * mov %rsp,%rbp + * + * Note that after aligning the stack, it pushes a duplicate copy of + * the return address before pushing the frame pointer. */ - return bp == regs - FRAME_HEADER_SIZE || - bp == regs - FRAME_HEADER_SIZE - sizeof(long); + return (state->bp == last_bp || + (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); } /* From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Mar 2017 15:20:53 +0100 Subject: [PATCH 1279/1911] x86/intel_rdt: Put group node in rdtgroup_kn_unlock The rdtgroup_kn_unlock waits for the last user to release and put its node. But it's calling kernfs_put on the node which calls the rdtgroup_kn_unlock, which might not be the group's directory node, but another group's file node. This race could be easily reproduced by running 2 instances of following script: mount -t resctrl resctrl /sys/fs/resctrl/ pushd /sys/fs/resctrl/ mkdir krava echo "krava" > krava/schemata rmdir krava popd umount /sys/fs/resctrl It triggers the slub debug error message with following command line config: slub_debug=,kernfs_node_cache. Call kernfs_put on the group's node to fix it. Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Signed-off-by: Jiri Olsa Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Shaohua Li Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index c05509d38b1f1e..9ac2a5cdd9c206 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) if (atomic_dec_and_test(&rdtgrp->waitcount) && (rdtgrp->flags & RDT_DELETED)) { kernfs_unbreak_active_protection(kn); - kernfs_put(kn); + kernfs_put(rdtgrp->kn); kfree(rdtgrp); } else { kernfs_unbreak_active_protection(kn); From 655d9ca9ac075da1ef2a45012ba48a39f6eb1f58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 22:27:11 +0100 Subject: [PATCH 1280/1911] drm: amd: remove broken include path The AMD ACP driver adds "-I../acp -I../acp/include" to the gcc command line, which makes no sense, since these are evaluated relative to the build directory. When we build with "make W=1", they instead cause a warning: cc1: error: ../acp/: No such file or directory [-Werror=missing-include-dirs] cc1: error: ../acp/include: No such file or directory [-Werror=missing-include-dirs] cc1: all warnings being treated as errors ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.o' failed This removes the subdir-ccflags variable that evidently did not serve any purpose here. 
Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/acp/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b0b..8a08e81ee90d57 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o From 630a04e79dd41ff746b545d4fc052e0abb836120 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2017 00:24:25 -0700 Subject: [PATCH 1281/1911] xfs: verify inline directory data forks When we're reading or writing the data fork of an inline directory, check the contents to make sure we're not overflowing buffers or eating garbage data. xfs/348 corrupts an inline symlink into an inline directory, triggering a buffer overflow bug. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- v2: add more checks consistent with _dir2_sf_check and make the verifier usable from anywhere. --- fs/xfs/libxfs/xfs_dir2_priv.h | 2 + fs/xfs/libxfs/xfs_dir2_sf.c | 87 ++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_fork.c | 26 ++++++++-- fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/xfs_dir2_readdir.c | 11 ----- fs/xfs/xfs_inode.c | 12 +++-- 6 files changed, 122 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index d04547fcf274af..eb00bc133bca67 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, + int size); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197d9..96b45cd6c63f06 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,93 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int size) +{ + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + xfs_ino_t ino; + int i; + int i8count; + int offset; + __uint8_t filetype; + + dops = xfs_dir_get_ops(mp, NULL); + + /* + * Give up if the directory is way too short. + */ + XFS_WANT_CORRUPTED_RETURN(mp, size > + offsetof(struct xfs_dir2_sf_hdr, parent)); + XFS_WANT_CORRUPTED_RETURN(mp, size >= + xfs_dir2_sf_hdr_size(sfp->i8count)); + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. 
+ */ + XFS_WANT_CORRUPTED_RETURN(mp, + ((char *)sfep + sizeof(*sfep)) < endp); + + /* Don't allow names with known bad length. */ + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + + /* Check that the offsets always increase. */ + XFS_WANT_CORRUPTED_RETURN(mp, + xfs_dir2_sf_get_offset(sfep) >= offset); + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); + XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + + /* Make sure this whole thing ought to be in local format. */ + XFS_WANT_CORRUPTED_RETURN(mp, offset + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 25c1e078aef6a5..9653e964eda4f9 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -33,6 +33,8 @@ #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -320,6 +322,7 @@ xfs_iformat_local( int whichfork, int size) { + int error; /* * If the size is unreasonable, then something @@ -336,6 +339,14 @@ xfs_iformat_local( return -EFSCORRUPTED; } + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(ip->i_mount, + (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), + size); + if (error) + return error; + } + xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -856,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. 
*/ -void +int xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -866,6 +877,7 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; + int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -874,7 +886,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return; + return 0; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -882,12 +894,19 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return; + return 0; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(mp, + (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, + ifp->if_bytes); + if (error) + return error; + } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -940,6 +959,7 @@ xfs_iflush_fork( ASSERT(0); break; } + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7fb8365326d1a7..132dc59fdde694 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 003a99b83bd884..ad9396e516f6e3 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -71,22 +71,11 @@ xfs_dir2_sf_getdents( struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Give up if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; - /* * If the block number in the offset is out of range, we're done. 
*/ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7eaf1ef74e3c63..c7fe2c2123ab83 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3475,6 +3475,7 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3557,9 +3558,14 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (error) + return error; + if (XFS_IFORK_Q(ip)) { + error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + if (error) + return error; + } xfs_inobp_check(mp, bp); /* From e67351d56a709853046fbe652b981fb7ca4c3dcc Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 15 Mar 2017 11:57:47 +0200 Subject: [PATCH 1282/1911] drm/i915/glk: Remove MODULE_FIRMWARE() tag from Geminilake's DMC Geminilake's DMC is not yet available in the linux-firmware repository. To prevent userspace tools such as mkinitramfs to complain about missing firmware, remove the MODULE_FIRMWARE() tag for now. Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK") Cc: Rodrigo Vivi Cc: Anusha Srivatsa Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170306085651.14008-1-ander.conselvan.de.oliveira@intel.com Link: http://patchwork.freedesktop.org/patch/msgid/20170315095747.21845-1-ander.conselvan.de.oliveira@intel.com (cherry picked from commit d9321a03efcda867b3a8c6327e01808516f0acd7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_csr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6aa5..de219b71fb76ec 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -35,7 +35,6 @@ */ #define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" -MODULE_FIRMWARE(I915_CSR_GLK); #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" From e609ccef5222c73b46b322be7d3796d60bff353d Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 24 Feb 2017 16:04:15 +0200 Subject: [PATCH 1283/1911] intel_th: Don't leak module refcount on failure to activate Output 'activation' may fail for the reasons of the output driver, for example, if msc's buffer is not allocated. We forget, however, to drop the module reference in this case. So each attempt at activation in this case leaks a reference, preventing the module from ever unloading. This patch adds the missing module_put() in the activation error path. 
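The rule the change restores, as a hedged sketch (hypothetical activation helper, not the intel_th code): a module reference taken on entry must be dropped on every exit path, including the error path, otherwise each failed attempt pins the module forever.

    #include <linux/module.h>
    #include <linux/errno.h>

    static int example_activate(struct module *owner, int (*activate)(void))
    {
            int ret;

            if (!try_module_get(owner))
                    return -ENODEV;

            ret = activate();
            if (ret)
                    module_put(owner);      /* the put that was missing */

            return ret;
    }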
Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org # v4.8+ --- drivers/hwtracing/intel_th/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index cdd9b3b26195aa..7563eceeaaeaa3 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -221,8 +221,10 @@ static int intel_th_output_activate(struct intel_th_device *thdev) else intel_th_trace_enable(thdev); - if (ret) + if (ret) { pm_runtime_put(&thdev->dev); + module_put(thdrv->driver.owner); + } return ret; } From 5118ccd34780f4637a9360be580f41f4c1feab48 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 8 Sep 2015 14:03:55 +0300 Subject: [PATCH 1284/1911] intel_th: pci: Add Denverton SOC support This adds Intel(R) Trace Hub PCI ID for Denverton SOC. Signed-off-by: Alexander Shishkin --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 0bba3842336e6d..04bd57b0100a53 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -85,6 +85,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6), .driver_data = (kernel_ulong_t)0, }, + { + /* Denverton */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; From 340837f985c2cb87ca0868d4aa9ce42b0fab3a21 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jun 2016 16:10:51 +0300 Subject: [PATCH 1285/1911] intel_th: pci: Add Gemini Lake support This adds Intel(R) Trace Hub PCI ID for Gemini Lake SOC. Signed-off-by: Alexander Shishkin --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 04bd57b0100a53..590cf90dd21a61 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -90,6 +90,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1), .driver_data = (kernel_ulong_t)0, }, + { + /* Gemini Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; From 28ea06c46fbcab63fd9a55531387b7928a18a590 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Mar 2017 12:58:42 -0500 Subject: [PATCH 1286/1911] gfs2: Avoid alignment hole in struct lm_lockname Commit 88ffbf3e03 switches to using rhashtables for glocks, hashing over the entire struct lm_lockname instead of its individual fields. On some architectures, struct lm_lockname contains a hole of uninitialized memory due to alignment rules, which now leads to incorrect hash values. Get rid of that hole. 
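The problem generalizes to any structure that is hashed or compared as raw bytes. A sketch of the layout on a typical LP64 ABI (field types illustrative, not the exact gfs2 definitions):

    /* The pointer and u64 members force 8-byte alignment, so the struct is
     * padded out to 24 bytes and carries 4 bytes of uninitialized tail
     * padding after 'type'.  Hashing sizeof(struct name) bytes then mixes
     * in that padding, and two logically equal keys can hash differently. */
    struct name {
            void               *sbd;
            unsigned long long  number;
            unsigned int        type;
            /* 4 bytes of implicit padding here */
    };

    /* Packing the struct and re-aligning it to sizeof(int), as the gfs2 fix
     * does, shrinks it to 20 bytes with no hole, so a byte-wise hash only
     * ever sees initialized data. */
    struct name_packed {
            void               *sbd;
            unsigned long long  number;
            unsigned int        type;
    } __attribute__((__packed__, __aligned__(sizeof(int))));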
Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson CC: #v4.3+ --- fs/gfs2/incore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c45084ac642d19..511e1ed7e2ded7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ From 8af42949d1681379c1a97d230de9242c1f4f326a Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:41:55 +0900 Subject: [PATCH 1287/1911] openrisc: xchg: fix `computed is not used` warning When building allmodconfig this warning shows. fs/ocfs2/file.c: In function 'ocfs2_file_write_iter': ./arch/openrisc/include/asm/cmpxchg.h:81:3: warning: value computed is not used [-Wunused-value] ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) ^ Applying the same patch logic that was done to the cmpxchg macro. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cmpxchg.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h index 5fcb9ac7269385..f0a5d8b844d6b8 100644 --- a/arch/openrisc/include/asm/cmpxchg.h +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, return val; } -#define xchg(ptr, with) \ - ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, with) \ + ({ \ + (__typeof__(*(ptr))) __xchg((unsigned long)(with), \ + (ptr), \ + sizeof(*(ptr))); \ + }) #endif /* __ASM_OPENRISC_CMPXCHG_H */ From 154e67cd8e8f964809d0e75e44bb121b169c75b3 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:44:45 +0900 Subject: [PATCH 1288/1911] openrisc: fix issue handling 8 byte get_user calls Was getting the following error with allmodconfig: ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined! This was simply a missing break statement, causing an unwanted fall through. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a23..1311e6b1399166 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) From 363dad58e4a0f72dce0bf12d361d617239a80317 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 14 Mar 2017 22:52:49 +0900 Subject: [PATCH 1289/1911] openrisc: Export symbols needed by modules This was detected by allmodconfig, errors reported: ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined! ERROR: "__ucmpdi2" [lib/842/842_decompress.ko] undefined! ERROR: "empty_zero_page" [fs/nfs/objlayout/objlayoutdriver.ko] undefined! ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined! ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined! ERROR: "__ucmpdi2" [fs/btrfs/btrfs.ko] undefined! ERROR: "pm_power_off" [drivers/regulator/act8865-regulator.ko] undefined! 
ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined! ERROR: "__clear_user" [drivers/md/dm-mod.ko] undefined! ERROR: "__clear_user" [net/netfilter/x_tables.ko] undefined! Signed-off-by: Stafford Horne --- arch/openrisc/kernel/or32_ksyms.c | 4 ++++ arch/openrisc/kernel/process.c | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 5c4695d13542fc..ee3e604959e15c 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -30,6 +30,7 @@ #include #include #include +#include #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name) @@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3); DECLARE_EXPORT(__ashrdi3); DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ucmpdi2); +EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 828a29110459e8..f8da545854f979 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void arch_cpu_idle(void) } void (*pm_power_off) (void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); /* * When a process does an "exec", machine state like FPU and debug From e4c204ced0ac25e02e58679f07096c5bac0b0d96 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 14 Mar 2017 16:18:34 +0100 Subject: [PATCH 1290/1911] cpufreq: intel_pstate: Avoid percentages in limits-related computations Currently, intel_pstate_update_perf_limits() first converts the policy minimum and maximum limits into percentages of the maximum turbo frequency (rounding up to an integer) and then converts these percentages to fractions (by using fixed-point arithmetic to divide them by 100). That introduces a rounding error unnecessarily, because the fractions can be obtained by carrying out fixed-point divisions directly on the input numbers. Rework the computations in intel_pstate_hwp_set() to use fractions instead of percentages (and drop redundant local variables from there) and modify intel_pstate_update_perf_limits() to compute the fractions directly and percentages out of them. While at it, introduce percent_ext_fp() for converting percentages to fractions (with extended number of fraction bits) and use it in the computations. Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 56 +++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ee12641ee010c9..08e134ffba68e2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } +static inline int32_t percent_ext_fp(int percent) +{ + return div_ext_fp(percent, 100); +} + /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average @@ -850,7 +855,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) u64 value, cap; for_each_cpu(cpu, policy->cpus) { - int max_perf_pct, min_perf_pct; struct cpudata *cpu_data = all_cpu_data[cpu]; s16 epp; @@ -864,16 +868,14 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) else hw_max = HWP_HIGHEST_PERF(cap); - max_perf_pct = perf_limits->max_perf_pct; - min_perf_pct = perf_limits->min_perf_pct; - min = hw_max * min_perf_pct / 100; + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = hw_max * max_perf_pct / 100; + max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -1223,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); + limits->max_perf = percent_ext_fp(limits->max_perf_pct); intel_pstate_update_policies(); @@ -1260,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); + limits->min_perf = percent_ext_fp(limits->min_perf_pct); intel_pstate_update_policies(); @@ -2078,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { + int32_t max_policy_perf, min_policy_perf; - limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, - policy->cpuinfo.max_freq); - limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); + max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); + max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); if (policy->max == policy->min) { - limits->min_policy_pct = limits->max_policy_pct; + min_policy_perf = max_policy_perf; } else { - limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100, - policy->cpuinfo.max_freq); - limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, - 0, 100); + min_policy_perf = div_ext_fp(policy->min, + policy->cpuinfo.max_freq); + min_policy_perf = clamp_t(int32_t, min_policy_perf, + 0, max_policy_perf); } - /* Normalize user input to [min_policy_pct, max_policy_pct] */ - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); + /* Normalize user input to [min_perf, max_perf] */ + limits->min_perf = 
max(min_policy_perf, + percent_ext_fp(limits->min_sysfs_pct)); + limits->min_perf = min(limits->min_perf, max_policy_perf); + limits->max_perf = min(max_policy_perf, + percent_ext_fp(limits->max_sysfs_pct)); + limits->max_perf = max(min_policy_perf, limits->max_perf); - /* Make sure min_perf_pct <= max_perf_pct */ - limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + /* Make sure min_perf <= max_perf */ + limits->min_perf = min(limits->min_perf, limits->max_perf); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); + limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100); + limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100); pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, limits->max_perf_pct, limits->min_perf_pct); From 4494dbc6dec37817f2cc2aa7604039a9e87ada18 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 15 Mar 2017 22:22:08 +0800 Subject: [PATCH 1291/1911] netfilter: nft_ct: do cleanup work when NFTA_CT_DIRECTION is invalid We should jump to invoke __nft_ct_set_destroy() instead of just return error. Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 91585b5e53070d..0264258c46feb5 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -544,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err1; } } From 1b53cf9815bb4744958d41f3795d5d5a1d365e2d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Feb 2017 15:07:11 -0800 Subject: [PATCH 1292/1911] fscrypt: remove broken support for detecting keyring key revocation Filesystem encryption ostensibly supported revoking a keyring key that had been used to "unlock" encrypted files, causing those files to become "locked" again. This was, however, buggy for several reasons, the most severe of which was that when key revocation happened to be detected for an inode, its fscrypt_info was immediately freed, even while other threads could be using it for encryption or decryption concurrently. This could be exploited to crash the kernel or worse. This patch fixes the use-after-free by removing the code which detects the keyring key having been revoked, invalidated, or expired. Instead, an encrypted inode that is "unlocked" now simply remains unlocked until it is evicted from memory. Note that this is no worse than the case for block device-level encryption, e.g. dm-crypt, and it still remains possible for a privileged user to evict unused pages, inodes, and dentries by running 'sync; echo 3 > /proc/sys/vm/drop_caches', or by simply unmounting the filesystem. In fact, one of those actions was already needed anyway for key revocation to work even somewhat sanely. This change is not expected to break any applications. In the future I'd like to implement a real API for fscrypt key revocation that interacts sanely with ongoing filesystem operations --- waiting for existing operations to complete and blocking new operations, and invalidating and sanitizing key material and plaintext from the VFS caches. 
But this is a hard problem, and for now this bug must be fixed. This bug affected almost all versions of ext4, f2fs, and ubifs encryption, and it was potentially reachable in any kernel configured with encryption support (CONFIG_EXT4_ENCRYPTION=y, CONFIG_EXT4_FS_ENCRYPTION=y, CONFIG_F2FS_FS_ENCRYPTION=y, or CONFIG_UBIFS_FS_ENCRYPTION=y). Note that older kernels did not use the shared fs/crypto/ code, but due to the potential security implications of this bug, it may still be worthwhile to backport this fix to them. Fixes: b7236e21d55f ("ext4 crypto: reorganize how we store keys in the inode") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Acked-by: Michael Halcrow --- fs/crypto/crypto.c | 10 +------ fs/crypto/fname.c | 2 +- fs/crypto/fscrypt_private.h | 4 --- fs/crypto/keyinfo.c | 52 +++++++------------------------------ 4 files changed, 11 insertions(+), 57 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 02a7a9286449d4..6d6eca394d4d41 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page); static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; - struct fscrypt_info *ci; int dir_has_key, cached_with_key; if (flags & LOOKUP_RCU) @@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - ci = d_inode(dir)->i_crypt_info; - if (ci && ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD)))) - ci = NULL; - /* this should eventually be an flag in d_flags */ spin_lock(&dentry->d_lock); cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; spin_unlock(&dentry->d_lock); - dir_has_key = (ci != NULL); + dir_has_key = (d_inode(dir)->i_crypt_info != NULL); dput(dir); /* diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 13052b85c3930f..37b49894c76234 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = iname->len; return 0; } - ret = fscrypt_get_crypt_info(dir); + ret = fscrypt_get_encryption_info(dir); if (ret && ret != -EOPNOTSUPP) return ret; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index fdbb8af32eafdb..e39696e644942a 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -67,7 +67,6 @@ struct fscrypt_info { u8 ci_filename_mode; u8 ci_flags; struct crypto_skcipher *ci_ctfm; - struct key *ci_keyring_key; u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; }; @@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); -/* keyinfo.c */ -extern int fscrypt_get_crypt_info(struct inode *); - #endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 02eb6b9e44387d..cb3e82abf0347b 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info, kfree(description); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); + down_read(&keyring_key->sem); if (keyring_key->type != &key_type_logon) { printk_once(KERN_WARNING @@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info, res = -ENOKEY; goto out; } - down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct 
fscrypt_key)) { res = -EINVAL; - up_read(&keyring_key->sem); goto out; } master_key = (struct fscrypt_key *)ukp->data; @@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info, "%s: key size incorrect: %d\n", __func__, master_key->size); res = -ENOKEY; - up_read(&keyring_key->sem); goto out; } res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); - up_read(&keyring_key->sem); - if (res) - goto out; - - crypt_info->ci_keyring_key = keyring_key; - return 0; out: + up_read(&keyring_key->sem); key_put(keyring_key); return res; } @@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci) if (!ci) return; - key_put(ci->ci_keyring_key); crypto_free_skcipher(ci->ci_ctfm); kmem_cache_free(fscrypt_info_cachep, ci); } -int fscrypt_get_crypt_info(struct inode *inode) +int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; struct fscrypt_context ctx; @@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode) u8 *raw_key = NULL; int res; + if (inode->i_crypt_info) + return 0; + res = fscrypt_initialize(inode->i_sb->s_cop->flags); if (res) return res; if (!inode->i_sb->s_cop->get_context) return -EOPNOTSUPP; -retry: - crypt_info = ACCESS_ONCE(inode->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - fscrypt_put_encryption_info(inode, crypt_info); - goto retry; - } res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { @@ -229,7 +215,6 @@ int fscrypt_get_crypt_info(struct inode *inode) crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); @@ -273,14 +258,8 @@ int fscrypt_get_crypt_info(struct inode *inode) if (res) goto out; - kzfree(raw_key); - raw_key = NULL; - if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { - put_crypt_info(crypt_info); - goto retry; - } - return 0; - + if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) + crypt_info = NULL; out: if (res == -ENOKEY) res = 0; @@ -288,6 +267,7 @@ int fscrypt_get_crypt_info(struct inode *inode) kzfree(raw_key); return res; } +EXPORT_SYMBOL(fscrypt_get_encryption_info); void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) { @@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) put_crypt_info(ci); } EXPORT_SYMBOL(fscrypt_put_encryption_info); - -int fscrypt_get_encryption_info(struct inode *inode) -{ - struct fscrypt_info *ci = inode->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return fscrypt_get_crypt_info(inode); - return 0; -} -EXPORT_SYMBOL(fscrypt_get_encryption_info); From 62a10498afb27370ec6018e9d802b74850fd8d9a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Mar 2017 09:34:49 +0900 Subject: [PATCH 1293/1911] ASoC: rcar: clear DE bit only in PDMACHCR when it stops R-Car datasheet indicates "Clear DE in PDMACHCR" for transfer stop, but current code clears all bits in PDMACHCR. Because of this, DE bit might never been cleared, and it causes CMD overflow. This patch fixes this issue. 
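The fix boils down to a read-modify-write that touches only the DE bit; a minimal sketch with generic MMIO-style accessors (hypothetical bit definition, not the rsnd driver itself):

    #include <stdint.h>

    #define CHCR_DE (1u << 0)   /* hypothetical 'DMA enable' bit */

    /* Old behaviour: writing 0 clobbers every bit in the control register,
     * not just DE. */
    static inline void stop_clobber(volatile uint32_t *chcr)
    {
            *chcr = 0;
    }

    /* New behaviour: clear only DE and leave the remaining configuration
     * bits untouched, as rsnd_dmapp_bset() now does. */
    static inline void stop_clear_de(volatile uint32_t *chcr)
    {
            uint32_t val = *chcr;

            val &= ~CHCR_DE;
            *chcr = val;
    }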
Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 1f405c83386759..c2e199b4fcf459 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -454,6 +454,20 @@ static u32 rsnd_dmapp_read(struct rsnd_dma *dma, u32 reg) return ioread32(rsnd_dmapp_addr(dmac, dma, reg)); } +static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) +{ + struct rsnd_mod *mod = rsnd_mod_get(dma); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); + volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + u32 val = ioread32(addr); + + val &= ~mask; + val |= (data & mask); + + iowrite32(val, addr); +} + static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -461,10 +475,10 @@ static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); int i; - rsnd_dmapp_write(dma, 0, PDMACHCR); + rsnd_dmapp_bset(dma, 0, PDMACHCR_DE, PDMACHCR); for (i = 0; i < 1024; i++) { - if (0 == rsnd_dmapp_read(dma, PDMACHCR)) + if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE)) return 0; udelay(1); } From 85b29008d8af6d94a0723aaa8d93cfb6e041158b Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:11 -0600 Subject: [PATCH 1294/1911] scsi: hpsa: update check for logical volume status - Add in a new case for volume offline. Resolves internal testing bug for multilun array management. - Return correct status for failed TURs. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 35 ++++++++++++++++------------------- drivers/scsi/hpsa_cmd.h | 2 ++ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 524a0c755ed7e7..90b76c4c6d36d6 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3714,7 +3714,7 @@ static int hpsa_get_volume_status(struct ctlr_info *h, * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static int hpsa_volume_offline(struct ctlr_info *h, +static unsigned char hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, DEFAULT_TIMEOUT); if (rc) { cmd_free(h, c); - return 0; + return HPSA_VPD_LV_STATUS_UNSUPPORTED; } sense = c->err_info->SenseInfo; if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) @@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, cmd_status = c->err_info->CommandStatus; scsi_status = c->err_info->ScsiStatus; cmd_free(h, c); - /* Is the volume 'not ready'? 
*/ - if (cmd_status != CMD_TARGET_STATUS || - scsi_status != SAM_STAT_CHECK_CONDITION || - sense_key != NOT_READY || - asc != ASC_LUN_NOT_READY) { - return 0; - } /* Determine the reason for not ready state */ ldstat = hpsa_get_volume_status(h, scsi3addr); /* Keep volume offline in certain cases: */ switch (ldstat) { + case HPSA_LV_FAILED: case HPSA_LV_UNDERGOING_ERASE: case HPSA_LV_NOT_AVAILABLE: case HPSA_LV_UNDERGOING_RPI: @@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, default: break; } - return 0; + return HPSA_LV_OK; } /* @@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, /* Do an inquiry to the device to see what it is. */ if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { - /* Inquiry failed (msg printed already) */ dev_err(&h->pdev->dev, - "hpsa_update_device_info: inquiry failed\n"); - rc = -EIO; + "%s: inquiry failed, device will be skipped.\n", + __func__); + rc = HPSA_INQUIRY_FAILED; goto bail_out; } @@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h, if ((this_device->devtype == TYPE_DISK || this_device->devtype == TYPE_ZBC) && is_logical_dev_addr_mode(scsi3addr)) { - int volume_offline; + unsigned char volume_offline; hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); - if (volume_offline < 0 || volume_offline > 0xff) - volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; - this_device->volume_offline = volume_offline & 0xff; + if (volume_offline == HPSA_LV_FAILED) { + rc = HPSA_LV_FAILED; + dev_err(&h->pdev->dev, + "%s: LV failed, device will be skipped.\n", + __func__); + goto bail_out; + } } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) goto out; } if (rc) { - dev_warn(&h->pdev->dev, - "Inquiry failed, skipping device.\n"); + h->drv_req_rescan = 1; continue; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf0705846..5961705eef7675 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -156,6 +156,7 @@ #define CFGTBL_BusType_Fibre2G 0x00000200l /* VPD Inquiry types */ +#define HPSA_INQUIRY_FAILED 0x02 #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_ID 0x83 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 @@ -166,6 +167,7 @@ /* Logical volume states */ #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 +#define HPSA_LV_FAILED 0x01 #define HPSA_LV_NOT_AVAILABLE 0x0b #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 From 87b9e6aa87d9411f1059aa245c0c79976bc557ac Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:17 -0600 Subject: [PATCH 1295/1911] scsi: hpsa: limit outstanding rescans Avoid rescan storms. No need to queue another if one is pending. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. 
Petersen --- drivers/scsi/hpsa.c | 16 +++++++++++++++- drivers/scsi/hpsa.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 90b76c4c6d36d6..0a8ac68f4ca117 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5555,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; - wake_up_all(&h->scan_wait_queue); + wake_up(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); } @@ -5573,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) if (unlikely(lockup_detected(h))) return hpsa_scan_complete(h); + /* + * If a scan is already waiting to run, no need to add another + */ + spin_lock_irqsave(&h->scan_lock, flags); + if (h->scan_waiting) { + spin_unlock_irqrestore(&h->scan_lock, flags); + return; + } + + spin_unlock_irqrestore(&h->scan_lock, flags); + /* wait until any scan already in progress is finished. */ while (1) { spin_lock_irqsave(&h->scan_lock, flags); if (h->scan_finished) break; + h->scan_waiting = 1; spin_unlock_irqrestore(&h->scan_lock, flags); wait_event(h->scan_wait_queue, h->scan_finished); /* Note: We don't need to worry about a race between this @@ -5587,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) */ } h->scan_finished = 0; /* mark scan as in progress */ + h->scan_waiting = 0; spin_unlock_irqrestore(&h->scan_lock, flags); if (unlikely(lockup_detected(h))) @@ -8789,6 +8802,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) init_waitqueue_head(&h->event_sync_wait_queue); mutex_init(&h->reset_mutex); h->scan_finished = 1; /* no scan currently in progress */ + h->scan_waiting = 0; pci_set_drvdata(pdev, h); h->ndevices = 0; diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index bf6cdc1066544f..6f04f2ad412530 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -201,6 +201,7 @@ struct ctlr_info { dma_addr_t errinfo_pool_dhandle; unsigned long *cmd_pool_bits; int scan_finished; + u8 scan_waiting : 1; spinlock_t scan_lock; wait_queue_head_t scan_wait_queue; From 2ef2884980873081a4edae92f9d88dd580c85f6e Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:23 -0600 Subject: [PATCH 1296/1911] scsi: hpsa: do not timeout reset operations Resets can take longer than DEFAULT_TIMEOUT. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0a8ac68f4ca117..0d0be7754a6531 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, /* fill_cmd can't fail here, no data buffer to map. 
*/ (void) fill_cmd(c, reset_type, h, NULL, 0, 0, scsi3addr, TYPE_MSG); - rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); + rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); if (rc) { dev_warn(&h->pdev->dev, "Failed to send reset command\n"); goto out; From 949d7fa158b2b1af533bdb1af0dda8ab103ac58d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 12 Mar 2017 12:22:02 +0200 Subject: [PATCH 1297/1911] scsi: ufs: don't check unsigned type for a negative value Fix compilation warning: drivers/scsi/ufs/ufshcd.c:7645:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) Signed-off-by: Tomas Winkler Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1359913bf840ce..e8c26e6e623726 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev, if (kstrtoul(buf, 0, &value)) return -EINVAL; - if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) + if (value >= UFS_PM_LVL_MAX) return -EINVAL; spin_lock_irqsave(hba->host->host_lock, flags); From 7d7080335f8d93a51e8238b6e85be8af4ba452b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 8 Mar 2017 14:36:01 -0800 Subject: [PATCH 1298/1911] scsi: lpfc: Finalize Kconfig options for nvme Reviewing the result of what was just added for Kconfig, we made a poor choice. It worked well for full kernel builds, but not so much for how it would be deployed on a distro. Here's the final result: - lpfc will compile in NVME initiator and/or NVME target support based on whether the kernel has the corresponding subsystem support. Kconfig is not used to drive this specifically for lpfc. - There is a module parameter, lpfc_enable_fc4_type, that indicates whether the ports will do FCP-only or FCP & NVME (NVME-only not yet possible due to dependency on fc transport). As FCP & NVME divvys up exchange resources, and given NVME will not be often initially, the default is changed to FCP only. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 14 -------------- drivers/scsi/lpfc/lpfc_attr.c | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 +++++++ drivers/scsi/lpfc/lpfc_nvme.c | 8 ++++---- drivers/scsi/lpfc/lpfc_nvmet.c | 8 ++++---- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4bf55b5d78be53..3c52867dfe28e3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS This makes debugging information from the lpfc driver available via the debugfs filesystem. -config LPFC_NVME_INITIATOR - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_FC - ---help--- - This enables NVME Initiator support in the Emulex lpfc driver. - -config LPFC_NVME_TARGET - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_TARGET_FC - ---help--- - This enables NVME Target support in the Emulex lpfc driver. - Target enablement must still be enabled on a per adapter - basis by module parameters. 
- config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index fbd3a563be53f5..84aa62f1a4de73 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP * 3 - register both FCP and NVME - * Supported values are [1,3]. Default value is 3 + * Supported values are [1,3]. Default value is 1 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, "Define fc4 type to register with fabric."); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 2697d49da4d776..6cc561b042118e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Check to see if it matches any module parameter */ for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { if (wwn == lpfc_enable_nvmet[i]) { +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6017 NVME Target %016llx\n", wwn); phba->nvmet_support = 1; /* a match */ +#else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6021 Can't enable NVME Target." + " NVME_TARGET_FC infrastructure" + " is not in kernel\n"); +#endif } } } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0a4c1908140940..0024de1c6c1fea 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. 
*/ -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); #else @@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index b7739a554fe005..7ca868f394da62 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); @@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -788,7 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Feb 2017 13:25:14 -0800 Subject: [PATCH 1299/1911] fscrypt: eliminate ->prepare_context() operation The only use of the ->prepare_context() fscrypt operation was to allow ext4 to evict inline data from the inode before ->set_context(). However, there is no reason why this cannot be done as simply the first step in ->set_context(), and in fact it makes more sense to do it that way because then the policy modes and flags get validated before any real work is done. Therefore, merge ext4_prepare_context() into ext4_set_context(), and remove ->prepare_context(). 
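As a rough sketch (not the real fscrypt or ext4 interfaces), the refactor amounts to folding the old optional hook into the first step of the remaining callback, so the core no longer needs a separate preparation phase:

#include <stdio.h>

/* Hypothetical stand-in for the old ->prepare_context() work. */
static int convert_inline_data(void *inode)
{
	(void)inode;
	return 0;	/* 0 on success, negative errno style on failure */
}

static int set_context(void *inode)
{
	int res = convert_inline_data(inode);	/* now simply step one */

	if (res)
		return res;
	/* Policy modes and flags were already validated by the caller,
	 * so no real work has happened before that check. */
	/* ... write the encryption context ... */
	return 0;
}

int main(void)
{
	printf("set_context() -> %d\n", set_context(NULL));
	return 0;
}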
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 7 ------- fs/ext4/super.c | 10 ++++------ include/linux/fscrypt_common.h | 1 - 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 14b76da7126948..4908906d54d562 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode, const struct fscrypt_policy *policy) { struct fscrypt_context ctx; - int res; if (!inode->i_sb->s_cop->set_context) return -EOPNOTSUPP; - if (inode->i_sb->s_cop->prepare_context) { - res = inode->i_sb->s_cop->prepare_context(inode); - if (res) - return res; - } - ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2e03a0a88d92f7..a9448db1cf7e87 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len) EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); } -static int ext4_prepare_context(struct inode *inode) -{ - return ext4_convert_inline_data(inode); -} - static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { handle_t *handle = fs_data; int res, res2, retries = 0; + res = ext4_convert_inline_data(inode); + if (res) + return res; + /* * If a journal handle was specified, then the encryption context is * being set on a new inode via inheritance and is part of a larger @@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode) static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, - .prepare_context = ext4_prepare_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, .is_encrypted = ext4_encrypted_inode, diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 547f81592ba134..10c1abfbac6c45 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -87,7 +87,6 @@ struct fscrypt_operations { unsigned int flags; const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); - int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); bool (*is_encrypted)(struct inode *); From b9cf625d6ecde0d372e23ae022feead72b4228a6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2017 14:52:02 -0400 Subject: [PATCH 1300/1911] ext4: mark inode dirty after converting inline directory If ext4_convert_inline_data() was called on a directory with inline data, the filesystem was left in an inconsistent state (as considered by e2fsck) because the file size was not increased to cover the new block. This happened because the inode was not marked dirty after i_disksize was updated. Fix this by marking the inode dirty at the end of ext4_finish_convert_inline_dir(). This bug was probably not noticed before because most users mark the inode dirty afterwards for other reasons. But if userspace executed FS_IOC_SET_ENCRYPTION_POLICY with invalid parameters, as exercised by 'kvm-xfstests -c adv generic/396', then the inode was never marked dirty after updating i_disksize. 
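A toy model (not ext4 code) of the invariant the fix restores: an i_disksize update is only durable if the inode is marked dirty before the helper returns, because writeback skips clean inodes.

#include <stdio.h>
#include <stdbool.h>

struct toy_inode {
	long i_disksize;	/* in-memory size */
	long ondisk_size;	/* what fsck would see */
	bool dirty;
};

static void mark_inode_dirty(struct toy_inode *inode)
{
	inode->dirty = true;
}

/* Writeback persists only inodes that were marked dirty. */
static void writeback(struct toy_inode *inode)
{
	if (inode->dirty) {
		inode->ondisk_size = inode->i_disksize;
		inode->dirty = false;
	}
}

/* Fixed helper: grow the size to cover the new block, then mark dirty. */
static int finish_convert_inline_dir(struct toy_inode *inode, long block_size)
{
	inode->i_disksize = block_size;
	mark_inode_dirty(inode);	/* the step the patch adds */
	return 0;
}

int main(void)
{
	struct toy_inode dir = { .i_disksize = 60, .ondisk_size = 60, .dirty = false };

	finish_convert_inline_dir(&dir, 4096);
	writeback(&dir);
	printf("on-disk size now %ld\n", dir.ondisk_size);
	return 0;
}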
Cc: stable@vger.kernel.org # 3.10+ Fixes: 3c47d54170b6a678875566b1b8d6dcf57904e49b Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/ext4/inline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 30a9f210d1e32c..375fb1c05d49ce 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) - goto out; + return err; set_buffer_verified(dir_block); -out: - return err; + return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, From cd9cb405e0b948363811dc74dbb2890f56f2cb87 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2017 15:08:48 -0400 Subject: [PATCH 1301/1911] jbd2: don't leak memory if setting up journal fails In journal_init_common(), if we failed to allocate the j_wbuf array, or if we failed to create the buffer_head for the journal superblock, we leaked the memory allocated for the revocation tables. Fix this. Cc: stable@vger.kernel.org # 4.9 Fixes: f0c9fd5458bacf7b12a9a579a727dc740cbe047e Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/jbd2/journal.c | 22 +++++++++++----------- fs/jbd2/revoke.c | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a1a359bfcc9cd4..5adc2fb62b0fab 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev, /* Set up a default-sized revoke table for the new mount. */ err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); - if (err) { - kfree(journal); - return NULL; - } + if (err) + goto err_cleanup; spin_lock_init(&journal->j_history_lock); @@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), GFP_KERNEL); - if (!journal->j_wbuf) { - kfree(journal); - return NULL; - } + if (!journal->j_wbuf) + goto err_cleanup; bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); if (!bh) { pr_err("%s: Cannot get buffer for journal superblock\n", __func__); - kfree(journal->j_wbuf); - kfree(journal); - return NULL; + goto err_cleanup; } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; + +err_cleanup: + kfree(journal->j_wbuf); + jbd2_journal_destroy_revoke(journal); + kfree(journal); + return NULL; } /* jbd2_journal_init_dev and jbd2_journal_init_inode: diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index cfc38b5521189f..f9aefcda585418 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) fail1: jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); + journal->j_revoke_table[0] = NULL; fail0: return -ENOMEM; } From fe8daf5fa715f7214952f06a387e4b7de818c5be Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Wed, 15 Mar 2017 13:47:50 +0900 Subject: [PATCH 1302/1911] fjes: Fix wrong netdevice feature flags This patch fixes netdev->features for Extended Socket network device. Currently Extended Socket network device's netdev->feature claims NETIF_F_HW_CSUM, however this is completely wrong. There's no feature of checksum offloading. 
That causes invalid TCP/UDP checksum and packet rejection when IP forwarding from Extended Socket network device to other network device. NETIF_F_HW_CSUM should be omitted. Signed-off-by: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b75d9cdcfb0c41..c4b3c4b77a9c1b 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1316,7 +1316,7 @@ static void fjes_netdev_setup(struct net_device *netdev) netdev->min_mtu = fjes_support_mtu[0]; netdev->max_mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; - netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static void fjes_irq_watch_task(struct work_struct *work) From e83bb3e6f3efa21f4a9d883a25d0ecd9dfb431e1 Mon Sep 17 00:00:00 2001 From: Michael Engl Date: Tue, 3 Oct 2017 13:57:00 +0100 Subject: [PATCH 1303/1911] iio: adc: ti_am335x_adc: fix fifo overrun recovery The tiadc_irq_h(int irq, void *private) function is handling FIFO overruns by clearing flags, disabling and enabling the ADC to recover. If the ADC is running in continuous mode a FIFO overrun happens regularly. If the disabling of the ADC happens concurrently with a new conversion. It might happen that the enabling of the ADC is ignored by the hardware. This stops the ADC permanently. No more interrupts are triggered. According to the AM335x Reference Manual (SPRUH73H October 2011 - Revised April 2013 - Chapter 12.4 and 12.5) it is necessary to check the ADC FSM bits in REG_ADCFSM before enabling the ADC again. Because the disabling of the ADC is done right after the current conversion has been finished. To trigger this bug it is necessary to run the ADC in continuous mode. The ADC values of all channels need to be read in an endless loop. The bug appears within the first 6 hours (~5.4 million handled FIFO overruns). The user space application will hang on reading new values from the character device. Fixes: ca9a563805f7a ("iio: ti_am335x_adc: Add continuous sampling support") Signed-off-by: Michael Engl Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index ad9dec30bb304f..4282ceca3d8f9f 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -169,7 +169,9 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) { struct iio_dev *indio_dev = private; struct tiadc_device *adc_dev = iio_priv(indio_dev); - unsigned int status, config; + unsigned int status, config, adc_fsm; + unsigned short count = 0; + status = tiadc_readl(adc_dev, REG_IRQSTATUS); /* @@ -183,6 +185,15 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) tiadc_writel(adc_dev, REG_CTRL, config); tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES); + + /* wait for idle state. 
+ * ADC needs to finish the current conversion + * before disabling the module + */ + do { + adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM); + } while (adc_fsm != 0x10 && count++ < 100); + tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB)); return IRQ_HANDLED; } else if (status & IRQENB_FIFO1THRES) { From 5f655322b1ba4bd46e26e307d04098f9c84df764 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 14 Mar 2017 11:47:29 -0400 Subject: [PATCH 1304/1911] parisc: support R_PARISC_SECREL32 relocation in modules The parisc kernel doesn't work with CONFIG_MODVERSIONS since the commit 71810db27c1c853b335675bee335d893bc3d324b. It can't load modules with the error: "module unix: Unknown relocation: 41". The commit changes __kcrctab from 64-bit valus to 32-bit values. The assembler generates R_PARISC_SECREL32 secrel relocation for them and the module loader doesn't support this relocation. This patch adds the R_PARISC_SECREL32 relocation to the module loader. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Helge Deller --- arch/parisc/kernel/module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index a0ecdb4abcc878..c66c943d93224f 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_DPREL21L: /* left 21 bit of relative address */ val = lrsel(val - dp, addend); @@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_FPTR64: /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { From 316ec0624f951166daedbe446988ef92ae72b59f Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 11 Mar 2017 18:03:34 -0500 Subject: [PATCH 1305/1911] parisc: Optimize flush_kernel_vmap_range and invalidate_kernel_vmap_range The previously submitted patch did not resolve the random segmentation faults observed on the phantom buildd system. There are still unresolved problems with the Debian 4.8 and 4.9 kernels on C8000. The attached patch removes the flush of the offset map pages and does a whole data cache flush for large ranges. No other arch flushes the offset map in these routines as far as I can tell. I have not observed any random segmentation faults on rp3440 in two weeks of testing with 4.10.0 and 4.10.1. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 23 ++--------------------- arch/parisc/kernel/cache.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 19c9c3c5f267ea..c7e15cc5c6683b 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page) #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -/* vmap range flushes and invalidates. 
Architecturally, we don't need - * the invalidate, because the CPU should refuse to speculate once an - * area has been flushed, so invalidate is left empty */ -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - - flush_kernel_dcache_range_asm(start, start + size); -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - void *cursor = vaddr; - for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) { - struct page *page = vmalloc_to_page(cursor); - - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) - flush_kernel_dcache_page(page); - } - flush_kernel_dcache_range_asm(start, start + size); -} +void flush_kernel_vmap_range(void *vaddr, int size); +void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0dc72d5de86153..c32a0909521665 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } + +void flush_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); From 63d32d1e09cb2fc65b084b261976c06b40d19115 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:10:17 +0100 Subject: [PATCH 1306/1911] parisc: Wire up statx system call Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 3 ++- arch/parisc/kernel/syscall_table.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 6b0741e7a7ed3e..667c99421003e4 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -362,8 +362,9 @@ #define __NR_copy_file_range (__NR_Linux + 346) #define __NR_preadv2 (__NR_Linux + 347) #define __NR_pwritev2 (__NR_Linux + 348) +#define __NR_statx (__NR_Linux + 349) -#define __NR_Linux_syscalls (__NR_pwritev2 + 1) +#define __NR_Linux_syscalls (__NR_statx + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 3cfef1de8061af..44aeaa9c039fc4 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -444,6 +444,7 @@ ENTRY_SAME(copy_file_range) ENTRY_COMP(preadv2) ENTRY_COMP(pwritev2) + ENTRY_SAME(statx) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) From 0f424de1fd9bc4ab24bd1fe5430ab5618e803e31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 14:42:03 -0400 Subject: [PATCH 1307/1911] drm/radeon/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. 
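The quirk itself is a straightforward PCI-ID match followed by a clock cap. A stand-alone sketch of the pattern (the IDs and cap value are taken from the patch, the struct is a stand-in for the radeon device, and the clock value is in the driver's internal units):

#include <stdio.h>
#include <stdint.h>

struct pci_ids {
	uint16_t device;
	uint16_t subsystem_vendor;
	uint16_t subsystem_device;
};

/* Clamp the maximum engine clock on the one problematic Oland board. */
static uint32_t apply_oland_quirk(const struct pci_ids *id, uint32_t max_sclk)
{
	if (id->device == 0x6604 &&
	    id->subsystem_vendor == 0x1028 &&
	    id->subsystem_device == 0x066F)
		max_sclk = 75000;	/* cap value from the patch */
	return max_sclk;
}

int main(void)
{
	struct pci_ids oland = { 0x6604, 0x1028, 0x066F };

	printf("max_sclk = %u\n", apply_oland_quirk(&oland, 100000));
	return 0;
}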
Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d12b8978142f69..72e1588580a118 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->device == 0x6604) && + (rdev->pdev->subsystem_vendor == 0x1028) && + (rdev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { From 18a8de1bc37e97dff1c96ee6cf49adbd02a0f775 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 19:24:19 -0400 Subject: [PATCH 1308/1911] drm/amdgpu/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index f55e45b52fbce2..33b504bafb8824 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->device == 0x6604) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { From 801a6aa9a63c90724e8899982ad8c7f16be1e2cd Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 15 Mar 2017 05:34:25 -0400 Subject: [PATCH 1309/1911] drm/amd/amdgpu: Fix debugfs reg read/write address width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MMIO space is wider now so we mask the lower 22 bits instead of 18. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4120b351a8e5cc..a3a105ec99e2d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || @@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || From 186ecf14e58befba434f0774eea89e35f64d3c6a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:48:42 +0100 Subject: [PATCH 1310/1911] parisc: Avoid compiler warnings with access_ok() Commit 09b871ffd4d8 (parisc: Define access_ok() as macro) missed to mark uaddr as used, which then gives compiler warnings about unused variables. 
Fix it by comparing uaddr to uaddr which then gets optimized away by the compiler. Signed-off-by: Helge Deller Fixes: 09b871ffd4d8 ("parisc: Define access_ok() as macro") --- arch/parisc/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index fb4382c28259b3..edfbf9d6a6dd76 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,7 +32,8 @@ * that put_user is the same as __put_user, etc. */ -#define access_ok(type, uaddr, size) (1) +#define access_ok(type, uaddr, size) \ + ( (uaddr) == (uaddr) ) #define put_user __put_user #define get_user __get_user From 3d20f1f7bd575d147ffa75621fa560eea0aec690 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 15 Mar 2017 18:10:47 +0200 Subject: [PATCH 1311/1911] net/openvswitch: Set the ipv6 source tunnel key address attribute correctly When dealing with ipv6 source tunnel key address attribute (OVS_TUNNEL_KEY_ATTR_IPV6_SRC) we are wrongly setting the tunnel dst ip, fix that. Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Acked-by: Jiri Benc Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6f5fa50f716d06..a08ff834676ba9 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -604,7 +604,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: - SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, nla_get_in6_addr(a), is_mask); ipv6 = true; break; From 88d339e2d3be955848a034970970931a7ed33956 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 15 Mar 2017 18:39:46 +0100 Subject: [PATCH 1312/1911] MAINTAINERS: remove MACVLAN and VLAN entries macvlan.c file seems to be both in VLAN and MACVLAN DRIVER, so remove the MACVLAN DRIVER since this is redundant. I propose with this patch to remove the VLAN (802.1Q) entry so this just falls into the NETWORKING [GENERAL]. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- MAINTAINERS | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c776906f67a9f6..33875e07482b8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7774,13 +7774,6 @@ F: include/net/mac80211.h F: net/mac80211/ F: drivers/net/wireless/mac80211_hwsim.[ch] -MACVLAN DRIVER -M: Patrick McHardy -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/macvlan.c -F: include/linux/if_macvlan.h - MAILBOX API M: Jassi Brar L: linux-kernel@vger.kernel.org @@ -13383,14 +13376,6 @@ W: https://linuxtv.org S: Maintained F: drivers/media/platform/vivid/* -VLAN (802.1Q) -M: Patrick McHardy -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/macvlan.c -F: include/linux/if_*vlan.h -F: net/8021q/ - VLYNQ BUS M: Florian Fainelli L: openwrt-devel@lists.openwrt.org (subscribers-only) From 5371bbf4b295eea334ed453efa286afa2c3ccff3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Mar 2017 12:57:21 -0700 Subject: [PATCH 1313/1911] net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled Suspending the PHY would be putting it in a low power state where it may no longer allow us to do Wake-on-LAN. 
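In outline the fix is a conditional around the PHY power transitions, mirrored on the resume side. A simplified sketch with device_may_wakeup()/phy_suspend() modelled as plain functions:

#include <stdio.h>
#include <stdbool.h>

struct toy_dev {
	bool may_wakeup;	/* Wake-on-LAN armed by the user */
	bool phy_powered;
};

static bool device_may_wakeup(const struct toy_dev *d) { return d->may_wakeup; }
static void phy_suspend(struct toy_dev *d) { d->phy_powered = false; }
static void phy_resume(struct toy_dev *d)  { d->phy_powered = true; }

static void toy_suspend(struct toy_dev *d)
{
	if (!device_may_wakeup(d))	/* keep the PHY up so WoL can work */
		phy_suspend(d);
}

static void toy_resume(struct toy_dev *d)
{
	if (!device_may_wakeup(d))	/* it was never suspended otherwise */
		phy_resume(d);
}

int main(void)
{
	struct toy_dev dev = { .may_wakeup = true, .phy_powered = true };

	toy_suspend(&dev);
	printf("PHY powered across suspend: %d\n", dev.phy_powered);
	toy_resume(&dev);
	return 0;
}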
Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 69015fa50f2096..365895ed3c3e24 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3481,7 +3481,8 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(priv->phydev); + if (!device_may_wakeup(d)) + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3578,7 +3579,8 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(priv->phydev); + if (!device_may_wakeup(d)) + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); From 622c36f143fc9566ba49d7cec994c2da1182d9e2 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 15 Mar 2017 15:11:23 -0500 Subject: [PATCH 1314/1911] amd-xgbe: Fix jumbo MTU processing on newer hardware Newer hardware does not provide a cumulative payload length when multiple descriptors are needed to handle the data. Once the MTU increases beyond the size that can be handled by a single descriptor, the SKB does not get built properly by the driver. The driver will now calculate the size of the data buffers used by the hardware. The first buffer of the first descriptor is for packet headers or packet headers and data when the headers can't be split. Subsequent descriptors in a multi-descriptor chain will not use the first buffer. The second buffer is used by all the descriptors in the chain for payload data. Based on whether the driver is processing the first, intermediate, or last descriptor it can calculate the buffer usage and build the SKB properly. Tested and verified on both old and new hardware. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 6 +- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 20 ++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 102 ++++++++++++-------- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 8a280e7d66bddc..86f1626816ffa6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1148,8 +1148,8 @@ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_LAST_INDEX 2 +#define RX_PACKET_ATTRIBUTES_LAST_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX 3 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX 4 @@ -1158,6 +1158,8 @@ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH 1 #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX 6 #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_FIRST_INDEX 7 +#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH 1 #define RX_NORMAL_DESC0_OVT_INDEX 0 #define RX_NORMAL_DESC0_OVT_WIDTH 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 937f37a5dcb2cd..24a687ce438818 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_channel *channel) /* Get the header length */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 1); rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, HL); if (rdata->rx.hdr_len) pdata->ext_stats.rx_split_header_packets++; + } else { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 0); } /* Get the RSS hash */ @@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_channel *channel) } } - /* Get the packet length */ - rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); - - if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) { - /* Not all the data has been transferred for this packet */ - XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 1); + /* Not all the data has been transferred for this packet */ + if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) return 0; - } /* This is the last of the data for this packet */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 0); + LAST, 1); + + /* Get the packet length */ + rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); /* Set checksum done indicator as appropriate */ if (netdev->features & NETIF_F_RXCSUM) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index ffea9859f5a72b..a713abd9d03e63 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1971,13 +1971,12 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, { struct sk_buff *skb; u8 *packet; - unsigned int copy_len; skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len); if (!skb) return NULL; - /* Start with the header buffer which may contain just the header + /* Pull in the header buffer which may contain just the header * or the header plus data */ dma_sync_single_range_for_cpu(pdata->dev, 
rdata->rx.hdr.dma_base, @@ -1986,30 +1985,49 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; - copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len; - copy_len = min(rdata->rx.hdr.dma_len, copy_len); - skb_copy_to_linear_data(skb, packet, copy_len); - skb_put(skb, copy_len); - - len -= copy_len; - if (len) { - /* Add the remaining data as a frag */ - dma_sync_single_range_for_cpu(pdata->dev, - rdata->rx.buf.dma_base, - rdata->rx.buf.dma_off, - rdata->rx.buf.dma_len, - DMA_FROM_DEVICE); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - rdata->rx.buf.pa.pages, - rdata->rx.buf.pa.pages_offset, - len, rdata->rx.buf.dma_len); - rdata->rx.buf.pa.pages = NULL; - } + skb_copy_to_linear_data(skb, packet, len); + skb_put(skb, len); return skb; } +static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet) +{ + /* Always zero if not the first descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) + return 0; + + /* First descriptor with split header, return header length */ + if (rdata->rx.hdr_len) + return rdata->rx.hdr_len; + + /* First descriptor but not the last descriptor and no split header, + * so the full buffer was used + */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.hdr.dma_len; + + /* First descriptor and last descriptor and no split header, so + * calculate how much of the buffer was used + */ + return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len); +} + +static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet, + unsigned int len) +{ + /* Always the full buffer if not the last descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.buf.dma_len; + + /* Last descriptor so calculate how much of the buffer was used + * for the last bit of data + */ + return rdata->rx.len - len; +} + static int xgbe_tx_poll(struct xgbe_channel *channel) { struct xgbe_prv_data *pdata = channel->pdata; @@ -2092,8 +2110,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct napi_struct *napi; struct sk_buff *skb; struct skb_shared_hwtstamps *hwtstamps; - unsigned int incomplete, error, context_next, context; - unsigned int len, rdesc_len, max_len; + unsigned int last, error, context_next, context; + unsigned int len, buf1_len, buf2_len, max_len; unsigned int received = 0; int packet_count = 0; @@ -2103,7 +2121,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!ring) return 0; - incomplete = 0; + last = 0; context_next = 0; napi = (pdata->per_channel_irq) ? 
&channel->napi : &pdata->napi; @@ -2137,9 +2155,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) received++; ring->cur++; - incomplete = XGMAC_GET_BITS(packet->attributes, - RX_PACKET_ATTRIBUTES, - INCOMPLETE); + last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + LAST); context_next = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT_NEXT); @@ -2148,7 +2165,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) CONTEXT); /* Earlier error, just drain the remaining data */ - if ((incomplete || context_next) && error) + if ((!last || context_next) && error) goto read_again; if (error || packet->errors) { @@ -2160,16 +2177,22 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) } if (!context) { - /* Length is cumulative, get this descriptor's length */ - rdesc_len = rdata->rx.len - len; - len += rdesc_len; + /* Get the data length in the descriptor buffers */ + buf1_len = xgbe_rx_buf1_len(rdata, packet); + len += buf1_len; + buf2_len = xgbe_rx_buf2_len(rdata, packet, len); + len += buf2_len; - if (rdesc_len && !skb) { + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, - rdesc_len); - if (!skb) + buf1_len); + if (!skb) { error = 1; - } else if (rdesc_len) { + goto skip_data; + } + } + + if (buf2_len) { dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.buf.dma_base, rdata->rx.buf.dma_off, @@ -2179,13 +2202,14 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, rdata->rx.buf.pa.pages_offset, - rdesc_len, + buf2_len, rdata->rx.buf.dma_len); rdata->rx.buf.pa.pages = NULL; } } - if (incomplete || context_next) +skip_data: + if (!last || context_next) goto read_again; if (!skb) @@ -2243,7 +2267,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) } /* Check if we need to save state before leaving */ - if (received && (incomplete || context_next)) { + if (received && (!last || context_next)) { rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdata->state_saved = 1; rdata->state.skb = skb; From 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2017 13:21:28 -0700 Subject: [PATCH 1315/1911] net: properly release sk_frag.page I mistakenly added the code to release sk->sk_frag in sk_common_release() instead of sk_destruct() TCP sockets using sk->sk_allocation == GFP_ATOMIC do no call sk_common_release() at close time, thus leaking one (order-3) page. iSCSI is using such sockets. Fixes: 5640f7685831 ("net: use a per task frag allocator") Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index a96d5f7a5734a5..acb0d413749968 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -2787,11 +2792,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); From 5f7c2beef819d9ea2d1b814edf6f5981420e9cf8 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Tue, 14 Mar 2017 13:28:44 -0400 Subject: [PATCH 1316/1911] scsi: qla2xxx: Fix crash in qla2xxx_eh_abort on bad ptr After a Qlogic card breaks when initializing (test case), the system can crash in qla2xxx_eh_abort if processing anything but a scsi command type srb. Fixes: 1535aa75a3d8 ("scsi: qla2xxx: fix invalid DMA access after command aborts in PCI device remove") Signed-off-by: Bill Kuzeja Acked-By: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 71b6b20ae82b94..579363a6f44f67 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1617,7 +1617,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) /* Don't abort commands in adapter during EEH * recovery as it's not accessible/responding. */ - if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) { + if (GET_CMD_SP(sp) && !ha->flags.eeh_busy && + (sp->type == SRB_SCSI_CMD)) { /* Get a reference to the sp and drop the lock. * The reference ensures this sp->done() call * - and not the call in qla2xxx_eh_abort() - From e498520edec6655e93ac5e768b04f4fd2299fe4d Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Tue, 14 Mar 2017 09:20:19 -0700 Subject: [PATCH 1317/1911] scsi: aacraid: Fix potential null access Currently, command threads fails to return ioctls commands for older controller versions, since it returns when all the fibs have been allocated. Another issue is even all the fibs have not been allocated, the correct allocated fibs is not updated nor freed. Fixes: 113156bcea9ef1e6 (scsi: aacraid: Reworked aac_command_thread) Reported-by: Tomas Henzl Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a3ad042934870d..c8172f16cf33cd 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2056,7 +2056,6 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool, { struct hw_fib **hw_fib_p; struct fib **fib_p; - int rcode = 1; hw_fib_p = hw_fib_pool; fib_p = fib_pool; @@ -2074,11 +2073,11 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool, } } + /* + * Get the actual number of allocated fibs + */ num = hw_fib_p - hw_fib_pool; - if (!num) - rcode = 0; - - return rcode; + return num; } static void wakeup_fibctx_threads(struct aac_dev *dev, @@ -2186,7 +2185,6 @@ static void aac_process_events(struct aac_dev *dev) struct fib *fib; unsigned long flags; spinlock_t *t_lock; - unsigned int rcode; t_lock = dev->queues->queue[HostNormCmdQueue].lock; spin_lock_irqsave(t_lock, flags); @@ -2269,8 +2267,8 @@ static void aac_process_events(struct aac_dev *dev) * Fill up fib pointer pools with actual fibs * and hw_fibs */ - rcode = fillup_pools(dev, hw_fib_pool, fib_pool, num); - if (!rcode) + num = fillup_pools(dev, hw_fib_pool, fib_pool, num); + if (!num) goto free_mem; /* From 046885251ae24b0fa1adbc5e3ca4a4bc9930c1f3 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Wed, 15 Mar 2017 01:32:53 -0700 Subject: [PATCH 1318/1911] scsi: qedi: Add PCI device-ID for QL41xxx adapters. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 8e3d92807cb803..92775a8b74b1cd 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2007,6 +2007,7 @@ static void qedi_remove(struct pci_dev *pdev) static struct pci_device_id qedi_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x165E) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x8084) }, { 0 }, }; MODULE_DEVICE_TABLE(pci, qedi_pci_tbl); From 645b8ef5943f95b74240568105ce2be21c6640b4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 14 Mar 2017 14:19:36 +0200 Subject: [PATCH 1319/1911] scsi: ufshcd-platform: remove the useless cast in ERR_PTR/IS_ERR IS_ERR and ERR_PTR already forcefully cast their argument, hence there is no need for additional (complex) casting. Signed-off-by: Tomas Winkler Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pltfrm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index a72a4ba78125b0..8e5e6c04c035e1 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -309,8 +309,8 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mmio_base = devm_ioremap_resource(dev, mem_res); - if (IS_ERR(*(void **)&mmio_base)) { - err = PTR_ERR(*(void **)&mmio_base); + if (IS_ERR(mmio_base)) { + err = PTR_ERR(mmio_base); goto out; } From 9b4f603e7a9f4282aec451063ffbbb8bb410dcd9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Mar 2017 00:12:16 +0100 Subject: [PATCH 1320/1911] cpufreq: Fix and clean up show_cpuinfo_cur_freq() There is a missing newline in show_cpuinfo_cur_freq(), so add it, but while at it clean that function up somewhat too. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Cc: All applicable --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 38b9fdf854a49a..b8ff617d449d92 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { unsigned int cur_freq = __cpufreq_get(policy); - if (!cur_freq) - return sprintf(buf, ""); - return sprintf(buf, "%u\n", cur_freq); + + if (cur_freq) + return sprintf(buf, "%u\n", cur_freq); + + return sprintf(buf, "\n"); } /** From a733ded50b6ea846200073e7381a302df71e13b3 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 5 Mar 2017 21:40:41 +0200 Subject: [PATCH 1321/1911] mei: fix deadlock on mei reset This patch fixes 'mei: synchronize irq before initiating a reset'. That patch introduced a deadlock between the irq thread and mei_reset(), as they are both holding the same device lock. ---> device_lock: mei_reset() <---- interrupt thread device_lock ---> synchronize_irq() wait on interrupt thread == (dead lock) The fix is to call synchronize_irq() before calling the locked mei_reset() function. Cc: #4.10+ Fixes: f302bb0de6ac (mei: synchronize irq before initiating a reset) Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index cfb1cdf176fa90..13c55b8f926186 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -124,8 +124,6 @@ int mei_reset(struct mei_device *dev) mei_clear_interrupts(dev); - mei_synchronize_irq(dev); - /* we're already in reset, cancel the init timer * if the reset was called due the hbm protocol error * we need to call it before hw start @@ -304,6 +302,9 @@ static void mei_reset_work(struct work_struct *work) container_of(work, struct mei_device, reset_work); int ret; + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + mutex_lock(&dev->device_lock); ret = mei_reset(dev); @@ -328,6 +329,9 @@ void mei_stop(struct mei_device *dev) mei_cancel_work(dev); + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + mutex_lock(&dev->device_lock); dev->dev_state = MEI_DEV_POWER_DOWN; From c6240cacdb2c3cb56a21fb3ea0c105154ab87a2a Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 5 Mar 2017 21:40:42 +0200 Subject: [PATCH 1322/1911] mei: don't wait for os version message reply The driver still struggles with firmware that does not reply to the OS version request. It is safe not to wait for the reply. First, the driver doesn't do anything with the reply; second, the connection is closed immediately, so the packet will be safely discarded if it is received; and last, the driver won't get stuck if the firmware never replies. 
Cc: #4.10+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 3600c9993a9830..29f2daed37e07b 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -112,11 +112,9 @@ struct mkhi_msg { static int mei_osver(struct mei_cl_device *cldev) { - int ret; const size_t size = sizeof(struct mkhi_msg_hdr) + sizeof(struct mkhi_fwcaps) + sizeof(struct mei_os_ver); - size_t length = 8; char buf[size]; struct mkhi_msg *req; struct mkhi_fwcaps *fwcaps; @@ -137,15 +135,7 @@ static int mei_osver(struct mei_cl_device *cldev) os_ver = (struct mei_os_ver *)fwcaps->data; os_ver->os_type = OSTYPE_LINUX; - ret = __mei_cl_send(cldev->cl, buf, size, mode); - if (ret < 0) - return ret; - - ret = __mei_cl_recv(cldev->cl, buf, length, 0); - if (ret < 0) - return ret; - - return 0; + return __mei_cl_send(cldev->cl, buf, size, mode); } static void mei_mkhi_fix(struct mei_cl_device *cldev) @@ -160,7 +150,7 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) return; ret = mei_osver(cldev); - if (ret) + if (ret < 0) dev_err(&cldev->dev, "OS version command failed %d\n", ret); mei_cldev_disable(cldev); From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:13:57 -0700 Subject: [PATCH 1323/1911] vmbus: use rcu for per-cpu channel list The per-cpu channel list is now referenced in the interrupt routine. This is mostly safe, since the host will not normally generate an interrupt while a channel is being deleted, but if it did there would be a use-after-free problem. To solve this, use RCU protection on the per-cpu list. Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 7 ++++--- drivers/hv/vmbus_drv.c | 6 +++++- include/linux/hyperv.h | 7 +++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f33465d78a0256..d2cfa3eb71a24e 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -350,7 +350,8 @@ static struct vmbus_channel *alloc_channel(void) static void free_channel(struct vmbus_channel *channel) { tasklet_kill(&channel->callback_event); - kfree(channel); + + kfree_rcu(channel, rcu); } static void percpu_channel_enq(void *arg) @@ -359,14 +360,14 @@ static void percpu_channel_enq(void *arg) struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); - list_add_tail(&channel->percpu_list, &hv_cpu->chan_list); + list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list); } static void percpu_channel_deq(void *arg) { struct vmbus_channel *channel = arg; - list_del(&channel->percpu_list); + list_del_rcu(&channel->percpu_list); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index da6b59ba594039..8370b9dc6037c1 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -939,8 +939,10 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) if (relid == 0) continue; + rcu_read_lock(); + /* Find channel based on relid */ - list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) { + list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) { if (channel->offermsg.child_relid != relid) continue; @@ -956,6 +958,8 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) tasklet_schedule(&channel->callback_event); } } + + rcu_read_unlock(); } } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a04..c4c7ae91f9d1df 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -845,6 +845,13 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + + /* + * Defer freeing channel until after all cpu's have + * gone through grace period. + */ + struct rcu_head rcu; + /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:13:58 -0700 Subject: [PATCH 1324/1911] vmbus: remove hv_event_tasklet_disable/enable With the recent introduction of the per-channel tasklet, we need to update the way we handle the 3 concurrency issues: 1. hv_process_channel_removal -> percpu_channel_deq vs. vmbus_chan_sched -> list_for_each_entry(..., percpu_list); 2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched. 3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event; The first 2 issues can be handled by Stephen's recent patch "vmbus: use rcu for per-cpu channel list", and the third issue can be handled by calling tasklet_disable in vmbus_close_internal here. We don't need the original hv_event_tasklet_disable/enable since we now use per-channel tasklet instead of the previous per-CPU tasklet, and actually we must remove them due to the side effect now: vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will start the per-channel callback prematurely, causing a NULL dereference (the channel may not have been properly configured to run the callback yet). Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Dexuan Cui Cc: "K. Y. 
Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Tested-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 12 ++++-------- drivers/hv/channel_mgmt.c | 19 ------------------- include/linux/hyperv.h | 3 --- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index bd0d1988feb2ad..57b2958205c71b 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -530,15 +530,13 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int ret; /* - * vmbus_on_event(), running in the tasklet, can race + * vmbus_on_event(), running in the per-channel tasklet, can race * with vmbus_close_internal() in the case of SMP guest, e.g., when * the former is accessing channel->inbound.ring_buffer, the latter - * could be freeing the ring_buffer pages. - * - * To resolve the race, we can serialize them by disabling the - * tasklet when the latter is running here. + * could be freeing the ring_buffer pages, so here we must stop it + * first. */ - hv_event_tasklet_disable(channel); + tasklet_disable(&channel->callback_event); /* * In case a device driver's probe() fails (e.g., @@ -605,8 +603,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); out: - hv_event_tasklet_enable(channel); - return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index d2cfa3eb71a24e..bf846d078d857a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -382,19 +382,6 @@ static void vmbus_release_relid(u32 relid) true); } -void hv_event_tasklet_disable(struct vmbus_channel *channel) -{ - tasklet_disable(&channel->callback_event); -} - -void hv_event_tasklet_enable(struct vmbus_channel *channel) -{ - tasklet_enable(&channel->callback_event); - - /* In case there is any pending event */ - tasklet_schedule(&channel->callback_event); -} - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { unsigned long flags; @@ -403,7 +390,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) BUG_ON(!channel->rescind); BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - hv_event_tasklet_disable(channel); if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -412,7 +398,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) percpu_channel_deq(channel); put_cpu(); } - hv_event_tasklet_enable(channel); if (channel->primary_channel == NULL) { list_del(&channel->listentry); @@ -506,7 +491,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) init_vp_index(newchannel, dev_type); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -516,7 +500,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) percpu_channel_enq(newchannel); put_cpu(); } - hv_event_tasklet_enable(newchannel); /* * This state is used to indicate a successful open @@ -566,7 +549,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) list_del(&newchannel->listentry); mutex_unlock(&vmbus_connection.channel_mutex); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -575,7 +557,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) percpu_channel_deq(newchannel); put_cpu(); } - 
hv_event_tasklet_enable(newchannel); vmbus_release_relid(newchannel->offermsg.child_relid); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c4c7ae91f9d1df..970771a5f73902 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_event_tasklet_disable(struct vmbus_channel *channel); -void hv_event_tasklet_enable(struct vmbus_channel *channel); - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); void vmbus_setevent(struct vmbus_channel *channel); From e9c18ae6eb2b312f16c63e34b43ea23926daa398 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 4 Mar 2017 18:13:59 -0700 Subject: [PATCH 1325/1911] Drivers: hv: util: move waiting for release to hv_utils_transport itself Waiting for release_event in all three drivers introduced issues on release as on_reset() hook is not always called. E.g. if the device was never opened we will never get the completion. Move the waiting code to hvutil_transport_destroy() and make sure it is only called when the device is open. hvt->lock serialization should guarantee the absence of races. Fixes: 5a66fecbf6aa ("Drivers: hv: util: kvp: Fix a rescind processing issue") Fixes: 20951c7535b5 ("Drivers: hv: util: Fcopy: Fix a rescind processing issue") Fixes: d77044d142e9 ("Drivers: hv: util: Backup: Fix a rescind processing issue") Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ---- drivers/hv/hv_kvp.c | 4 ---- drivers/hv/hv_snapshot.c | 4 ---- drivers/hv/hv_utils_transport.c | 12 ++++++++---- drivers/hv/hv_utils_transport.h | 1 + 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 9aee6014339dff..a5596a642ed06b 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -71,7 +71,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * This state maintains the version number registered by the daemon. */ @@ -331,7 +330,6 @@ static void fcopy_on_reset(void) if (cancel_delayed_work_sync(&fcopy_timeout_work)) fcopy_respond_to_host(HV_E_FAIL); - complete(&release_event); } int hv_fcopy_init(struct hv_util_service *srv) @@ -339,7 +337,6 @@ int hv_fcopy_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -361,5 +358,4 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index de263712e247c2..a1adfe2cfb3424 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -101,7 +101,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * Register the kernel component with the user-level daemon. 
* As part of this registration, pass the LIC version number. @@ -714,7 +713,6 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int @@ -723,7 +721,6 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -747,5 +744,4 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index bcc03f0748d61c..e659d1b94a5794 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -79,7 +79,6 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -361,13 +360,11 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { - init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -400,5 +397,4 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index c235a951526711..4402a71e23f7f7 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file) * connects back. 
*/ hvt_reset(hvt); - mutex_unlock(&hvt->lock); if (mode_old == HVUTIL_TRANSPORT_DESTROY) - hvt_transport_free(hvt); + complete(&hvt->release); + + mutex_unlock(&hvt->lock); return 0; } @@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name, init_waitqueue_head(&hvt->outmsg_q); mutex_init(&hvt->lock); + init_completion(&hvt->release); spin_lock(&hvt_list_lock); list_add(&hvt->list, &hvt_list); @@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt) if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0) cn_del_callback(&hvt->cn_id); - if (mode_old != HVUTIL_TRANSPORT_CHARDEV) - hvt_transport_free(hvt); + if (mode_old == HVUTIL_TRANSPORT_CHARDEV) + wait_for_completion(&hvt->release); + + hvt_transport_free(hvt); } diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h index d98f5225c3e693..79afb626e16689 100644 --- a/drivers/hv/hv_utils_transport.h +++ b/drivers/hv/hv_utils_transport.h @@ -41,6 +41,7 @@ struct hvutil_transport { int outmsg_len; /* its length */ wait_queue_head_t outmsg_q; /* poll/read wait queue */ struct mutex lock; /* protects struct members */ + struct completion release; /* synchronize with fd release */ }; struct hvutil_transport *hvutil_transport_init(const char *name, From 5a16dfc855127906fcd2935fb039bc8989313915 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:14:00 -0700 Subject: [PATCH 1326/1911] Drivers: hv: util: don't forget to init host_ts.lock Without the patch, I always get a "BUG: spinlock bad magic" warning. Fixes: 3716a49a81ba ("hv_utils: implement Hyper-V PTP source") Signed-off-by: Dexuan Cui Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 3042eaa13062bb..186b10083c552b 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -590,6 +590,8 @@ static int hv_timesync_init(struct hv_util_service *srv) if (!hyperv_cs) return -ENODEV; + spin_lock_init(&host_ts.lock); + INIT_WORK(&wrk.work, hv_set_host_time); /* From 9a5476020a5f06a0fc6f17097efc80275d2f03cd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 13 Mar 2017 15:57:09 -0700 Subject: [PATCH 1327/1911] Drivers: hv: vmbus: Don't leak channel ids If we cannot allocate memory for the channel, free the relid associated with the channel. Signed-off-by: K. Y. Srinivasan Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index bf846d078d857a..fbcb0635230828 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -796,6 +796,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) /* Allocate the channel object and save this offer. */ newchannel = alloc_channel(); if (!newchannel) { + vmbus_release_relid(offer->child_relid); pr_err("Unable to allocate channel object\n"); return; } From 5e030d5ce9d99a899b648413139ff65bab12b038 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 12 Mar 2017 20:00:30 -0700 Subject: [PATCH 1328/1911] Drivers: hv: vmbus: Don't leak memory when a channel is rescinded When we close a channel that has been rescinded, we will leak memory since vmbus_teardown_gpadl() returns an error. Fix this so that we can properly cleanup the memory allocated to the ring buffers. 
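For illustration, a simplified sketch of the caller-side effect (the exact shape of vmbus_close_internal() is assumed here, not quoted): once a rescinded channel is reported as successfully torn down, the close path goes on to release the ring buffer memory instead of bailing out early.

	/* sketch only: a real teardown failure still aborts the close,
	 * but a rescinded channel no longer does */
	ret = vmbus_teardown_gpadl(channel, channel->ringbuffer_gpadlhandle);
	if (ret)
		goto out;

	/* reached for rescinded channels too, so the pages are not leaked */
	free_pages((unsigned long)channel->ringbuffer_pages,
		   get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
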
Fixes: ccb61f8a99e6 ("Drivers: hv: vmbus: Fix a rescind handling bug") Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 57b2958205c71b..321b8833fa6f35 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -502,12 +502,15 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) wait_for_completion(&info->waitevent); - if (channel->rescind) { - ret = -ENODEV; - goto post_msg_err; - } - post_msg_err: + /* + * If the channel has been rescinded; + * we will be awakened by the rescind + * handler; set the error code to zero so we don't leak memory. + */ + if (channel->rescind) + ret = 0; + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); From 9a3fcf912ef7f5c6e18f9af6875dd13f7311f7aa Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 14 Mar 2017 09:50:35 +0200 Subject: [PATCH 1329/1911] iwlwifi: mvm: cleanup pending frames in DQA mode When a station is asleep, the fw will set it as "asleep". All queues that are used only by one station will be stopped by the fw. In pre-DQA mode this was relevant for aggregation queues. However, in DQA mode a queue is owned by one station only, so all queues will be stopped. As a result, we don't expect to get filtered frames back to mac80211 and don't have to maintain the entire pending_frames state logic, the same way as we do in aggregations. The correct behavior is to align DQA behavior with the aggregation queue behaviour pre-DQA: - Don't count pending frames. - Let mac80211 know we have frames in these queues so that it can properly handle trigger frames. When a trigger frame is received, mac80211 tells the driver to send frames from the queues using release_buffered_frames. The driver will tell the fw to let frames out even if the station is asleep. This is done by iwl_mvm_sta_modify_sleep_tx_count. 
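As a small illustration of the queue-drain side described above (condensed from the hunks below, not new driver API), the TIM bookkeeping now also fires for DQA queues:

	/* illustrative: when the (agg or DQA) queue for this TID drains,
	 * tell mac80211 so it can clear the TIM bit for this station */
	if (iwl_mvm_tid_queued(tid_data) == 0)
		ieee80211_sta_set_buffered(sta, tid, false);
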
Reported-and-tested-by: Jens Axboe Reported-by: Linus Torvalds Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 11 ++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 41 ++++++++----------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d37b1695c64eac..6927caecd48e51 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2319,7 +2319,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - /* Called when we need to transmit (a) frame(s) from agg queue */ + /* Called when we need to transmit (a) frame(s) from agg or dqa queue */ iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames, tids, more_data, true); @@ -2338,7 +2338,8 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw, for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) { struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; - if (tid_data->state != IWL_AGG_ON && + if (!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index bd1dcc863d8f33..b51a2853cc804a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3135,7 +3135,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg) + bool single_sta_queue) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_add_sta_cmd cmd = { @@ -3155,14 +3155,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT) cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]); - /* If we're releasing frames from aggregation queues then check if the - * all queues combined that we're releasing frames from have + /* If we're releasing frames from aggregation or dqa queues then check + * if all the queues that we're releasing frames from, combined, have: * - more frames than the service period, in which case more_data * needs to be set * - fewer than 'cnt' frames, in which case we need to adjust the * firmware command (but do that unconditionally) */ - if (agg) { + if (single_sta_queue) { int remaining = cnt; int sleep_tx_count; @@ -3172,7 +3172,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, u16 n_queued; tid_data = &mvmsta->tid_data[tid]; - if (WARN(tid_data->state != IWL_AGG_ON && + if (WARN(!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA, "TID %d state is %d\n", tid, tid_data->state)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 4be34f902278c8..1927ce6077984f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -547,7 +547,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg); + bool 
single_sta_queue); int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain); void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dd2b4a30081993..3f37075f4cde3c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -628,8 +629,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * values. * Note that we don't need to make sure it isn't agg'd, since we're * TXing non-sta + * For DQA mode - we shouldn't increase it though */ - atomic_inc(&mvm->pending_frames[sta_id]); + if (!iwl_mvm_is_dqa_supported(mvm)) + atomic_inc(&mvm->pending_frames[sta_id]); return 0; } @@ -1005,11 +1008,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, spin_unlock(&mvmsta->lock); - /* Increase pending frames count if this isn't AMPDU */ - if ((iwl_mvm_is_dqa_supported(mvm) && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) || - (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)) + /* Increase pending frames count if this isn't AMPDU or DQA queue */ + if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu) atomic_inc(&mvm->pending_frames[mvmsta->sta_id]); return 0; @@ -1079,12 +1079,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, lockdep_assert_held(&mvmsta->lock); if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && + tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA || + iwl_mvm_is_dqa_supported(mvm)) && iwl_mvm_tid_queued(tid_data) == 0) { /* - * Now that this aggregation queue is empty tell mac80211 so it - * knows we no longer have frames buffered for the station on - * this TID (for the TIM bitmap calculation.) + * Now that this aggregation or DQA queue is empty tell + * mac80211 so it knows we no longer have frames buffered for + * the station on this TID (for the TIM bitmap calculation.) 
*/ ieee80211_sta_set_buffered(sta, tid, false); } @@ -1257,7 +1258,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, u8 skb_freed = 0; u16 next_reclaimed, seq_ctl; bool is_ndp = false; - bool txq_agg = false; /* Is this TXQ aggregated */ __skb_queue_head_init(&skbs); @@ -1283,6 +1283,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_ACK; break; case TX_STATUS_FAIL_DEST_PS: + /* In DQA, the FW should have stopped the queue and not + * return this status + */ + WARN_ON(iwl_mvm_is_dqa_supported(mvm)); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; break; default: @@ -1387,15 +1391,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, bool send_eosp_ndp = false; spin_lock_bh(&mvmsta->lock); - if (iwl_mvm_is_dqa_supported(mvm)) { - enum iwl_mvm_agg_state state; - - state = mvmsta->tid_data[tid].state; - txq_agg = (state == IWL_AGG_ON || - state == IWL_EMPTYING_HW_QUEUE_DELBA); - } else { - txq_agg = txq_id >= mvm->first_agg_queue; - } if (!is_ndp) { tid_data->next_reclaimed = next_reclaimed; @@ -1452,11 +1447,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, * If the txq is not an AMPDU queue, there is no chance we freed * several skbs. Check that out... */ - if (txq_agg) + if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) goto out; /* We can't free more than one frame at once on a shared queue */ - WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1)); + WARN_ON(skb_freed > 1); /* If we have still frames for this STA nothing to do here */ if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id])) From b7048ea12fbb2724ee0cd30752d4fac43cab0651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 15 Mar 2017 16:31:58 +0200 Subject: [PATCH 1330/1911] drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ILK-BDW explicitly disable LP1+ watermarks from their .init_clock_gating() hooks. Unfortunately that hook gets called way too late since by that time we've already initialized all the watermark state tracking which then gets out of sync with the hardware state. We may eventually want to consider killing off the explicit LP1+ disable from .init_clock_gating(). In the meantime however, we can avoid the problem by reordering the init sequence such that intel_modeset_init_hw()->intel_init_clock_gating() gets called prior to the hardware state takeover. I suppose prior to the two stage watermark programming we were magically saved by something that forced the watermarks to be reprogrammed fully after .init_clock_gating() got called. But now that no longer happens. Note that the diff might look a bit odd as it kills off one call of intel_update_cdclk(), but that's fine because intel_modeset_init_hw() does the exact same thing. Previously we just did it twice. Actually even this new init sequence is pretty bogus as .init_clock_gating() really should be called before any gem hardware init since it can configure various clock gating workarounds and whatnot that affect the GT side as well. Also intel_modeset_init() really should get split up into better defined init stages. Another "fun" detail is that intel_modeset_gem_init() is where RPS/RC6 gets configured. Why that is done from the display code is beyond me. I've decided to leave all this be for now, and just try to fix the init sequence enough for watermarks to work. 
Cc: stable@vger.kernel.org Cc: Gabriele Mazzotta Cc: David Purton Cc: Matt Roper Cc: Maarten Lankhorst Reported-by: Gabriele Mazzotta Reported-by: David Purton Tested-by: Gabriele Mazzotta Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96645 Fixes: ed4a6a7ca853 ("drm/i915: Add two-stage ILK-style watermark programming (v11)") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170220140443.30891-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170315143158.31780-1-ville.syrjala@linux.intel.com (cherry picked from commit 5be6e33400992d3450e1c8234a5af353e1560580) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3282b0f4b13412..ed1f4f272b4fb3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16696,12 +16696,11 @@ int intel_modeset_init(struct drm_device *dev) } } - intel_update_czclk(dev_priv); - intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; - intel_shared_dpll_init(dev); + intel_update_czclk(dev_priv); + intel_modeset_init_hw(dev); + if (dev_priv->max_cdclk_freq == 0) intel_update_max_cdclk(dev_priv); @@ -17258,8 +17257,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); - intel_modeset_init_hw(dev); - intel_setup_overlay(dev_priv); } From abda288bb207e5c681306299126af8c022709c18 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 19 Feb 2017 16:33:35 -0800 Subject: [PATCH 1331/1911] auxdisplay: img-ascii-lcd: add missing sentinel entry in img_ascii_lcd_matches The OF device table must be terminated, otherwise we'll be walking past it and into areas unknown. Fixes: 0cad855fbd08 ("auxdisplay: img-ascii-lcd: driver for simple ASCII...") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Tested-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/img-ascii-lcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index bf43b5d2aafcaf..83f1439e57fd8c 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -218,6 +218,7 @@ static const struct of_device_id img_ascii_lcd_matches[] = { { .compatible = "img,boston-lcd", .data = &boston_config }, { .compatible = "mti,malta-lcd", .data = &malta_config }, { .compatible = "mti,sead3-lcd", .data = &sead3_config }, + { /* sentinel */ } }; /** From 4e841d3eb9294ce4137fdb5d0a88f1bceab9c212 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:21 -0800 Subject: [PATCH 1332/1911] mwifiex: pcie: don't leak DMA buffers when removing When PCIe FLR support was added, much of the remove/release code for PCIe was migrated to ->down_dev(), but ->down_dev() is never called for device removal. Let's refactor the cleanup to be done in both cases. Also, drop the comments above mwifiex_cleanup_pcie(), because they were clearly wrong, and it's better to have clear and obvious code than to detail the code steps in comments anyway. 
Fixes: 4c5dae59d2e9 ("mwifiex: add PCIe function level reset support") Cc: Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index a0d918094889df..b8c990d10d6ecb 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2739,6 +2739,21 @@ static void mwifiex_pcie_device_dump(struct mwifiex_adapter *adapter) schedule_work(&card->work); } +static void mwifiex_pcie_free_buffers(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (reg->sleep_cookie) + mwifiex_pcie_delete_sleep_cookie_buf(adapter); + + mwifiex_pcie_delete_cmdrsp_buf(adapter); + mwifiex_pcie_delete_evtbd_ring(adapter); + mwifiex_pcie_delete_rxbd_ring(adapter); + mwifiex_pcie_delete_txbd_ring(adapter); + card->cmdrsp_buf = NULL; +} + /* * This function initializes the PCI-E host memory space, WCB rings, etc. * @@ -2850,13 +2865,6 @@ static int mwifiex_init_pcie(struct mwifiex_adapter *adapter) /* * This function cleans up the allocated card buffers. - * - * The following are freed by this function - - * - TXBD ring buffers - * - RXBD ring buffers - * - Event BD ring buffers - * - Command response ring buffer - * - Sleep cookie buffer */ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter) { @@ -2875,6 +2883,8 @@ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter) "Failed to write driver not-ready signature\n"); } + mwifiex_pcie_free_buffers(adapter); + if (pdev) { pci_iounmap(pdev, card->pci_mmap); pci_iounmap(pdev, card->pci_mmap1); @@ -3126,10 +3136,7 @@ static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter) pci_iounmap(pdev, card->pci_mmap1); } -/* This function cleans up the PCI-E host memory space. - * Some code is extracted from mwifiex_unregister_dev() - * - */ +/* This function cleans up the PCI-E host memory space. */ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; @@ -3140,14 +3147,7 @@ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) adapter->seq_num = 0; - if (reg->sleep_cookie) - mwifiex_pcie_delete_sleep_cookie_buf(adapter); - - mwifiex_pcie_delete_cmdrsp_buf(adapter); - mwifiex_pcie_delete_evtbd_ring(adapter); - mwifiex_pcie_delete_rxbd_ring(adapter); - mwifiex_pcie_delete_txbd_ring(adapter); - card->cmdrsp_buf = NULL; + mwifiex_pcie_free_buffers(adapter); } static struct mwifiex_if_ops pcie_ops = { From ba1c7e45ec224cc8d2df33ecaee1946d48e79231 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:22 -0800 Subject: [PATCH 1333/1911] mwifiex: set adapter->dev before starting to use mwifiex_dbg() The mwifiex_dbg() log handler utilizes the struct device in adapter->dev. Without it, it decides not to print anything. As of commit 2e02b5814217 ("mwifiex: Allow mwifiex early access to device structure"), we started assigning that pointer only after we finished mwifiex_register() -- this effectively neuters any mwifiex_dbg() logging done before this point. Let's move the device assignment into mwifiex_register(). 
Fixes: 2e02b5814217 ("mwifiex: Allow mwifiex early access to device structure") Cc: Rajat Jain Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 5ebca1d0cfc750..43d040e02e4d3a 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -57,8 +57,8 @@ MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0"); * In case of any errors during inittialization, this function also ensures * proper cleanup before exiting. */ -static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops, - void **padapter) +static int mwifiex_register(void *card, struct device *dev, + struct mwifiex_if_ops *if_ops, void **padapter) { struct mwifiex_adapter *adapter; int i; @@ -68,6 +68,7 @@ static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops, return -ENOMEM; *padapter = adapter; + adapter->dev = dev; adapter->card = card; /* Save interface specific operations in adapter */ @@ -1568,12 +1569,11 @@ mwifiex_add_card(void *card, struct completion *fw_done, { struct mwifiex_adapter *adapter; - if (mwifiex_register(card, if_ops, (void **)&adapter)) { + if (mwifiex_register(card, dev, if_ops, (void **)&adapter)) { pr_err("%s: software init failed\n", __func__); goto err_init_sw; } - adapter->dev = dev; mwifiex_probe_of(adapter); adapter->iface_type = iface_type; From 36908c4e5b1063eff3e11336fab544a76c625b69 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:23 -0800 Subject: [PATCH 1334/1911] mwifiex: uninit wakeup info when removing device We manually init wakeup info, but we don't detach it on device removal. This means that if we (for example) rmmod + modprobe the driver, the device framework might return -EEXIST the second time, and we'll complain in the logs: [ 839.311881] mwifiex_pcie 0000:01:00.0: fail to init wakeup for mwifiex AFAICT, there's no other negative effect. But we can fix this by disabling wakeup on remove, similar to what a few other drivers do (e.g., the power supply framework). This code (and bug) has existed on SDIO for a while, but it got moved around and enabled for PCIe with commit 853402a00823 ("mwifiex: Enable WoWLAN for both sdio and pcie"). Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 43d040e02e4d3a..b62e03d11c2e27 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1718,6 +1718,9 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter) wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); + if (adapter->irq_wakeup >= 0) + device_init_wakeup(adapter->dev, false); + /* Unregister device */ mwifiex_dbg(adapter, INFO, "info: unregister device\n"); From cf8c44d42c4f4f38468a53e9ce2a0314e7ebeaa1 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 28 Feb 2017 18:54:31 +0530 Subject: [PATCH 1335/1911] MAINTAINERS: update for mwifiex driver maintainers Ganapathi & Xinming are starting to take a more active role in the mwifiex driver maintainership here onwards on account of organizational changes. 
CC: Xinming Hu CC: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Nishant Sarmukadam Signed-off-by: Cathy Luo Signed-off-by: Kalle Valo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 33875e07482b8a..078c38217daabf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7846,6 +7846,8 @@ F: drivers/net/ethernet/marvell/mvneta.* MARVELL MWIFIEX WIRELESS DRIVER M: Amitkumar Karwar M: Nishant Sarmukadam +M: Ganapathi Bhat +M: Xinming Hu L: linux-wireless@vger.kernel.org S: Maintained F: drivers/net/wireless/marvell/mwifiex/ From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 8 Mar 2017 14:30:34 +0100 Subject: [PATCH 1336/1911] x86/mpx: Make unnecessarily global function static Make the function get_user_bd_entry() static as it is not used outside of arch/x86/mm/mpx.c. This fixes a sparse warning. Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5126dfd52b182d..cd44ae727df7f4 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, * we might run off the end of the bounds table if we are on * a 64-bit kernel and try to get 8 bytes. */ -int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, +static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, long __user *bd_entry_ptr) { u32 bd_entry_32; From dcc3b5ffe1b32771c9a22e2c916fb94c4fcf5b79 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 21:51:28 -0800 Subject: [PATCH 1337/1911] sched/deadline: Add missing update_rq_clock() in dl_task_timer() The following warning can be triggered by hot-unplugging the CPU on which an active SCHED_DEADLINE task is running: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 0 at kernel/sched/sched.h:833 replenish_dl_entity+0x71e/0xc40 rq->clock_update_flags < RQCF_ACT_SKIP CPU: 7 PID: 0 Comm: swapper/7 Tainted: G B 4.11.0-rc1+ #24 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0x85/0xc4 __warn+0x172/0x1b0 warn_slowpath_fmt+0xb4/0xf0 ? __warn+0x1b0/0x1b0 ? debug_check_no_locks_freed+0x2c0/0x2c0 ? cpudl_set+0x3d/0x2b0 replenish_dl_entity+0x71e/0xc40 enqueue_task_dl+0x2ea/0x12e0 ? dl_task_timer+0x777/0x990 ? __hrtimer_run_queues+0x270/0xa50 dl_task_timer+0x316/0x990 ? enqueue_task_dl+0x12e0/0x12e0 ? enqueue_task_dl+0x12e0/0x12e0 __hrtimer_run_queues+0x270/0xa50 ? hrtimer_cancel+0x20/0x20 ? hrtimer_interrupt+0x119/0x600 hrtimer_interrupt+0x19c/0x600 ? trace_hardirqs_off+0xd/0x10 local_apic_timer_interrupt+0x74/0xe0 smp_apic_timer_interrupt+0x76/0xa0 apic_timer_interrupt+0x93/0xa0 The DL task will be migrated to a suitable later deadline rq once the DL timer fires and the current rq is offline. The rq clock of the new rq should be updated. This patch fixes it by updating the rq clock after holding the new rq's rq lock. 
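Reduced to a sketch (the generic pattern, not the exact hunk): any path that takes an rq lock and then runs code reading rq_clock() must refresh the clock first, which is what the timer path was missing after the migration.

	raw_spin_lock(&rq->lock);
	update_rq_clock(rq);	/* make rq_clock(rq) valid again */
	/* ... enqueue_task_dl() / replenish_dl_entity() may now read rq_clock(rq) ... */
	raw_spin_unlock(&rq->lock);
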
Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1488865888-15894-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 99b2c33a9fbcb4..c6db3fd727fec1 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -638,6 +638,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + update_rq_clock(rq); /* * Now that the task has been migrated to the new RQ and we From 6e5f32f7a43f45ee55c401c0b9585eb01f9629a8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 17 Feb 2017 12:07:30 +0000 Subject: [PATCH 1338/1911] sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting If we crossed a sample window while in NO_HZ we will add LOAD_FREQ to the pending sample window time on exit, setting the next update not one window into the future, but two. This situation on exiting NO_HZ is described by: this_rq->calc_load_update < jiffies < calc_load_update In this scenario, what we should be doing is: this_rq->calc_load_update = calc_load_update [ next window ] But what we actually do is: this_rq->calc_load_update = calc_load_update + LOAD_FREQ [ next+1 window ] This has the effect of delaying load average updates for potentially up to ~9seconds. This can result in huge spikes in the load average values due to per-cpu uninterruptible task counts being out of sync when accumulated across all CPUs. It's safe to update the per-cpu active count if we wake between sample windows because any load that we left in 'calc_load_idle' will have been zero'd when the idle load was folded in calc_global_load(). This issue is easy to reproduce before, commit 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking") just by forking short-lived process pipelines built from ps(1) and grep(1) in a loop. I'm unable to reproduce the spikes after that commit, but the bug still seems to be present from code review. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Guittot Fixes: commit 5167e8d ("sched/nohz: Rewrite and fix load-avg computation -- again") Link: http://lkml.kernel.org/r/20170217120731.11868-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- kernel/sched/loadavg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 7296b7308ecaeb..3a55f3f9ffe4e5 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -202,8 +202,9 @@ void calc_load_exit_idle(void) struct rq *this_rq = this_rq(); /* - * If we're still before the sample window, we're done. + * If we're still before the pending sample window, we're done. */ + this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -212,7 +213,6 @@ void calc_load_exit_idle(void) * accounted through the nohz accounting, so skip the entire deal and * sync up for the next window. 
*/ - this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update + 10)) this_rq->calc_load_update += LOAD_FREQ; } From caeb5882979bc6f3c8766fcf59c6269b38f521bc Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 17 Feb 2017 12:07:31 +0000 Subject: [PATCH 1339/1911] sched/loadavg: Use {READ,WRITE}_ONCE() for sample window 'calc_load_update' is accessed without any kind of locking and there's a clear assumption in the code that only a single value is read or written. Make this explicit by using READ_ONCE() and WRITE_ONCE(), and avoid unintentionally seeing multiple values, or having the load/stores split. Technically the loads in calc_global_*() don't require this since those are the only functions that update 'calc_load_update', but I've added the READ_ONCE() for consistency. Suggested-by: Peter Zijlstra Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Thomas Gleixner Cc: Vincent Guittot Link: http://lkml.kernel.org/r/20170217120731.11868-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- kernel/sched/loadavg.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 3a55f3f9ffe4e5..f15fb2bdbc0dee 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void) * If the folding window started, make sure we start writing in the * next idle-delta. */ - if (!time_before(jiffies, calc_load_update)) + if (!time_before(jiffies, READ_ONCE(calc_load_update))) idx++; return idx & 1; @@ -204,7 +204,7 @@ void calc_load_exit_idle(void) /* * If we're still before the pending sample window, we're done. */ - this_rq->calc_load_update = calc_load_update; + this_rq->calc_load_update = READ_ONCE(calc_load_update); if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp, */ static void calc_global_nohz(void) { + unsigned long sample_window; long delta, active, n; - if (!time_before(jiffies, calc_load_update + 10)) { + sample_window = READ_ONCE(calc_load_update); + if (!time_before(jiffies, sample_window + 10)) { /* * Catch-up, fold however many we are behind still */ - delta = jiffies - calc_load_update - 10; + delta = jiffies - sample_window - 10; n = 1 + (delta / LOAD_FREQ); active = atomic_long_read(&calc_load_tasks); @@ -324,7 +326,7 @@ static void calc_global_nohz(void) avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ); } /* @@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { } */ void calc_global_load(unsigned long ticks) { + unsigned long sample_window; long active, delta; - if (time_before(jiffies, calc_load_update + 10)) + sample_window = READ_ONCE(calc_load_update); + if (time_before(jiffies, sample_window + 10)) return; /* @@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks) avenrun[1] = calc_load(avenrun[1], EXP_5, active); avenrun[2] = calc_load(avenrun[2], EXP_15, active); - calc_load_update += LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); /* * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. 
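The access pattern the patch above settles on, reduced to a sketch (illustrative, not the full functions): take one snapshot of the shared window, test against the snapshot, and publish the advanced window with a single store.

	unsigned long sample_window = READ_ONCE(calc_load_update);

	if (time_before(jiffies, sample_window + 10))
		return;
	/* ... fold the pending samples ... */
	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);
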
From 17fcbd590d0c3e35bd9646e2215f86586378bc42 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 25 Feb 2017 01:17:53 +0100 Subject: [PATCH 1340/1911] locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y We hang if SIGKILL has been sent, but the task is stuck in down_read() (after do_exit()), even though no task is doing down_write() on the rwsem in question: INFO: task libupnp:21868 blocked for more than 120 seconds. libupnp D 0 21868 1 0x08100008 ... Call Trace: __schedule() schedule() __down_read() do_exit() do_group_exit() __wake_up_parent() This bug has already been fixed for CONFIG_RWSEM_XCHGADD_ALGORITHM=y in the following commit: 04cafed7fc19 ("locking/rwsem: Fix down_write_killable()") ... however, this bug also exists for CONFIG_RWSEM_GENERIC_SPINLOCK=y. Signed-off-by: Niklas Cassel Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Andrew Morton Cc: Linus Torvalds Cc: Niklas Cassel Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d47996082f52 ("locking/rwsem: Introduce basis for down_write_killable()") Link: http://lkml.kernel.org/r/1487981873-12649-1-git-send-email-niklass@axis.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-spinlock.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 7bc24d477805d8..c65f7989f850d1 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) */ if (sem->count == 0) break; - if (signal_pending_state(state, current)) { - ret = -EINTR; - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); @@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) } /* got the lock */ sem->count = -1; -out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; + +out_nolock: + list_del(&waiter.list); + if (!list_empty(&sem->wait_list)) + __rwsem_do_wake(sem, 1); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return -EINTR; } void __sched __down_write(struct rw_semaphore *sem) From 03270c6ac6207fc55bbf9d20d195029dca210c79 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 6 Mar 2017 23:23:42 +0000 Subject: [PATCH 1341/1911] parport: fix attempt to write duplicate procfiles Usually every parallel port will have a single pardev registered with it. But the ppdev driver is an exception. This userspace parallel port driver allows creating multiple parallel port devices for a single parallel port. As a result we were getting a warning like: "sysctl table check failed: /dev/parport/parport0/devices/ppdev0/timeslice Sysctl already exists" Use the same logic as used in parport_register_device() and register the proc files only once for each parallel port. 
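The guard amounts to the usual test_and_set_bit() idiom (a sketch of the hunk below): only the first pardevice to atomically claim the per-port flag registers the proc files, later ones skip it.

	if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) {
		port->proc_device = par_dev;
		parport_device_proc_register(par_dev);
	}
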
Fixes: 6fa45a226897 ("parport: add device-model to parport subsystem") Cc: stable # v4.4+ Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656 Bugzilla: https://bugs.archlinux.org/task/52322 Tested-by: James Feeney Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/parport/share.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/parport/share.c b/drivers/parport/share.c index bc090daa850a4b..5dc53d420ca8ca 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -939,8 +939,10 @@ parport_register_dev_model(struct parport *port, const char *name, * pardevice fields. -arca */ port->ops->init_state(par_dev, par_dev->state); - port->proc_device = par_dev; - parport_device_proc_register(par_dev); + if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { + port->proc_device = par_dev; + parport_device_proc_register(par_dev); + } return par_dev; From 9a69645dde1188723d80745c1bc6ee9af2cbe2a7 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 6 Mar 2017 23:23:43 +0000 Subject: [PATCH 1342/1911] ppdev: fix registering same device name Usually every parallel port will have a single pardev registered with it. But the ppdev driver is an exception. This userspace parallel port driver allows creating multiple parallel port devices for a single parallel port. As a result we were getting a warning like: "sysfs: cannot create duplicate filename '/devices/parport0/ppdev0.0'", and with that, many parallel port printers stopped working. We had been using the minor number as the id field when registering a parallel port device with a parallel port. But when there are multiple parallel port devices for one single parallel port, they all tried to register with the same name, like 'pardev0.0', and everything started failing. Use an incremented index as the id instead of the minor number. Fixes: 8b7d3a9d903e ("ppdev: use new parport device model") Cc: stable # v4.9+ Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656 Bugzilla: https://bugs.archlinux.org/task/52322 Tested-by: James Feeney Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/char/ppdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 2a558c706581b2..3e73bcdf9e658d 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -84,11 +84,14 @@ struct pp_struct { struct ieee1284_info state; struct ieee1284_info saved_state; long default_inactivity; + int index; }; /* should we use PARDEVICE_MAX here? */ static struct device *devices[PARPORT_MAX]; +static DEFINE_IDA(ida_index); + /* pp_struct.flags bitfields */ #define PP_CLAIMED (1<<0) #define PP_EXCL (1<<1) @@ -290,7 +293,7 @@ static int register_device(int minor, struct pp_struct *pp) struct pardevice *pdev = NULL; char *name; struct pardev_cb ppdev_cb; - int rc = 0; + int rc = 0, index; name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor); if (name == NULL) @@ -303,20 +306,23 @@ static int register_device(int minor, struct pp_struct *pp) goto err; } + index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL); memset(&ppdev_cb, 0, sizeof(ppdev_cb)); ppdev_cb.irq_func = pp_irq; ppdev_cb.flags = (pp->flags & PP_EXCL) ? 
PARPORT_FLAG_EXCL : 0; ppdev_cb.private = pp; - pdev = parport_register_dev_model(port, name, &ppdev_cb, minor); + pdev = parport_register_dev_model(port, name, &ppdev_cb, index); parport_put_port(port); if (!pdev) { pr_warn("%s: failed to register device!\n", name); rc = -ENXIO; + ida_simple_remove(&ida_index, index); goto err; } pp->pdev = pdev; + pp->index = index; dev_dbg(&pdev->dev, "registered pardevice\n"); err: kfree(name); @@ -755,6 +761,7 @@ static int pp_release(struct inode *inode, struct file *file) if (pp->pdev) { parport_unregister_device(pp->pdev); + ida_simple_remove(&ida_index, pp->index); pp->pdev = NULL; pr_debug(CHRDEV "%x: unregistered pardevice\n", minor); } From c3423563c68fc454b805b46cb69fd4816db933e2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Mar 2017 07:58:06 -0700 Subject: [PATCH 1343/1911] vmw_vmci: handle the return value from pci_alloc_irq_vectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns the number of vectors allocated when successful, so check only for a negative error. Fixes: 3bb434cd ("vmw_vmci: switch to pci_irq_alloc_vectors") Signed-off-by: Christoph Hellwig Reported-by: Loïc Yhuel Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_guest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 9d659542a335b4..dad5abee656ef5 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -566,10 +566,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, */ error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS, PCI_IRQ_MSIX); - if (error) { + if (error < 0) { error = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); - if (error) + if (error < 0) goto err_remove_bitmap; } else { vmci_dev->exclusive_vectors = true; From 5ac69d37784b237707a7b15d199cdb6c6fdb6780 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:57 +0100 Subject: [PATCH 1344/1911] sched/deadline: Make sure the replenishment timer fires in the next period Currently, the replenishment timer is set to fire at the deadline of a task. Although that works for implicit deadline tasks because the deadline is equal to the beginning of the next period, that is not correct for constrained deadline tasks (deadline < period). For instance: f.c: --------------- %< --------------- int main (void) { for(;;); } --------------- >% --------------- # gcc -o f f.c # trace-cmd record -e sched:sched_switch \ -e syscalls:sys_exit_sched_setattr \ chrt -d --sched-runtime 490000000 \ --sched-deadline 500000000 \ --sched-period 1000000000 0 ./f # trace-cmd report | grep "{pid of ./f}" After setting parameters, the task is replenished and continues running until being throttled: f-11295 [003] 13322.113776: sys_exit_sched_setattr: 0x0 The task is throttled after running 492.318 ms, as expected: f-11295 [003] 13322.606094: sched_switch: f:11295 [-1] R ==> watchdog/3:32 [0] But then, the task is replenished 500.719 ms after the first replenishment: -0 [003] 13322.614495: sched_switch: swapper/3:0 [120] R ==> f:11295 [-1] Running for 490.277 ms: f-11295 [003] 13323.104772: sched_switch: f:11295 [-1] R ==> swapper/3:0 [120] Hence, in the first period, the task runs 2 * runtime, and that is a bug. During the first replenishment, the next deadline is set one period away. So the runtime / period starts to be respected.
However, as the second replenishment took place at the wrong instant, the next replenishment will also happen at the wrong instant of time. Rather than occurring in the nth period away from the first activation, it is taking place in the (nth period - relative deadline). Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Luca Abeni Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/ac50d89887c25285b47465638354b63362f8adff.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c6db3fd727fec1..445e2787bf808c 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, } } +static inline u64 dl_next_period(struct sched_dl_entity *dl_se) +{ + return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; +} + /* * If the entity depleted all its runtime, and if we want it to sleep * while waiting for some new execution time to become available, we - * set the bandwidth enforcement timer to the replenishment instant + * set the bandwidth replenishment timer to the replenishment instant * and try to activate it. * * Notice that it is important for the caller to know if the timer @@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p) * that it is actually coming from rq->clock and not from * hrtimer's time base reading. */ - act = ns_to_ktime(dl_se->deadline); + act = ns_to_ktime(dl_next_period(dl_se)); now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); From df8eac8cafce7d086be3bd5cf5a838fa37594dfb Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:58 +0100 Subject: [PATCH 1345/1911] sched/deadline: Throttle a constrained deadline task activated after the deadline During the activation, CBS checks if it can reuse the current task's runtime and period. If the deadline of the task is in the past, CBS cannot use the runtime, and so it replenishes the task. This rule works fine for implicit deadline tasks (deadline == period), and the CBS was designed for implicit deadline tasks. However, a task with constrained deadline (deadline < period) might be awakened after the deadline, but before the next period. In this case, replenishing the task would allow it to run for runtime / deadline. As in this case deadline < period, CBS enables a task to run for more than the runtime / period. In a very loaded system, this can cause a domino effect, making other tasks miss their deadlines. To avoid this problem, in the activation of a constrained deadline task after the deadline but before the next period, throttle the task and set the replenishing timer to the beginning of the next period, unless it is boosted.
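The window checked here is "past the absolute deadline, but not yet at the start of the next period", where the next period start is deadline - dl_deadline + dl_period (the dl_next_period() helper from the previous patch). A stand-alone sketch of that check, using the 2 ms / 2 s parameters of the reproducer below; the timestamps are invented and dl_time_before() is reduced to a plain comparison, so this is only a model of the test, not the kernel code:

--------------- %< ---------------
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool dl_time_before(uint64_t a, uint64_t b) { return a < b; }

int main(void)
{
	uint64_t dl_deadline = 2000000ULL;	/* relative deadline: 2 ms */
	uint64_t dl_period   = 2000000000ULL;	/* period: 2 s */
	uint64_t deadline    = 10000000ULL;	/* absolute deadline (ns), example value */
	uint64_t now         = 14000000ULL;	/* woke up 4 ms after the deadline */

	/* dl_next_period(): one full period after the start of the current one */
	uint64_t next_period = deadline - dl_deadline + dl_period;

	if (dl_time_before(deadline, now) && dl_time_before(now, next_period))
		printf("throttle and arm the timer for the next period\n");
	else
		printf("normal wakeup: replenish/recycle as usual\n");
	return 0;
}
--------------- >% ---------------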
Reproducer: --------------- %< --------------- int main (int argc, char **argv) { int ret; int flags = 0; unsigned long l = 0; struct timespec ts; struct sched_attr attr; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); attr.sched_policy = SCHED_DEADLINE; attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ ts.tv_sec = 0; ts.tv_nsec = 2000 * 1000; /* 2 ms */ ret = sched_setattr(0, &attr, flags); if (ret < 0) { perror("sched_setattr"); exit(-1); } for(;;) { /* XXX: you may need to adjust the loop */ for (l = 0; l < 150000; l++); /* * The ideia is to go to sleep right before the deadline * and then wake up before the next period to receive * a new replenishment. */ nanosleep(&ts, NULL); } exit(0); } --------------- >% --------------- On my box, this reproducer uses almost 50% of the CPU time, which is obviously wrong for a task with 2/2000 reservation. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/edf58354e01db46bf42df8d2dd32418833f68c89.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 445e2787bf808c..736d8b9d9bab83 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -695,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) timer->function = dl_task_timer; } +/* + * During the activation, CBS checks if it can reuse the current task's + * runtime and period. If the deadline of the task is in the past, CBS + * cannot use the runtime, and so it replenishes the task. This rule + * works fine for implicit deadline tasks (deadline == period), and the + * CBS was designed for implicit deadline tasks. However, a task with + * constrained deadline (deadine < period) might be awakened after the + * deadline, but before the next period. In this case, replenishing the + * task would allow it to run for runtime / deadline. As in this case + * deadline < period, CBS enables a task to run for more than the + * runtime / period. In a very loaded system, this can cause a domino + * effect, making other tasks miss their deadlines. + * + * To avoid this problem, in the activation of a constrained deadline + * task after the deadline but before the next period, throttle the + * task and set the replenishing timer to the begin of the next period, + * unless it is boosted. 
+ */ +static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) +{ + struct task_struct *p = dl_task_of(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); + + if (dl_time_before(dl_se->deadline, rq_clock(rq)) && + dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { + if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + return; + dl_se->dl_throttled = 1; + } +} + static int dl_runtime_exceeded(struct sched_dl_entity *dl_se) { @@ -928,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) __dequeue_dl_entity(dl_se); } +static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline < dl_se->dl_period; +} + static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -953,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } + /* + * Check if a constrained deadline task was activated + * after the deadline but before the next period. + * If that is the case, the task will be throttled and + * the replenishment timer will be set to the next period. + */ + if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) + dl_check_constrained_dl(&p->dl); + /* * If p is throttled, we do nothing. In fact, if it exhausted * its budget it needs a replenishment and, since it now is on From 2317d5f1c34913bac5971d93d69fb6c31bb74670 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 15:10:59 +0100 Subject: [PATCH 1346/1911] sched/deadline: Use deadline instead of period when calculating overflow I was testing Daniel's changes with his test case, and tweaked it a little. Instead of having the runtime equal to the deadline, I increased the deadline ten fold. Daniel's test case had: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ To make it more interesting, I changed it to: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 20 * 1000 * 1000; /* 20 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ The results were rather surprising. The behavior that Daniel's patch was fixing came back. The task started using much more than .1% of the CPU. More like 20%. Looking into this I found that it was due to the dl_entity_overflow() constantly returning true. That's because it uses the relative period against relative runtime vs the absolute deadline against absolute runtime. runtime / (deadline - t) > dl_runtime / dl_period There's even a comment mentioning this, and saying that when relative deadline equals relative period, that the equation is the same as using deadline instead of period. That comment is backwards! What we really want is: runtime / (deadline - t) > dl_runtime / dl_deadline We care about if the runtime can make its deadline, not its period. And then we can say "when the deadline equals the period, the equation is the same as using dl_period instead of dl_deadline". After correcting this, now when the task gets enqueued, it can throttle correctly, and Daniel's fix to the throttling of sleeping deadline tasks works even when the runtime and deadline are not the same. 
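A stand-alone sketch of the corrected comparison with the 2 ms / 20 ms / 2 s parameters quoted above. The left/right products mirror the cross-multiplied, DL_SCALE-shifted test in the diff below; DL_SCALE is assumed to be 10 here, and the leftover runtime and time-to-deadline values are invented for illustration, so this models the check rather than reproducing the kernel function:

--------------- %< ---------------
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DL_SCALE 10	/* assumed scale factor */

/* true means "cannot recycle the current runtime/deadline, hand out fresh parameters" */
static bool overflow(uint64_t runtime, uint64_t time_to_deadline,
		     uint64_t dl_runtime, uint64_t dl_bound)
{
	/* runtime / (deadline - t) > dl_runtime / dl_bound, cross-multiplied
	 * and shifted down so the products stay within 64 bits */
	uint64_t left  = (dl_bound >> DL_SCALE) * (runtime >> DL_SCALE);
	uint64_t right = (time_to_deadline >> DL_SCALE) * (dl_runtime >> DL_SCALE);

	return right < left;
}

int main(void)
{
	uint64_t dl_runtime  = 2000000ULL;	/*  2 ms reservation */
	uint64_t dl_deadline = 20000000ULL;	/* 20 ms relative deadline */
	uint64_t dl_period   = 2000000000ULL;	/*  2 s period */
	uint64_t runtime     = 500000ULL;	/* 0.5 ms of runtime left */
	uint64_t to_deadline = 10000000ULL;	/* 10 ms until the absolute deadline */

	/* the old check compared against the period, the new one against the deadline */
	printf("against dl_period:   %s\n",
	       overflow(runtime, to_deadline, dl_runtime, dl_period) ? "replenish" : "keep");
	printf("against dl_deadline: %s\n",
	       overflow(runtime, to_deadline, dl_runtime, dl_deadline) ? "replenish" : "keep");
	return 0;
}
--------------- >% ---------------

With 0.5 ms left and 10 ms to go the task can clearly meet its deadline at a density well under 2/20, so "keep" is the right answer; the period-based check says "replenish" and hands out fresh runtime on every wakeup, which is the over-use described above.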
Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/02135a27f1ae3fe5fd032568a5a2f370e190e8d7.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 736d8b9d9bab83..a2ce59015642c3 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * * This function returns true if: * - * runtime / (deadline - t) > dl_runtime / dl_period , + * runtime / (deadline - t) > dl_runtime / dl_deadline , * * IOW we can't recycle current parameters. * - * Notice that the bandwidth check is done against the period. For + * Notice that the bandwidth check is done against the deadline. For * task with deadline equal to period this is the same of using - * dl_deadline instead of dl_period in the equation above. + * dl_period instead of dl_deadline in the equation above. */ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, struct sched_dl_entity *pi_se, u64 t) @@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * (pi_se->dl_runtime >> DL_SCALE); From ea90e0dc8cecba6359b481e24d9c37160f6f524f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Mar 2017 14:26:04 +0100 Subject: [PATCH 1347/1911] nl80211: fix dumpit error path RTNL deadlocks Sowmini pointed out Dmitry's RTNL deadlock report to me, and it turns out to be perfectly accurate - there are various error paths that miss unlock of the RTNL. To fix those, change the locking a bit to not be conditional in all those nl80211_prepare_*_dump() functions, but make those require the RTNL to start with, and fix the buggy error paths. This also let me use sparse (by appropriately overriding the rtnl_lock/rtnl_unlock functions) to validate the changes. 
Cc: stable@vger.kernel.org Reported-by: Sowmini Varadhan Reported-by: Dmitry Vyukov Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 127 ++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 71 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7f8be4e321a32..2312dc2ffdb98b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, { int err; - rtnl_lock(); - if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, genl_family_attrbuf(&nl80211_fam), nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; *wdev = __cfg80211_wdev_from_attrs( sock_net(skb->sk), genl_family_attrbuf(&nl80211_fam)); - if (IS_ERR(*wdev)) { - err = PTR_ERR(*wdev); - goto out_unlock; - } + if (IS_ERR(*wdev)) + return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; @@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - if (!*wdev) { - err = -ENODEV; - goto out_unlock; - } + if (!*wdev) + return -ENODEV; } return 0; - out_unlock: - rtnl_unlock(); - return err; -} - -static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) -{ - rtnl_unlock(); } /* IE validation */ @@ -2608,17 +2592,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; rtnl_lock(); if (!cb->args[2]) { struct nl80211_dump_wiphy_state state = { .filter_wiphy = -1, }; - int ret; ret = nl80211_dump_wiphy_parse(skb, cb, &state); if (ret) - return ret; + goto out_unlock; filter_wiphy = state.filter_wiphy; @@ -2663,12 +2647,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; } out: - rtnl_unlock(); - cb->args[0] = wp_idx; cb->args[1] = if_idx; - return skb->len; + ret = skb->len; + out_unlock: + rtnl_unlock(); + + return ret; } static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) @@ -4452,9 +4438,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!wdev->netdev) { err = -EINVAL; @@ -4489,7 +4476,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5275,9 +5262,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -5310,7 +5298,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5470,9 +5458,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, 
&rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -5505,7 +5494,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -7674,9 +7663,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); - if (err) + if (err) { + rtnl_unlock(); return err; + } wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -7699,7 +7691,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return skb->len; } @@ -7784,9 +7776,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + rtnl_lock(); res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) - return res; + goto out_err; /* prepare_wdev_dump parsed the attributes */ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -7827,7 +7820,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return res; } @@ -11508,17 +11501,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, void *data = NULL; unsigned int data_len = 0; - rtnl_lock(); - if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -11538,23 +11527,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || - !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { - err = -EINVAL; - goto out_unlock; - } + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_unlock; - } + if (IS_ERR(*rdev)) + return PTR_ERR(*rdev); vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -11567,19 +11552,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (!vcmd->dumpit) + return -EOPNOTSUPP; vcmd_idx = i; break; } - if (vcmd_idx < 0) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (vcmd_idx < 0) + return -EOPNOTSUPP; if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -11596,9 +11577,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, /* keep rtnl locked in successful case */ return 0; - out_unlock: - rtnl_unlock(); - return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -11613,9 +11591,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, int err; struct nlattr *vendor_data; + rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto 
out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; @@ -11624,15 +11603,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { - if (!wdev) - return -EINVAL; + if (!wdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && - !wdev->netdev) - return -EINVAL; + !wdev->netdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (!wdev_running(wdev)) - return -ENETDOWN; + if (!wdev_running(wdev)) { + err = -ENETDOWN; + goto out; + } } } From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 10 Mar 2017 12:28:18 -0600 Subject: [PATCH 1348/1911] crypto: ccp - Assign DMA commands to the channel's CCP The CCP driver generally uses a round-robin approach when assigning operations to available CCPs. For the DMA engine, however, the DMA mappings of the SGs are associated with a specific CCP. When an IOMMU is enabled, the IOMMU is programmed based on this specific device. If the DMA operations are not performed by that specific CCP then addressing errors and I/O page faults will occur. Update the CCP driver to allow a specific CCP device to be requested for an operation and use this in the DMA engine support. Cc: # 4.9.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 5 ++++- drivers/crypto/ccp/ccp-dmaengine.c | 1 + include/linux/ccp.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 511ab042b5e793..92d1c6959f08b8 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd) { - struct ccp_device *ccp = ccp_get_device(); + struct ccp_device *ccp; unsigned long flags; unsigned int i; int ret; + /* Some commands might need to be sent to a specific device */ + ccp = cmd->ccp ? cmd->ccp : ccp_get_device(); + if (!ccp) return -ENODEV; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index e5d9278f401974..8d0eeb46d4a274 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -390,6 +390,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, goto err; ccp_cmd = &cmd->ccp_cmd; + ccp_cmd->ccp = chan->ccp; ccp_pt = &ccp_cmd->u.passthru_nomap; ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c71dd8fa57640e..c41b8d99dd0e7f 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -556,7 +556,7 @@ enum ccp_engine { * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) - * @ccp: CCP device to be run on (ccp driver use only) + * @ccp: CCP device to be run on * @ret: operation return code (ccp driver use only) * @flags: cmd processing flags * @engine: CCP operation to perform From 69db7009318758769d625b023402161c750f7876 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 14 Mar 2017 07:36:01 -0400 Subject: [PATCH 1349/1911] hwrng: amd - Revert managed API changes After commit 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"), the amd-rng driver uses devres with pci_dev->dev to keep track of resources, but does not actually register a PCI driver. 
This results in the following issues: 1. The message WARNING: CPU: 2 PID: 621 at drivers/base/dd.c:349 driver_probe_device+0x38c is output when the i2c_amd756 driver loads and attempts to register a PCI driver. The PCI & device subsystems assume that no resources have been registered for the device, and the WARN_ON() triggers since amd-rng has already do so. 2. The driver leaks memory because the driver does not attach to a device. The driver only uses the PCI device as a reference. devm_*() functions will release resources on driver detach, which the amd-rng driver will never do. As a result, 3. The driver cannot be reloaded because there is always a use of the ioport and region after the first load of the driver. Revert the changes made by 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"). Cc: Signed-off-by: Prarit Bhargava Fixes: 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"). Cc: Matt Mackall Cc: Corentin LABBE Cc: PrasannaKumar Muralidharan Cc: Wei Yongjun Cc: linux-crypto@vger.kernel.org Cc: linux-geode@lists.infradead.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/amd-rng.c | 42 ++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index 4a99ac756f0815..9959c762da2f8e 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -55,6 +55,7 @@ MODULE_DEVICE_TABLE(pci, pci_tbl); struct amd768_priv { void __iomem *iobase; struct pci_dev *pcidev; + u32 pmbase; }; static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -148,33 +149,58 @@ static int __init mod_init(void) if (pmbase == 0) return -EIO; - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET, - PMBASE_SIZE, DRV_NAME)) { + if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) { dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n", pmbase + 0xF0); - return -EBUSY; + err = -EBUSY; + goto out; } - priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET, - PMBASE_SIZE); + priv->iobase = ioport_map(pmbase + PMBASE_OFFSET, PMBASE_SIZE); if (!priv->iobase) { pr_err(DRV_NAME "Cannot map ioport\n"); - return -ENOMEM; + err = -EINVAL; + goto err_iomap; } amd_rng.priv = (unsigned long)priv; + priv->pmbase = pmbase; priv->pcidev = pdev; pr_info(DRV_NAME " detected\n"); - return devm_hwrng_register(&pdev->dev, &amd_rng); + err = hwrng_register(&amd_rng); + if (err) { + pr_err(DRV_NAME " registering failed (%d)\n", err); + goto err_hwrng; + } + return 0; + +err_hwrng: + ioport_unmap(priv->iobase); +err_iomap: + release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE); +out: + kfree(priv); + return err; } static void __exit mod_exit(void) { + struct amd768_priv *priv; + + priv = (struct amd768_priv *)amd_rng.priv; + + hwrng_unregister(&amd_rng); + + ioport_unmap(priv->iobase); + + release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE); + + kfree(priv); } module_init(mod_init); From 8c75704ebcac2ffa31ee7bcc359baf701b52bf00 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 14 Mar 2017 07:36:02 -0400 Subject: [PATCH 1350/1911] hwrng: geode - Revert managed API changes After commit e9afc746299d ("hwrng: geode - Use linux/io.h instead of asm/io.h") the geode-rng driver uses devres with pci_dev->dev to keep track of resources, but does not actually register a PCI driver. 
This results in the following issues: 1. The driver leaks memory because the driver does not attach to a device. The driver only uses the PCI device as a reference. devm_*() functions will release resources on driver detach, which the geode-rng driver will never do. As a result, 2. The driver cannot be reloaded because there is always a use of the ioport and region after the first load of the driver. Revert the changes made by e9afc746299d ("hwrng: geode - Use linux/io.h instead of asm/io.h"). Cc: Signed-off-by: Prarit Bhargava Fixes: 6e9b5e76882c ("hwrng: geode - Migrate to managed API") Cc: Matt Mackall Cc: Corentin LABBE Cc: PrasannaKumar Muralidharan Cc: Wei Yongjun Cc: linux-crypto@vger.kernel.org Cc: linux-geode@lists.infradead.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/geode-rng.c | 50 +++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index e7a2459420291b..e1d421a36a138d 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -31,6 +31,9 @@ #include #include + +#define PFX KBUILD_MODNAME ": " + #define GEODE_RNG_DATA_REG 0x50 #define GEODE_RNG_STATUS_REG 0x54 @@ -82,6 +85,7 @@ static struct hwrng geode_rng = { static int __init mod_init(void) { + int err = -ENODEV; struct pci_dev *pdev = NULL; const struct pci_device_id *ent; void __iomem *mem; @@ -89,27 +93,43 @@ static int __init mod_init(void) for_each_pci_dev(pdev) { ent = pci_match_id(pci_tbl, pdev); - if (ent) { - rng_base = pci_resource_start(pdev, 0); - if (rng_base == 0) - return -ENODEV; - - mem = devm_ioremap(&pdev->dev, rng_base, 0x58); - if (!mem) - return -ENOMEM; - geode_rng.priv = (unsigned long)mem; - - pr_info("AMD Geode RNG detected\n"); - return devm_hwrng_register(&pdev->dev, &geode_rng); - } + if (ent) + goto found; } - /* Device not found. */ - return -ENODEV; + goto out; + +found: + rng_base = pci_resource_start(pdev, 0); + if (rng_base == 0) + goto out; + err = -ENOMEM; + mem = ioremap(rng_base, 0x58); + if (!mem) + goto out; + geode_rng.priv = (unsigned long)mem; + + pr_info("AMD Geode RNG detected\n"); + err = hwrng_register(&geode_rng); + if (err) { + pr_err(PFX "RNG registering failed (%d)\n", + err); + goto err_unmap; + } +out: + return err; + +err_unmap: + iounmap(mem); + goto out; } static void __exit mod_exit(void) { + void __iomem *mem = (void __iomem *)geode_rng.priv; + + hwrng_unregister(&geode_rng); + iounmap(mem); } module_init(mod_init); From f717629c7f834ab2efa05c7dbf0826f1d7c32ade Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 16 Mar 2017 14:37:11 +0530 Subject: [PATCH 1351/1911] powerpc: Wire up statx() syscall Test runs on a ppc64 BE guest succeeded. linux/samples/statx/test-statx program was executed on the following file types, 1. Regular file 2. Directory 3. device file 4. symlink 5. 
Named pipe The test run also included invoking test-statx with the runtime options provided in the main() function of test-statx.c Signed-off-by: Chandan Rajendra Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4b369d83fe9ce1..1c9470881c4abe 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -387,3 +387,4 @@ SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) +SYSCALL(statx) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index eb1acee91a2034..9ba11dbcaca98f 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 383 +#define NR_syscalls 384 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2f26335a3c42a8..b85f1422885746 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From 5c71ad17f97e84d6d7e11a8e193d5d96890ed2ed Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 9 Mar 2017 01:45:39 +0800 Subject: [PATCH 1352/1911] EDAC, pnd2_edac: Add new EDAC driver for Intel SoC platforms Initial target for this driver is the Intel Apollo Lake platform and Denverton micro-server, they use the same internal memory controller IP called Pondicherry2. Memory controller registers are not in PCI config space like earlier Intel memory controllers. For Apollo Lake platform they are accessed via a "side-band" interface, for Denverton micro-server they are access via PCI config space and memory map I/O. This driver is for Apollo Lake and Denverton, but only the Denverton is fully enabled while we wait for the sideband driver. Apollo lake driver and initial cut at Denverton driver by Tony Luck. Extensive cleanup, refactoring and basic verification by Qiuxu Zhuo. Signed-off-by: Tony Luck Signed-off-by: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20170308174539.14432-1-qiuxu.zhuo@intel.com Signed-off-by: Borislav Petkov --- MAINTAINERS | 6 + drivers/edac/Kconfig | 9 + drivers/edac/Makefile | 1 + drivers/edac/pnd2_edac.c | 1542 ++++++++++++++++++++++++++++++++++++++ drivers/edac/pnd2_edac.h | 301 ++++++++ 5 files changed, 1859 insertions(+) create mode 100644 drivers/edac/pnd2_edac.c create mode 100644 drivers/edac/pnd2_edac.h diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe48481f..572fe4252ac462 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4776,6 +4776,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/mpc85xx_edac.[ch] +EDAC-PND2 +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/pnd2_edac.[ch] + EDAC-PASEMI M: Egor Martovetsky L: linux-edac@vger.kernel.org diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 82d85cce81f815..be3eac6ad54d89 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -259,6 +259,15 @@ config EDAC_SKX Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. 
+config EDAC_PND2 + tristate "Intel Pondicherry2" + depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + help + Support for error detection and correction on the Intel + Pondicherry2 Integrated Memory Controller. This SoC IP is + first used on the Apollo Lake platform and Denverton + micro-server but may appear on others in the future. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 88e472e8b9a918..587107e909967d 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o obj-$(CONFIG_EDAC_SKX) += skx_edac.o +obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c new file mode 100644 index 00000000000000..14d39f05226efd --- /dev/null +++ b/drivers/edac/pnd2_edac.c @@ -0,0 +1,1542 @@ +/* + * Driver for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * [Derived from sb_edac.c] + * + * Translation of system physical addresses to DIMM addresses + * is a two stage process: + * + * First the Pondicherry 2 memory controller handles slice and channel interleaving + * in "sys2pmi()". This is (almost) completley common between platforms. + * + * Then a platform specific dunit (DIMM unit) completes the process to provide DIMM, + * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "edac_mc.h" +#include "edac_module.h" +#include "pnd2_edac.h" + +#define APL_NUM_CHANNELS 4 +#define DNV_NUM_CHANNELS 2 +#define DNV_MAX_DIMMS 2 /* Max DIMMs per channel */ + +enum type { + APL, + DNV, /* All requests go to PMI CH0 on each slice (CH1 disabled) */ +}; + +struct dram_addr { + int chan; + int dimm; + int rank; + int bank; + int row; + int col; +}; + +struct pnd2_pvt { + int dimm_geom[APL_NUM_CHANNELS]; + u64 tolm, tohm; +}; + +/* + * System address space is divided into multiple regions with + * different interleave rules in each. The as0/as1 regions + * have no interleaving at all. The as2 region is interleaved + * between two channels. The mot region is magic and may overlap + * other regions, with its interleave rules taking precedence. + * Addresses not in any of these regions are interleaved across + * all four channels. 
+ */ +static struct region { + u64 base; + u64 limit; + u8 enabled; +} mot, as0, as1, as2; + +static struct dunit_ops { + char *name; + enum type type; + int pmiaddr_shift; + int pmiidx_shift; + int channels; + int dimms_per_channel; + int (*rd_reg)(int port, int off, int op, void *data, size_t sz, char *name); + int (*get_registers)(void); + int (*check_ecc)(void); + void (*mk_region)(char *name, struct region *rp, void *asym); + void (*get_dimm_config)(struct mem_ctl_info *mci); + int (*pmi2mem)(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg); +} *ops; + +static struct mem_ctl_info *pnd2_mci; + +#define PND2_MSG_SIZE 256 + +/* Debug macros */ +#define pnd2_printk(level, fmt, arg...) \ + edac_printk(level, "pnd2", fmt, ##arg) + +#define pnd2_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "pnd2", fmt, ##arg) + +#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12 +#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13 +#define SELECTOR_DISABLED (-1) +#define _4GB (1ul << 32) + +#define PMI_ADDRESS_WIDTH 31 +#define PND_MAX_PHYS_BIT 39 + +#define APL_ASYMSHIFT 28 +#define DNV_ASYMSHIFT 31 +#define CH_HASH_MASK_LSB 6 +#define SLICE_HASH_MASK_LSB 6 +#define MOT_SLC_INTLV_BIT 12 +#define LOG2_PMI_ADDR_GRANULARITY 5 +#define MOT_SHIFT 24 + +#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) +#define U64_LSHIFT(val, s) ((u64)(val) << (s)) + +#ifdef CONFIG_X86_INTEL_SBI_APL +#include "linux/platform_data/sbi_apl.h" +int sbi_send(int port, int off, int op, u32 *data) +{ + struct sbi_apl_message sbi_arg; + int ret, read = 0; + + memset(&sbi_arg, 0, sizeof(sbi_arg)); + + if (op == 0 || op == 4 || op == 6) + read = 1; + else + sbi_arg.data = *data; + + sbi_arg.opcode = op; + sbi_arg.port_address = port; + sbi_arg.register_offset = off; + ret = sbi_apl_commit(&sbi_arg); + if (ret || sbi_arg.status) + edac_dbg(2, "sbi_send status=%d ret=%d data=%x\n", + sbi_arg.status, ret, sbi_arg.data); + + if (ret == 0) + ret = sbi_arg.status; + + if (ret == 0 && read) + *data = sbi_arg.data; + + return ret; +} +#else +int sbi_send(int port, int off, int op, u32 *data) +{ + return -EUNATCH; +} +#endif + +static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + int ret = 0; + + edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op); + switch (sz) { + case 8: + ret = sbi_send(port, off + 4, op, (u32 *)(data + 4)); + case 4: + ret = sbi_send(port, off, op, (u32 *)data); + pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name, + sz == 8 ? 
*((u32 *)(data + 4)) : 0, *((u32 *)data), ret); + break; + } + + return ret; +} + +static u64 get_mem_ctrl_hub_base_addr(void) +{ + struct b_cr_mchbar_lo_pci lo; + struct b_cr_mchbar_hi_pci hi; + struct pci_dev *pdev; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x48, (u32 *)&lo); + pci_read_config_dword(pdev, 0x4c, (u32 *)&hi); + pci_dev_put(pdev); + } else { + return 0; + } + + if (!lo.enable) { + edac_dbg(2, "MMIO via memory controller hub base address is disabled!\n"); + return 0; + } + + return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15); +} + +static u64 get_sideband_reg_base_addr(void) +{ + struct pci_dev *pdev; + u32 hi, lo; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x19dd, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x10, &lo); + pci_read_config_dword(pdev, 0x14, &hi); + pci_dev_put(pdev); + return (U64_LSHIFT(hi, 32) | U64_LSHIFT(lo, 0)); + } else { + return 0xfd000000; + } +} + +static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + struct pci_dev *pdev; + char *base; + u64 addr; + + if (op == 4) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, off, data); + pci_dev_put(pdev); + } else { + /* MMIO via memory controller hub base address */ + if (op == 0 && port == 0x4c) { + addr = get_mem_ctrl_hub_base_addr(); + if (!addr) + return -ENODEV; + } else { + /* MMIO via sideband register base address */ + addr = get_sideband_reg_base_addr(); + if (!addr) + return -ENODEV; + addr += (port << 16); + } + + base = ioremap((resource_size_t)addr, 0x10000); + if (!base) + return -ENODEV; + + if (sz == 8) + *(u32 *)(data + 4) = *(u32 *)(base + off + 4); + *(u32 *)data = *(u32 *)(base + off); + + iounmap(base); + } + + edac_dbg(2, "Read %s=%.8x_%.8x\n", name, + (sz == 8) ? 
*(u32 *)(data + 4) : 0, *(u32 *)data); + + return 0; +} + +#define RD_REGP(regp, regname, port) \ + ops->rd_reg(port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +#define RD_REG(regp, regname) \ + ops->rd_reg(regname ## _port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +static u64 top_lm, top_hm; +static bool two_slices; +static bool two_channels; /* Both PMI channels in one slice enabled */ + +static u8 sym_chan_mask; +static u8 asym_chan_mask; +static u8 chan_mask; + +static int slice_selector = -1; +static int chan_selector = -1; +static u64 slice_hash_mask; +static u64 chan_hash_mask; + +static void mk_region(char *name, struct region *rp, u64 base, u64 limit) +{ + rp->enabled = 1; + rp->base = base; + rp->limit = limit; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, limit); +} + +static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask) +{ + if (mask == 0) { + pr_info(FW_BUG "MOT mask cannot be zero\n"); + return; + } + if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) { + pr_info(FW_BUG "MOT mask not power of two\n"); + return; + } + if (base & ~mask) { + pr_info(FW_BUG "MOT region base/mask alignment error\n"); + return; + } + rp->base = base; + rp->limit = (base | ~mask) & GENMASK_ULL(PND_MAX_PHYS_BIT, 0); + rp->enabled = 1; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, rp->limit); +} + +static bool in_region(struct region *rp, u64 addr) +{ + if (!rp->enabled) + return false; + + return rp->base <= addr && addr <= rp->limit; +} + +static int gen_sym_mask(struct b_cr_slice_channel_hash *p) +{ + int mask = 0; + + if (!p->slice_0_mem_disabled) + mask |= p->sym_slice0_channel_enabled; + + if (!p->slice_1_disabled) + mask |= p->sym_slice1_channel_enabled << 2; + + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static int gen_asym_mask(struct b_cr_slice_channel_hash *p, + struct b_cr_asym_mem_region0_mchbar *as0, + struct b_cr_asym_mem_region1_mchbar *as1, + struct b_cr_asym_2way_mem_region_mchbar *as2way) +{ + const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + int mask = 0; + + if (as2way->asym_2way_interleave_enable) + mask = intlv[as2way->asym_2way_intlv_mode]; + if (as0->slice0_asym_enable) + mask |= (1 << as0->slice0_asym_channel_select); + if (as1->slice1_asym_enable) + mask |= (4 << as1->slice1_asym_channel_select); + if (p->slice_0_mem_disabled) + mask &= 0xc; + if (p->slice_1_disabled) + mask &= 0x3; + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static struct b_cr_tolud_pci tolud; +static struct b_cr_touud_lo_pci touud_lo; +static struct b_cr_touud_hi_pci touud_hi; +static struct b_cr_asym_mem_region0_mchbar asym0; +static struct b_cr_asym_mem_region1_mchbar asym1; +static struct b_cr_asym_2way_mem_region_mchbar asym_2way; +static struct b_cr_mot_out_base_mchbar mot_base; +static struct b_cr_mot_out_mask_mchbar mot_mask; +static struct b_cr_slice_channel_hash chash; + +/* Apollo Lake dunit */ +/* + * Validated on board with just two DIMMs in the [0] and [2] positions + * in this array. Other port number matches documentation, but caution + * advised. 
+ */ +static const int apl_dports[APL_NUM_CHANNELS] = { 0x18, 0x10, 0x11, 0x19 }; +static struct d_cr_drp0 drp0[APL_NUM_CHANNELS]; + +/* Denverton dunit */ +static const int dnv_dports[DNV_NUM_CHANNELS] = { 0x10, 0x12 }; +static struct d_cr_dsch dsch; +static struct d_cr_ecc_ctrl ecc_ctrl[DNV_NUM_CHANNELS]; +static struct d_cr_drp drp[DNV_NUM_CHANNELS]; +static struct d_cr_dmap dmap[DNV_NUM_CHANNELS]; +static struct d_cr_dmap1 dmap1[DNV_NUM_CHANNELS]; +static struct d_cr_dmap2 dmap2[DNV_NUM_CHANNELS]; +static struct d_cr_dmap3 dmap3[DNV_NUM_CHANNELS]; +static struct d_cr_dmap4 dmap4[DNV_NUM_CHANNELS]; +static struct d_cr_dmap5 dmap5[DNV_NUM_CHANNELS]; + +static void apl_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region0_mchbar *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice0_asym_base, APL_ASYMSHIFT), + U64_LSHIFT(a->slice0_asym_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); +} + +static void dnv_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region_denverton *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice_asym_base, DNV_ASYMSHIFT), + U64_LSHIFT(a->slice_asym_limit, DNV_ASYMSHIFT) + + GENMASK_ULL(DNV_ASYMSHIFT - 1, 0)); +} + +static int apl_get_registers(void) +{ + int i; + + if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar)) + return -ENODEV; + + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i])) + return -ENODEV; + + return 0; +} + +static int dnv_get_registers(void) +{ + int i; + + if (RD_REG(&dsch, d_cr_dsch)) + return -ENODEV; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + if (RD_REGP(&ecc_ctrl[i], d_cr_ecc_ctrl, dnv_dports[i]) || + RD_REGP(&drp[i], d_cr_drp, dnv_dports[i]) || + RD_REGP(&dmap[i], d_cr_dmap, dnv_dports[i]) || + RD_REGP(&dmap1[i], d_cr_dmap1, dnv_dports[i]) || + RD_REGP(&dmap2[i], d_cr_dmap2, dnv_dports[i]) || + RD_REGP(&dmap3[i], d_cr_dmap3, dnv_dports[i]) || + RD_REGP(&dmap4[i], d_cr_dmap4, dnv_dports[i]) || + RD_REGP(&dmap5[i], d_cr_dmap5, dnv_dports[i])) + return -ENODEV; + + return 0; +} + +/* + * Read all the h/w config registers once here (they don't + * change at run time. Figure out which address ranges have + * which interleave characteristics. 
+ */ +static int get_registers(void) +{ + const int intlv[] = { 10, 11, 12, 12 }; + + if (RD_REG(&tolud, b_cr_tolud_pci) || + RD_REG(&touud_lo, b_cr_touud_lo_pci) || + RD_REG(&touud_hi, b_cr_touud_hi_pci) || + RD_REG(&asym0, b_cr_asym_mem_region0_mchbar) || + RD_REG(&asym1, b_cr_asym_mem_region1_mchbar) || + RD_REG(&mot_base, b_cr_mot_out_base_mchbar) || + RD_REG(&mot_mask, b_cr_mot_out_mask_mchbar) || + RD_REG(&chash, b_cr_slice_channel_hash)) + return -ENODEV; + + if (ops->get_registers()) + return -ENODEV; + + if (ops->type == DNV) { + /* PMI channel idx (always 0) for asymmetric region */ + asym0.slice0_asym_channel_select = 0; + asym1.slice1_asym_channel_select = 0; + /* PMI channel bitmap (always 1) for symmetric region */ + chash.sym_slice0_channel_enabled = 0x1; + chash.sym_slice1_channel_enabled = 0x1; + } + + if (asym0.slice0_asym_enable) + ops->mk_region("as0", &as0, &asym0); + + if (asym1.slice1_asym_enable) + ops->mk_region("as1", &as1, &asym1); + + if (asym_2way.asym_2way_interleave_enable) { + mk_region("as2way", &as2, + U64_LSHIFT(asym_2way.asym_2way_base, APL_ASYMSHIFT), + U64_LSHIFT(asym_2way.asym_2way_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); + } + + if (mot_base.imr_en) { + mk_region_mask("mot", &mot, + U64_LSHIFT(mot_base.mot_out_base, MOT_SHIFT), + U64_LSHIFT(mot_mask.mot_out_mask, MOT_SHIFT)); + } + + top_lm = U64_LSHIFT(tolud.tolud, 20); + top_hm = U64_LSHIFT(touud_hi.touud, 32) | U64_LSHIFT(touud_lo.touud, 20); + + two_slices = !chash.slice_1_disabled && + !chash.slice_0_mem_disabled && + (chash.sym_slice0_channel_enabled != 0) && + (chash.sym_slice1_channel_enabled != 0); + two_channels = !chash.ch_1_disabled && + !chash.enable_pmi_dual_data_mode && + ((chash.sym_slice0_channel_enabled == 3) || + (chash.sym_slice1_channel_enabled == 3)); + + sym_chan_mask = gen_sym_mask(&chash); + asym_chan_mask = gen_asym_mask(&chash, &asym0, &asym1, &asym_2way); + chan_mask = sym_chan_mask | asym_chan_mask; + + if (two_slices && !two_channels) { + if (chash.hvm_mode) + slice_selector = 29; + else + slice_selector = intlv[chash.interleave_mode]; + } else if (!two_slices && two_channels) { + if (chash.hvm_mode) + chan_selector = 29; + else + chan_selector = intlv[chash.interleave_mode]; + } else if (two_slices && two_channels) { + if (chash.hvm_mode) { + slice_selector = 29; + chan_selector = 30; + } else { + slice_selector = intlv[chash.interleave_mode]; + chan_selector = intlv[chash.interleave_mode] + 1; + } + } + + if (two_slices) { + if (!chash.hvm_mode) + slice_hash_mask = chash.slice_hash_mask << SLICE_HASH_MASK_LSB; + if (!two_channels) + slice_hash_mask |= BIT_ULL(slice_selector); + } + + if (two_channels) { + if (!chash.hvm_mode) + chan_hash_mask = chash.ch_hash_mask << CH_HASH_MASK_LSB; + if (!two_slices) + chan_hash_mask |= BIT_ULL(chan_selector); + } + + return 0; +} + +/* Get a contiguous memory address (remove the MMIO gap) */ +static u64 remove_mmio_gap(u64 sys) +{ + return (sys < _4GB) ? 
sys : sys - (_4GB - top_lm); +} + +/* Squeeze out one address bit, shift upper part down to fill gap */ +static void remove_addr_bit(u64 *addr, int bitidx) +{ + u64 mask; + + if (bitidx == -1) + return; + + mask = (1ull << bitidx) - 1; + *addr = ((*addr >> 1) & ~mask) | (*addr & mask); +} + +/* XOR all the bits from addr specified in mask */ +static int hash_by_mask(u64 addr, u64 mask) +{ + u64 result = addr & mask; + + result = (result >> 32) ^ result; + result = (result >> 16) ^ result; + result = (result >> 8) ^ result; + result = (result >> 4) ^ result; + result = (result >> 2) ^ result; + result = (result >> 1) ^ result; + + return (int)result & 1; +} + +/* + * First stage decode. Take the system address and figure out which + * second stage will deal with it based on interleave modes. + */ +static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) +{ + u64 contig_addr, contig_base, contig_offset, contig_base_adj; + int mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + int slice_intlv_bit_rm = SELECTOR_DISABLED; + int chan_intlv_bit_rm = SELECTOR_DISABLED; + /* Determine if address is in the MOT region. */ + bool mot_hit = in_region(&mot, addr); + /* Calculate the number of symmetric regions enabled. */ + int sym_channels = hweight8(sym_chan_mask); + + /* + * The amount we need to shift the asym base can be determined by the + * number of enabled symmetric channels. + * NOTE: This can only work because symmetric memory is not supposed + * to do a 3-way interleave. + */ + int sym_chan_shift = sym_channels >> 1; + + /* Give up if address is out of range, or in MMIO gap */ + if (addr >= (1ul << PND_MAX_PHYS_BIT) || + (addr >= top_lm && addr < _4GB) || addr >= top_hm) { + snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr); + return -EINVAL; + } + + /* Get a contiguous memory address (remove the MMIO gap) */ + contig_addr = remove_mmio_gap(addr); + + if (in_region(&as0, addr)) { + *pmiidx = asym0.slice0_asym_channel_select; + + contig_base = remove_mmio_gap(as0.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice0_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as1, addr)) { + *pmiidx = 2u + asym1.slice1_asym_channel_select; + + contig_base = remove_mmio_gap(as1.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice1_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as2, addr) && (asym_2way.asym_2way_intlv_mode == 0x3ul)) { + bool channel1; + + mot_intlv_bit = MOT_CHAN_INTLV_BIT_1SLC_2CH; + *pmiidx = (asym_2way.asym_2way_intlv_mode & 1) << 1; + channel1 = mot_hit ? ((bool)((addr >> mot_intlv_bit) & 1)) : + hash_by_mask(contig_addr, chan_hash_mask); + *pmiidx |= (u32)channel1; + + contig_base = remove_mmio_gap(as2.base); + chan_intlv_bit_rm = mot_hit ? mot_intlv_bit : chan_selector; + contig_offset = contig_addr - contig_base; + remove_addr_bit(&contig_offset, chan_intlv_bit_rm); + contig_addr = (contig_base >> sym_chan_shift) + contig_offset; + } else { + /* Otherwise we're in normal, boring symmetric mode. 
*/ + *pmiidx = 0u; + + if (two_slices) { + bool slice1; + + if (mot_hit) { + slice_intlv_bit_rm = MOT_SLC_INTLV_BIT; + slice1 = (addr >> MOT_SLC_INTLV_BIT) & 1; + } else { + slice_intlv_bit_rm = slice_selector; + slice1 = hash_by_mask(addr, slice_hash_mask); + } + + *pmiidx = (u32)slice1 << 1; + } + + if (two_channels) { + bool channel1; + + mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + + if (mot_hit) { + chan_intlv_bit_rm = mot_intlv_bit; + channel1 = (addr >> mot_intlv_bit) & 1; + } else { + chan_intlv_bit_rm = chan_selector; + channel1 = hash_by_mask(contig_addr, chan_hash_mask); + } + + *pmiidx |= (u32)channel1; + } + } + + /* Remove the chan_selector bit first */ + remove_addr_bit(&contig_addr, chan_intlv_bit_rm); + /* Remove the slice bit (we remove it second because it must be lower */ + remove_addr_bit(&contig_addr, slice_intlv_bit_rm); + *pmiaddr = contig_addr; + + return 0; +} + +/* Translate PMI address to memory (rank, row, bank, column) */ +#define C(n) (0x10 | (n)) /* column */ +#define B(n) (0x20 | (n)) /* bank */ +#define R(n) (0x40 | (n)) /* row */ +#define RS (0x80) /* rank */ + +/* addrdec values */ +#define AMAP_1KB 0 +#define AMAP_2KB 1 +#define AMAP_4KB 2 +#define AMAP_RSVD 3 + +/* dden values */ +#define DEN_4Gb 0 +#define DEN_8Gb 2 + +/* dwid values */ +#define X8 0 +#define X16 1 + +static struct dimm_geometry { + u8 addrdec; + u8 dden; + u8 dwid; + u8 rowbits, colbits; + u16 bits[PMI_ADDRESS_WIDTH]; +} dimms[] = { + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), 
R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + } +}; + +static int bank_hash(u64 pmiaddr, int idx, int shft) +{ + int bhash = 0; + + switch (idx) { + case 0: + bhash ^= ((pmiaddr >> (12 + shft)) ^ (pmiaddr >> (9 + shft))) & 1; + break; + case 1: + bhash ^= (((pmiaddr >> (10 + shft)) ^ (pmiaddr >> (8 + shft))) & 1) << 1; + bhash ^= ((pmiaddr >> 22) & 1) << 1; + break; + case 2: + bhash ^= (((pmiaddr >> (13 + shft)) ^ (pmiaddr >> (11 + shft))) & 1) << 2; + break; + } + + return bhash; +} + +static int rank_hash(u64 pmiaddr) +{ + return ((pmiaddr >> 16) ^ (pmiaddr >> 10)) & 1; +} + +/* Second stage decode. Compute rank, bank, row & column. */ +static int apl_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + struct d_cr_drp0 *cr_drp0 = &drp0[pmiidx]; + struct pnd2_pvt *pvt = mci->pvt_info; + int g = pvt->dimm_geom[pmiidx]; + struct dimm_geometry *d = &dimms[g]; + int column = 0, bank = 0, row = 0, rank = 0; + int i, idx, type, skiprs = 0; + + for (i = 0; i < PMI_ADDRESS_WIDTH; i++) { + int bit = (pmiaddr >> i) & 1; + + if (i + skiprs >= PMI_ADDRESS_WIDTH) { + snprintf(msg, PND2_MSG_SIZE, "Bad dimm_geometry[] table\n"); + return -EINVAL; + } + + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + + /* + * On single rank DIMMs ignore the rank select bit + * and shift remainder of "bits[]" down one place. 
+ */ + if (type == RS && (cr_drp0->rken0 + cr_drp0->rken1) == 1) { + skiprs = 1; + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + } + + switch (type) { + case C(0): + column |= (bit << idx); + break; + case B(0): + bank |= (bit << idx); + if (cr_drp0->bahen) + bank ^= bank_hash(pmiaddr, idx, d->addrdec); + break; + case R(0): + row |= (bit << idx); + break; + case RS: + rank = bit; + if (cr_drp0->rsien) + rank ^= rank_hash(pmiaddr); + break; + default: + if (bit) { + snprintf(msg, PND2_MSG_SIZE, "Bad translation\n"); + return -EINVAL; + } + goto done; + } + } + +done: + daddr->col = column; + daddr->bank = bank; + daddr->row = row; + daddr->rank = rank; + daddr->dimm = 0; + + return 0; +} + +/* Pluck bit "in" from pmiaddr and return value shifted to bit "out" */ +#define dnv_get_bit(pmi, in, out) ((int)(((pmi) >> (in)) & 1u) << (out)) + +static int dnv_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + /* Rank 0 or 1 */ + daddr->rank = dnv_get_bit(pmiaddr, dmap[pmiidx].rs0 + 13, 0); + /* Rank 2 or 3 */ + daddr->rank |= dnv_get_bit(pmiaddr, dmap[pmiidx].rs1 + 13, 1); + + /* + * Normally ranks 0,1 are DIMM0, and 2,3 are DIMM1, but we + * flip them if DIMM1 is larger than DIMM0. + */ + daddr->dimm = (daddr->rank >= 2) ^ drp[pmiidx].dimmflip; + + daddr->bank = dnv_get_bit(pmiaddr, dmap[pmiidx].ba0 + 6, 0); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].ba1 + 6, 1); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg0 + 6, 2); + if (dsch.ddr4en) + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg1 + 6, 3); + if (dmap1[pmiidx].bxor) { + if (dsch.ddr4en) { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 1); + if (dsch.chan_width == 0) + /* 64/72 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + /* 32/40 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 3); + } else { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 1); + if (dsch.chan_width == 0) + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + } + } + + daddr->row = dnv_get_bit(pmiaddr, dmap2[pmiidx].row0 + 6, 0); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row1 + 6, 1); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 2); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row3 + 6, 3); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row4 + 6, 4); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row5 + 6, 5); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 6); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 7); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row8 + 6, 8); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row9 + 6, 9); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row10 + 6, 10); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row11 + 6, 11); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row12 + 6, 12); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row13 + 6, 13); + if (dmap4[pmiidx].row14 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row14 + 6, 14); + if (dmap4[pmiidx].row15 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row15 + 6, 15); + if 
(dmap4[pmiidx].row16 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row16 + 6, 16); + if (dmap4[pmiidx].row17 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row17 + 6, 17); + + daddr->col = dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 3); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 4); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca5 + 6, 5); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca6 + 6, 6); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca7 + 6, 7); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca8 + 6, 8); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca9 + 6, 9); + if (!dsch.ddr4en && dmap1[pmiidx].ca11 != 0x3f) + daddr->col |= dnv_get_bit(pmiaddr, dmap1[pmiidx].ca11 + 13, 11); + + return 0; +} + +static int check_channel(int ch) +{ + if (drp0[ch].dramtype != 0) { + pnd2_printk(KERN_INFO, "Unsupported DIMM in channel %d\n", ch); + return 1; + } else if (drp0[ch].eccen == 0) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int apl_check_ecc_active(void) +{ + int i, ret = 0; + + /* Check dramtype and ECC mode for each present DIMM */ + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (chan_mask & BIT(i)) + ret += check_channel(i); + return ret ? -EINVAL : 0; +} + +#define DIMMS_PRESENT(d) ((d)->rken0 + (d)->rken1 + (d)->rken2 + (d)->rken3) + +static int check_unit(int ch) +{ + struct d_cr_drp *d = &drp[ch]; + + if (DIMMS_PRESENT(d) && !ecc_ctrl[ch].eccen) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int dnv_check_ecc_active(void) +{ + int i, ret = 0; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + ret += check_unit(i); + return ret ? -EINVAL : 0; +} + +static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, + struct dram_addr *daddr, char *msg) +{ + u64 pmiaddr; + u32 pmiidx; + int ret; + + ret = sys2pmi(addr, &pmiidx, &pmiaddr, msg); + if (ret) + return ret; + + pmiaddr >>= ops->pmiaddr_shift; + /* pmi channel idx to dimm channel idx */ + pmiidx >>= ops->pmiidx_shift; + daddr->chan = pmiidx; + + ret = ops->pmi2mem(mci, pmiaddr, pmiidx, daddr, msg); + if (ret) + return ret; + + edac_dbg(0, "SysAddr=%llx PmiAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + addr, pmiaddr, daddr->chan, daddr->dimm, daddr->rank, daddr->bank, daddr->row, daddr->col); + + return 0; +} + +static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, + struct dram_addr *daddr) +{ + enum hw_event_mc_err_type tp_event; + char *optype, msg[PND2_MSG_SIZE]; + bool ripv = m->mcgstatus & MCG_STATUS_RIPV; + bool overflow = m->status & MCI_STATUS_OVER; + bool uc_err = m->status & MCI_STATUS_UC; + bool recov = m->status & MCI_STATUS_S; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + int rc; + + tp_event = uc_err ? (ripv ? HW_EVENT_ERR_FATAL : HW_EVENT_ERR_UNCORRECTED) : + HW_EVENT_ERR_CORRECTED; + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. 
If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + /* Only decode errors with an valid address (ADDRV) */ + if (!(m->status & MCI_STATUS_ADDRV)) + return; + + rc = get_memory_error_data(mci, m->addr, daddr, msg); + if (rc) + goto address_error; + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x channel:%d DIMM:%d rank:%d row:%d bank:%d col:%d", + overflow ? " OVERFLOW" : "", (uc_err && recov) ? " recoverable" : "", mscod, + errcode, daddr->chan, daddr->dimm, daddr->rank, daddr->row, daddr->bank, daddr->col); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, + m->addr & ~PAGE_MASK, 0, daddr->chan, 0, -1, optype, msg); + + return; + +address_error: + edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, msg, ""); +} + +static void apl_get_dimm_config(struct mem_ctl_info *mci) +{ + struct pnd2_pvt *pvt = mci->pvt_info; + struct dimm_info *dimm; + struct d_cr_drp0 *d; + u64 capacity; + int i, g; + + for (i = 0; i < APL_NUM_CHANNELS; i++) { + if (!(chan_mask & BIT(i))) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, 0, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d\n", i); + continue; + } + + d = &drp0[i]; + for (g = 0; g < ARRAY_SIZE(dimms); g++) + if (dimms[g].addrdec == d->addrdec && + dimms[g].dden == d->dden && + dimms[g].dwid == d->dwid) + break; + + if (g == ARRAY_SIZE(dimms)) { + edac_dbg(0, "Channel %d: unrecognized DIMM\n", i); + continue; + } + + pvt->dimm_geom[i] = g; + capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) * + (1ul << dimms[g].colbits); + edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = (d->dwid == 0) ? 
DEV_X8 : DEV_X16; + dimm->mtype = MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Slice#%d_Chan#%d", i / 2, i % 2); + } +} + +static const int dnv_dtypes[] = { + DEV_X8, DEV_X4, DEV_X16, DEV_UNKNOWN +}; + +static void dnv_get_dimm_config(struct mem_ctl_info *mci) +{ + int i, j, ranks_of_dimm[DNV_MAX_DIMMS], banks, rowbits, colbits, memtype; + struct dimm_info *dimm; + struct d_cr_drp *d; + u64 capacity; + + if (dsch.ddr4en) { + memtype = MEM_DDR4; + banks = 16; + colbits = 10; + } else { + memtype = MEM_DDR3; + banks = 8; + } + + for (i = 0; i < DNV_NUM_CHANNELS; i++) { + if (dmap4[i].row14 == 31) + rowbits = 14; + else if (dmap4[i].row15 == 31) + rowbits = 15; + else if (dmap4[i].row16 == 31) + rowbits = 16; + else if (dmap4[i].row17 == 31) + rowbits = 17; + else + rowbits = 18; + + if (memtype == MEM_DDR3) { + if (dmap1[i].ca11 != 0x3f) + colbits = 12; + else + colbits = 10; + } + + d = &drp[i]; + /* DIMM0 is present if rank0 and/or rank1 is enabled */ + ranks_of_dimm[0] = d->rken0 + d->rken1; + /* DIMM1 is present if rank2 and/or rank3 is enabled */ + ranks_of_dimm[1] = d->rken2 + d->rken3; + + for (j = 0; j < DNV_MAX_DIMMS; j++) { + if (!ranks_of_dimm[j]) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j); + continue; + } + + capacity = ranks_of_dimm[j] * banks * (1ul << rowbits) * (1ul << colbits); + edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = dnv_dtypes[j ? d->dimmdwid0 : d->dimmdwid1]; + dimm->mtype = memtype; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Chan#%d_DIMM#%d", i, j); + } + } +} + +static int pnd2_register_mci(struct mem_ctl_info **ppmci) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct pnd2_pvt *pvt; + int rc; + + rc = ops->check_ecc(); + if (rc < 0) + return rc; + + /* Allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = ops->channels; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = ops->dimms_per_channel; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (!mci) + return -ENOMEM; + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + + mci->mod_name = "pnd2_edac.c"; + mci->dev_name = ops->name; + mci->ctl_name = "Pondicherry2"; + + /* Get dimm basic config and the memory layout */ + ops->get_dimm_config(mci); + + if (edac_mc_add_mc(mci)) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + return -EINVAL; + } + + *ppmci = mci; + + return 0; +} + +static void pnd2_unregister_mci(struct mem_ctl_info *mci) +{ + if (unlikely(!mci || !mci->pvt_info)) { + pnd2_printk(KERN_ERR, "Couldn't find mci handler\n"); + return; + } + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(NULL); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + edac_mc_free(mci); +} + +/* + * Callback function registered with core kernel mce code. + * Called once for each logged error. 
+ */ +static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + struct mem_ctl_info *mci; + struct dram_addr daddr; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + mci = pnd2_mci; + if (!mci) + return NOTIFY_DONE; + + /* + * Just let mcelog handle it if the error is + * outside the memory controller. A memory error + * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. + * bit 12 has an special meaning. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + pnd2_mc_printk(mci, KERN_INFO, "HANDLING MCE MEMORY ERROR\n"); + pnd2_mc_printk(mci, KERN_INFO, "CPU %u: Machine Check %s: %llx Bank %u: %llx\n", + mce->extcpu, type, mce->mcgstatus, mce->bank, mce->status); + pnd2_mc_printk(mci, KERN_INFO, "TSC %llx ", mce->tsc); + pnd2_mc_printk(mci, KERN_INFO, "ADDR %llx ", mce->addr); + pnd2_mc_printk(mci, KERN_INFO, "MISC %llx ", mce->misc); + pnd2_mc_printk(mci, KERN_INFO, "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid); + + pnd2_mce_output_error(mci, mce, &daddr); + + /* Advice mcelog that the error were handled */ + return NOTIFY_STOP; +} + +static struct notifier_block pnd2_mce_dec = { + .notifier_call = pnd2_mce_check_error, +}; + +#ifdef CONFIG_EDAC_DEBUG +/* + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static u64 pnd2_fake_addr; +#define PND2_BLOB_SIZE 1024 +static char pnd2_result[PND2_BLOB_SIZE]; +static struct dentry *pnd2_test; +static struct debugfs_blob_wrapper pnd2_blob = { + .data = pnd2_result, + .size = 0 +}; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct dram_addr daddr; + struct mce m; + + *(u64 *)data = val; + m.mcgstatus = 0; + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x9f; + m.addr = val; + pnd2_mce_output_error(pnd2_mci, &m, &daddr); + snprintf(pnd2_blob.data, PND2_BLOB_SIZE, + "SysAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + m.addr, daddr.chan, daddr.dimm, daddr.rank, daddr.bank, daddr.row, daddr.col); + pnd2_blob.size = strlen(pnd2_blob.data); + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_pnd2_debug(void) +{ + pnd2_test = edac_debugfs_create_dir("pnd2_test"); + edac_debugfs_create_file("pnd2_debug_addr", 0200, pnd2_test, + &pnd2_fake_addr, &fops_u64_wo); + debugfs_create_blob("pnd2_debug_results", 0400, pnd2_test, &pnd2_blob); +} + +static void teardown_pnd2_debug(void) +{ + debugfs_remove_recursive(pnd2_test); +} +#endif + +static int pnd2_probe(void) +{ + int rc; + + edac_dbg(2, "\n"); + rc = get_registers(); + if (rc) + return rc; + + return pnd2_register_mci(&pnd2_mci); +} + +static void pnd2_remove(void) +{ + edac_dbg(0, "\n"); + pnd2_unregister_mci(pnd2_mci); +} + +static struct dunit_ops apl_ops = { + .name = "pnd2/apl", + .type = APL, + .pmiaddr_shift = LOG2_PMI_ADDR_GRANULARITY, + .pmiidx_shift = 0, + .channels = APL_NUM_CHANNELS, + .dimms_per_channel = 1, + .rd_reg = apl_rd_reg, + .get_registers = apl_get_registers, + .check_ecc = apl_check_ecc_active, + .mk_region = apl_mk_region, + .get_dimm_config = apl_get_dimm_config, + .pmi2mem = apl_pmi2mem, +}; + +static struct dunit_ops dnv_ops = { + .name = "pnd2/dnv", + .type = DNV, + .pmiaddr_shift = 0, + .pmiidx_shift = 1, + .channels 
= DNV_NUM_CHANNELS, + .dimms_per_channel = 2, + .rd_reg = dnv_rd_reg, + .get_registers = dnv_get_registers, + .check_ecc = dnv_check_ecc_active, + .mk_region = dnv_mk_region, + .get_dimm_config = dnv_get_dimm_config, + .pmi2mem = dnv_pmi2mem, +}; + +static const struct x86_cpu_id pnd2_cpuids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops }, + { } +}; +MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); + +static int __init pnd2_init(void) +{ + const struct x86_cpu_id *id; + int rc; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(pnd2_cpuids); + if (!id) + return -ENODEV; + + ops = (struct dunit_ops *)id->driver_data; + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + rc = pnd2_probe(); + if (rc < 0) { + pnd2_printk(KERN_ERR, "Failed to register device with error %d.\n", rc); + return rc; + } + + if (!pnd2_mci) + return -ENODEV; + + mce_register_decode_chain(&pnd2_mce_dec); + setup_pnd2_debug(); + + return 0; +} + +static void __exit pnd2_exit(void) +{ + edac_dbg(2, "\n"); + teardown_pnd2_debug(); + mce_unregister_decode_chain(&pnd2_mce_dec); + pnd2_remove(); +} + +module_init(pnd2_init); +module_exit(pnd2_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel SoC using Pondicherry memory controller"); diff --git a/drivers/edac/pnd2_edac.h b/drivers/edac/pnd2_edac.h new file mode 100644 index 00000000000000..61b6e79492bb11 --- /dev/null +++ b/drivers/edac/pnd2_edac.h @@ -0,0 +1,301 @@ +/* + * Register bitfield descriptions for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _PND2_REGS_H +#define _PND2_REGS_H + +struct b_cr_touud_lo_pci { + u32 lock : 1; + u32 reserved_1 : 19; + u32 touud : 12; +}; + +#define b_cr_touud_lo_pci_port 0x4c +#define b_cr_touud_lo_pci_offset 0xa8 +#define b_cr_touud_lo_pci_r_opcode 0x04 + +struct b_cr_touud_hi_pci { + u32 touud : 7; + u32 reserved_0 : 25; +}; + +#define b_cr_touud_hi_pci_port 0x4c +#define b_cr_touud_hi_pci_offset 0xac +#define b_cr_touud_hi_pci_r_opcode 0x04 + +struct b_cr_tolud_pci { + u32 lock : 1; + u32 reserved_0 : 19; + u32 tolud : 12; +}; + +#define b_cr_tolud_pci_port 0x4c +#define b_cr_tolud_pci_offset 0xbc +#define b_cr_tolud_pci_r_opcode 0x04 + +struct b_cr_mchbar_lo_pci { + u32 enable : 1; + u32 pad_3_1 : 3; + u32 pad_14_4: 11; + u32 base: 17; +}; + +struct b_cr_mchbar_hi_pci { + u32 base : 7; + u32 pad_31_7 : 25; +}; + +/* Symmetric region */ +struct b_cr_slice_channel_hash { + u64 slice_1_disabled : 1; + u64 hvm_mode : 1; + u64 interleave_mode : 2; + u64 slice_0_mem_disabled : 1; + u64 reserved_0 : 1; + u64 slice_hash_mask : 14; + u64 reserved_1 : 11; + u64 enable_pmi_dual_data_mode : 1; + u64 ch_1_disabled : 1; + u64 reserved_2 : 1; + u64 sym_slice0_channel_enabled : 2; + u64 sym_slice1_channel_enabled : 2; + u64 ch_hash_mask : 14; + u64 reserved_3 : 11; + u64 lock : 1; +}; + +#define b_cr_slice_channel_hash_port 0x4c +#define b_cr_slice_channel_hash_offset 0x4c58 +#define b_cr_slice_channel_hash_r_opcode 0x06 + +struct b_cr_mot_out_base_mchbar { + u32 reserved_0 : 14; + u32 mot_out_base : 15; + u32 reserved_1 : 1; + u32 tr_en : 1; + u32 imr_en : 1; +}; + +#define b_cr_mot_out_base_mchbar_port 0x4c +#define b_cr_mot_out_base_mchbar_offset 0x6af0 +#define b_cr_mot_out_base_mchbar_r_opcode 0x00 + +struct b_cr_mot_out_mask_mchbar { + u32 reserved_0 : 14; + u32 mot_out_mask : 15; + u32 reserved_1 : 1; + u32 ia_iwb_en : 1; + u32 gt_iwb_en : 1; +}; + +#define b_cr_mot_out_mask_mchbar_port 0x4c +#define b_cr_mot_out_mask_mchbar_offset 0x6af4 +#define b_cr_mot_out_mask_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region0_mchbar { + u32 pad : 4; + u32 slice0_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice0_asym_limit : 11; + u32 slice0_asym_channel_select : 1; + u32 slice0_asym_enable : 1; +}; + +#define b_cr_asym_mem_region0_mchbar_port 0x4c +#define b_cr_asym_mem_region0_mchbar_offset 0x6e40 +#define b_cr_asym_mem_region0_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region1_mchbar { + u32 pad : 4; + u32 slice1_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice1_asym_limit : 11; + u32 slice1_asym_channel_select : 1; + u32 slice1_asym_enable : 1; +}; + +#define b_cr_asym_mem_region1_mchbar_port 0x4c +#define b_cr_asym_mem_region1_mchbar_offset 0x6e44 +#define b_cr_asym_mem_region1_mchbar_r_opcode 0x00 + +/* Some bit fields moved in above two structs on Denverton */ +struct b_cr_asym_mem_region_denverton { + u32 pad : 4; + u32 slice_asym_base : 8; + u32 pad_19_12 : 8; + u32 slice_asym_limit : 8; + u32 pad_28_30 : 3; + u32 slice_asym_enable : 1; +}; + +struct b_cr_asym_2way_mem_region_mchbar { + u32 pad : 2; + u32 asym_2way_intlv_mode : 2; + u32 asym_2way_base : 11; + u32 pad_16_15 : 2; + u32 asym_2way_limit : 11; + u32 pad_30_28 : 3; + u32 asym_2way_interleave_enable : 1; +}; + +#define b_cr_asym_2way_mem_region_mchbar_port 0x4c +#define b_cr_asym_2way_mem_region_mchbar_offset 0x6e50 +#define b_cr_asym_2way_mem_region_mchbar_r_opcode 0x00 + +/* Apollo Lake d-unit */ + +struct d_cr_drp0 { + u32 rken0 : 1; + u32 rken1 : 1; + u32 ddmen : 1; + u32 rsvd3 : 1; + u32 dwid : 2; + u32 dden : 3; + u32 
rsvd13_9 : 5; + u32 rsien : 1; + u32 bahen : 1; + u32 rsvd18_16 : 3; + u32 caswizzle : 2; + u32 eccen : 1; + u32 dramtype : 3; + u32 blmode : 3; + u32 addrdec : 2; + u32 dramdevice_pr : 2; +}; + +#define d_cr_drp0_offset 0x1400 +#define d_cr_drp0_r_opcode 0x00 + +/* Denverton d-unit */ + +struct d_cr_dsch { + u32 ch0en : 1; + u32 ch1en : 1; + u32 ddr4en : 1; + u32 coldwake : 1; + u32 newbypdis : 1; + u32 chan_width : 1; + u32 rsvd6_6 : 1; + u32 ooodis : 1; + u32 rsvd18_8 : 11; + u32 ic : 1; + u32 rsvd31_20 : 12; +}; + +#define d_cr_dsch_port 0x16 +#define d_cr_dsch_offset 0x0 +#define d_cr_dsch_r_opcode 0x0 + +struct d_cr_ecc_ctrl { + u32 eccen : 1; + u32 rsvd31_1 : 31; +}; + +#define d_cr_ecc_ctrl_offset 0x180 +#define d_cr_ecc_ctrl_r_opcode 0x0 + +struct d_cr_drp { + u32 rken0 : 1; + u32 rken1 : 1; + u32 rken2 : 1; + u32 rken3 : 1; + u32 dimmdwid0 : 2; + u32 dimmdden0 : 2; + u32 dimmdwid1 : 2; + u32 dimmdden1 : 2; + u32 rsvd15_12 : 4; + u32 dimmflip : 1; + u32 rsvd31_17 : 15; +}; + +#define d_cr_drp_offset 0x158 +#define d_cr_drp_r_opcode 0x0 + +struct d_cr_dmap { + u32 ba0 : 5; + u32 ba1 : 5; + u32 bg0 : 5; /* if ddr3, ba2 = bg0 */ + u32 bg1 : 5; /* if ddr3, ba3 = bg1 */ + u32 rs0 : 5; + u32 rs1 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap_offset 0x174 +#define d_cr_dmap_r_opcode 0x0 + +struct d_cr_dmap1 { + u32 ca11 : 6; + u32 bxor : 1; + u32 rsvd : 25; +}; + +#define d_cr_dmap1_offset 0xb4 +#define d_cr_dmap1_r_opcode 0x0 + +struct d_cr_dmap2 { + u32 row0 : 5; + u32 row1 : 5; + u32 row2 : 5; + u32 row3 : 5; + u32 row4 : 5; + u32 row5 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap2_offset 0x148 +#define d_cr_dmap2_r_opcode 0x0 + +struct d_cr_dmap3 { + u32 row6 : 5; + u32 row7 : 5; + u32 row8 : 5; + u32 row9 : 5; + u32 row10 : 5; + u32 row11 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap3_offset 0x14c +#define d_cr_dmap3_r_opcode 0x0 + +struct d_cr_dmap4 { + u32 row12 : 5; + u32 row13 : 5; + u32 row14 : 5; + u32 row15 : 5; + u32 row16 : 5; + u32 row17 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap4_offset 0x150 +#define d_cr_dmap4_r_opcode 0x0 + +struct d_cr_dmap5 { + u32 ca3 : 4; + u32 ca4 : 4; + u32 ca5 : 4; + u32 ca6 : 4; + u32 ca7 : 4; + u32 ca8 : 4; + u32 ca9 : 4; + u32 rsvd : 4; +}; + +#define d_cr_dmap5_offset 0x154 +#define d_cr_dmap5_r_opcode 0x0 + +#endif /* _PND2_REGS_H */ From 40ceda09c8c84694c2ca6b00bcc6dc71e8e62d96 Mon Sep 17 00:00:00 2001 From: yong mao Date: Sat, 4 Mar 2017 15:10:03 +0800 Subject: [PATCH 1353/1911] mmc: mediatek: Fixed bug where clock frequency could be set wrong This patch can fix two issues: Issue 1: In previous code, div may be overflow when setting clock frequency as f_min. We can use DIV_ROUND_UP to fix this boundary related issue. Issue 2: In previous code, we can not set the correct clock frequency when div equals 0xff. 
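To make the two boundary cases above concrete, here is a small standalone C sketch (not driver code: the 200000001 Hz source clock is an invented value, and DIV_ROUND_UP is re-defined here the same way the kernel defines it) showing how the truncating f_min calculation and the "div % 0xff" masking each go wrong:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int src_clk_freq = 200000001;	/* invented source clock, in Hz */

	/* Old f_min truncates down, so a request for exactly f_min can need
	 * a divider larger than the 8-bit CKDIV field can hold (max 255).
	 */
	unsigned int f_min_old = src_clk_freq / (4 * 255);
	unsigned int f_min_new = DIV_ROUND_UP(src_clk_freq, 4 * 255);

	/* Divider needed to reach each f_min (1 / (4 * div) relationship, rounded up) */
	unsigned int div_old = DIV_ROUND_UP(src_clk_freq, 4 * f_min_old);
	unsigned int div_new = DIV_ROUND_UP(src_clk_freq, 4 * f_min_new);

	printf("old f_min=%u -> div=%u (overflows the 8-bit CKDIV field)\n", f_min_old, div_old);
	printf("new f_min=%u -> div=%u (fits)\n", f_min_new, div_new);

	/* Issue 2: masking the divider with "% 0xff" silently turns 255 into 0 */
	unsigned int divider = 255;
	printf("divider=%u: \"%% 0xff\" stores %u (wrong), unmasked stores %u (right)\n",
	       divider, divider % 0xff, divider);
	return 0;
}

With the invented 200000001 Hz clock, the truncated f_min requires a divider of 256, while the rounded-up f_min needs only 255, which is exactly the boundary the patch description is about.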
Signed-off-by: Yong Mao Signed-off-by: Chaotian Jing Reviewed-by: Daniel Kurtz Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 8e32580c12b520..b235d8da0602a8 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -580,7 +580,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz) } } sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV, - (mode << 8) | (div % 0xff)); + (mode << 8) | div); sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN); while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB)) cpu_relax(); @@ -1559,7 +1559,7 @@ static int msdc_drv_probe(struct platform_device *pdev) host->src_clk_freq = clk_get_rate(host->src_clk); /* Set host parameters to mmc */ mmc->ops = &mt_msdc_ops; - mmc->f_min = host->src_clk_freq / (4 * 255); + mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255); mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23; /* MMC core transfer sizes tunable parameters */ From e552a8389aa409e257b7dcba74f67f128f979ccc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:48 +0100 Subject: [PATCH 1354/1911] perf/core: Fix use-after-free in perf_release() Dmitry reported syzcaller tripped a use-after-free in perf_release(). After much puzzlement Oleg spotted the below scenario: Task1 Task2 fork() perf_event_init_task() /* ... */ goto bad_fork_$foo; /* ... */ perf_event_free_task() mutex_lock(ctx->lock) perf_free_event(B) perf_event_release_kernel(A) mutex_lock(A->child_mutex) list_for_each_entry(child, ...) { /* child == B */ ctx = B->ctx; get_ctx(ctx); mutex_unlock(A->child_mutex); mutex_lock(A->child_mutex) list_del_init(B->child_list) mutex_unlock(A->child_mutex) /* ... */ mutex_unlock(ctx->lock); put_ctx() /* >0 */ free_task(); mutex_lock(ctx->lock); mutex_lock(A->child_mutex); /* ... */ mutex_unlock(A->child_mutex); mutex_unlock(ctx->lock) put_ctx() /* 0 */ ctx->task && !TOMBSTONE put_task_struct() /* UAF */ This patch closes the hole by making perf_event_free_task() destroy the task <-> ctx relation such that perf_event_release_kernel() will no longer observe the now dead task. Spotted-by: Oleg Nesterov Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: c6e5b73242d2 ("perf: Synchronously clean up child events") Link: http://lkml.kernel.org/r/20170314155949.GE32474@worktop Link: http://lkml.kernel.org/r/20170316125823.140295131@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f01251..4742909c56e653 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10415,6 +10415,17 @@ void perf_event_free_task(struct task_struct *task) continue; mutex_lock(&ctx->mutex); + raw_spin_lock_irq(&ctx->lock); + /* + * Destroy the task <-> ctx relation and mark the context dead. + * + * This is important because even though the task hasn't been + * exposed yet the context has been (through child_list). 
+ */ + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL); + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); + put_task_struct(task); /* cannot be last */ + raw_spin_unlock_irq(&ctx->lock); again: list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) From e7cc4865f0f31698ef2f7aac01a50e78968985b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:49 +0100 Subject: [PATCH 1355/1911] perf/core: Fix event inheritance on fork() While hunting for clues to a use-after-free, Oleg spotted that perf_event_init_context() can loose an error value with the result that fork() can succeed even though we did not fully inherit the perf event context. Spotted-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists") Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4742909c56e653..fc7c9a85944d65 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10679,7 +10679,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -10695,7 +10695,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -10723,6 +10723,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); From 15121c789e001168decac6483d192bdb7ea29e74 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:50 +0100 Subject: [PATCH 1356/1911] perf/core: Simplify perf_event_free_task() We have ctx->event_list that contains all events; no need to repeatedly iterate the group lists to find them all. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.239678244@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fc7c9a85944d65..5f21e5e09ba411 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10426,21 +10426,11 @@ void perf_event_free_task(struct task_struct *task) WRITE_ONCE(ctx->task, TASK_TOMBSTONE); put_task_struct(task); /* cannot be last */ raw_spin_unlock_irq(&ctx->lock); -again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, - group_entry) - perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; - mutex_unlock(&ctx->mutex); - put_ctx(ctx); } } From d8a8cfc76919b6c830305266b23ba671623f37ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:51 +0100 Subject: [PATCH 1357/1911] perf/core: Better explain the inherit magic While going through the event inheritance code Oleg got confused. Add some comments to better explain the silent dissapearance of orphaned events. So what happens is that at perf_event_release_kernel() time; when an event looses its connection to userspace (and ceases to exist from the user's perspective) we can still have an arbitrary amount of inherited copies of the event. We want to synchronously find and remove all these child events. Since that requires a bit of lock juggling, there is the possibility that concurrent clone()s will create new child events. Therefore we first mark the parent event as DEAD, which marks all the extant child events as orphaned. We then avoid copying orphaned events; in order to avoid getting more of them. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.289567442@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5f21e5e09ba411..7298e149b7323f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4254,7 +4254,7 @@ int perf_event_release_kernel(struct perf_event *event) raw_spin_lock_irq(&ctx->lock); /* - * Mark this even as STATE_DEAD, there is no external reference to it + * Mark this event as STATE_DEAD, there is no external reference to it * anymore. * * Anybody acquiring event->child_mutex after the below loop _must_ @@ -10468,7 +10468,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event) } /* - * inherit a event from parent task to child task: + * Inherit a event from parent task to child task. 
+ * + * Returns: + * - valid pointer on success + * - NULL for orphaned events + * - IS_ERR() on error */ static struct perf_event * inherit_event(struct perf_event *parent_event, @@ -10562,6 +10567,16 @@ inherit_event(struct perf_event *parent_event, return child_event; } +/* + * Inherits an event group. + * + * This will quietly suppress orphaned events; !inherit_event() is not an error. + * This matches with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_group(struct perf_event *parent_event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10576,6 +10591,11 @@ static int inherit_group(struct perf_event *parent_event, child, NULL, child_ctx); if (IS_ERR(leader)) return PTR_ERR(leader); + /* + * @leader can be NULL here because of is_orphaned_event(). In this + * case inherit_event() will create individual events, similar to what + * perf_group_detach() would do anyway. + */ list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); @@ -10585,6 +10605,17 @@ static int inherit_group(struct perf_event *parent_event, return 0; } +/* + * Creates the child task context and tries to inherit the event-group. + * + * Clears @inherited_all on !attr.inherited or error. Note that we'll leave + * inherited_all set when we 'fail' to inherit an orphaned event; this is + * consistent with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10607,7 +10638,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * First allocate and initialize a context for the * child. */ - child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; From 66822d815ae61ecb2d9dba9031517e8a8476969d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Mar 2017 21:11:46 -0400 Subject: [PATCH 1358/1911] drm/radeon: reinstate oland workaround for sclk Higher sclks seem to be unstable on some boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 72e1588580a118..c7af9fdd20c729 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2985,9 +2985,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; } } else if (rdev->family == CHIP_OLAND) { - if ((rdev->pdev->device == 0x6604) && - (rdev->pdev->subsystem_vendor == 0x1028) && - (rdev->pdev->subsystem_device == 0x066F)) { + if ((rdev->pdev->revision == 0xC7) || + (rdev->pdev->revision == 0x80) || + (rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605)) { max_sclk = 75000; } } From e11ddff68a7c455e63c4b46154a3e75c699a7b55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Mar 2017 21:13:25 -0400 Subject: [PATCH 1359/1911] drm/amdgpu: reinstate oland workaround for sclk Higher sclks seem to be unstable on some boards. 
bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 33b504bafb8824..c5dec210d52999 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3465,9 +3465,13 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, max_sclk = 75000; } } else if (adev->asic_type == CHIP_OLAND) { - if ((adev->pdev->device == 0x6604) && - (adev->pdev->subsystem_vendor == 0x1028) && - (adev->pdev->subsystem_device == 0x066F)) { + if ((adev->pdev->revision == 0xC7) || + (adev->pdev->revision == 0x80) || + (adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605)) { max_sclk = 75000; } } From 60a970a6c58dedb4c6b2d90b75f8d6ad7c7b34dc Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 15 Mar 2017 10:13:32 +0800 Subject: [PATCH 1360/1911] drm/amdgpu: fix the clearing wb size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clearing wb size should be the one that it is assigned. Signed-off-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a3a105ec99e2d7..de0cf3315484c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -475,7 +475,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int r; if (adev->wb.wb_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4, + r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->wb.wb_obj, &adev->wb.gpu_addr, (void **)&adev->wb.wb); @@ -488,7 +488,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_GPU_PAGE_SIZE); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); } return 0; From 9986943ef5d61a9bea3c86000d91d3b789f0060e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Mar 2017 04:22:09 +0000 Subject: [PATCH 1361/1911] ASoC: rcar: dma: remove unnecessary "volatile" commit 2a3af642eb20("ASoC: rcar: clear DE bit only in PDMACHCR...") added rsnd_dmapp_bset(), but it used copy-paste. Thus, it had unnecessary "volatile", and had below warning on x86. This patch fix it. 
sound/soc/sh/rcar/dma.c: In function 'rsnd_dmapp_bset': >> sound/soc/sh/rcar/dma.c:463:21: warning: passing argument 1 of \ 'ioread32' discards 'volatile' qualifier from pointer target \ type [-Wdiscarded-qualifiers] u32 val = ioread32(addr); ^~~~ In file included from arch/x86/include/asm/io.h:203:0, from arch/x86/include/asm/realmode.h:5, from arch/x86/include/asm/acpi.h:33, from arch/x86/include/asm/fixmap.h:19, from arch/x86/include/asm/apic.h:10, from arch/x86/include/asm/smp.h:12, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/idr.h:16, from include/linux/kernfs.h:14, from include/linux/sysfs.h:15, from include/linux/kobject.h:21, from include/linux/of.h:21, from include/linux/of_dma.h:16, from sound/soc/sh/rcar/dma.c:12: include/asm-generic/iomap.h:31:21: note: expected 'void *' \ but argument is of type 'volatile void *' extern unsigned int ioread32(void __iomem *); ^~~~~~~~ Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index c2e199b4fcf459..241cb3b08a0755 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -459,7 +459,7 @@ static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) struct rsnd_mod *mod = rsnd_mod_get(dma); struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); - volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); u32 val = ioread32(addr); val &= ~mask; From 763811987d5088d0461164f80e9d16869847a040 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 16 Mar 2017 13:58:40 +0800 Subject: [PATCH 1362/1911] ASoC: rt5665: fix define of RT5665_HP_DRIVER_5X It is (0x3 << 2), not (0x2 << 2). 
Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.h b/sound/soc/codecs/rt5665.h index 12f7080a0d3c3f..a30f5e6d062882 100644 --- a/sound/soc/codecs/rt5665.h +++ b/sound/soc/codecs/rt5665.h @@ -1106,7 +1106,7 @@ #define RT5665_HP_DRIVER_MASK (0x3 << 2) #define RT5665_HP_DRIVER_1X (0x0 << 2) #define RT5665_HP_DRIVER_3X (0x1 << 2) -#define RT5665_HP_DRIVER_5X (0x2 << 2) +#define RT5665_HP_DRIVER_5X (0x3 << 2) #define RT5665_LDO1_DVO_MASK (0x3) #define RT5665_LDO1_DVO_09 (0x0) #define RT5665_LDO1_DVO_10 (0x1) From 8bcd37d8b21de13d068414c018c9599281294a01 Mon Sep 17 00:00:00 2001 From: "Winkler, Tomas" Date: Thu, 9 Mar 2017 16:58:21 +0200 Subject: [PATCH 1363/1911] mmc: core: mmc_blk_rw_cmd_err - remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compilation warning: drivers/mmc/core/block.c:1563:24: warning: variable ‘mq_rq’ set but not used [-Wunused-but-set-variable] struct mmc_queue_req *mq_rq; Signed-off-by: Tomas Winkler Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 05afefcfb61114..ff3da960c47361 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1560,11 +1560,8 @@ static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, struct mmc_blk_request *brq, struct request *req, bool old_req_pending) { - struct mmc_queue_req *mq_rq; bool req_pending; - mq_rq = container_of(brq, struct mmc_queue_req, brq); - /* * If this is an SD card and we're writing, we can first * mark the known good sectors as ok. From 2ce0c7b65505e0d915e99389cced45b478dc935d Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 9 Mar 2017 16:18:20 +0100 Subject: [PATCH 1364/1911] mmc: sdhci-of-at91: Support external regulators The SDHCI controller in the SAMA5D2 chip requires a valid voltage set in the power control register, otherwise commands will fail with a timeout error. When using the regulator framework to specify the regulator used by the mmc device, the voltage is not configured, and it is not possible to use the connected device. Implement a custom 'set_power' function for this specific hardware, that configures the voltage in the register in all cases. Signed-off-by: Romain Izard Acked-by: Ludovic Desroches Cc: stable@vger.kernel.org #4.9+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 2f9ad213377a2c..7fd964256faa7d 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -85,11 +85,30 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); } +/* + * In this specific implementation of the SDHCI controller, the power register + * needs to have a valid voltage set even when the power supply is managed by + * an external regulator. 
+ */ +static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + if (!IS_ERR(host->mmc->supply.vmmc)) { + struct mmc_host *mmc = host->mmc; + + spin_unlock_irq(&host->lock); + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + spin_lock_irq(&host->lock); + } + sdhci_set_power_noreg(host, mode, vdd); +} + static const struct sdhci_ops sdhci_at91_sama5d2_ops = { .set_clock = sdhci_at91_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_at91_set_power, }; static const struct sdhci_pltfm_data soc_data_sama5d2 = { From 181302dc7239add8ab1449c23ecab193f52ee6ab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:40:22 +0100 Subject: [PATCH 1365/1911] mmc: ushc: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 53f3a9e26ed5 ("mmc: USB SD Host Controller (USHC) driver") Cc: stable # 2.6.37 Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson --- drivers/mmc/host/ushc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index d2c386f09d69f4..1d843357422e8a 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct ushc_data *ushc; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; From efd4b81abbe1ac753717f2f10cd3dab8bed6c103 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 16 Mar 2017 09:46:14 -0600 Subject: [PATCH 1366/1911] blk-stat: fix blk_stat_sum() if all samples are batched We need to flush the batch _before_ we check the number of samples, otherwise we'll miss all of the batched samples. Fixes: cf43e6b ("block: add scalable completion tracking of requests") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-stat.c b/block/blk-stat.c index 9b43efb8933fb9..186fcb981e9b1d 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat) static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { + blk_stat_flush_batch(src); + if (!src->nr_samples) return; - blk_stat_flush_batch(src); - dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); From 29c8bbbd6e21daa0997d1c3ee886b897ee7ad652 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 1367/1911] afs: Fix missing put_page() In afs_writepages_region(), inside the loop where we find dirty pages to deal with, one of the if-statements is missing a put_page(). 
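The one-line diff below restores a reference-counting rule that is worth stating in isolation. The sketch that follows is not the real afs_writepages_region(); the example_* helpers are made-up stand-ins for the tagged page-cache lookup and the per-page writeback step, and the point is only that every skip path must drop the reference the lookup took:

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Hypothetical stand-ins, not real AFS functions. */
static struct page *example_next_tagged_page(struct address_space *mapping);
static void example_write_back_one(struct page *page);	/* assumed to unlock and put the page */

static void writeback_loop_sketch(struct address_space *mapping)
{
	struct page *page;

	while ((page = example_next_tagged_page(mapping)) != NULL) {
		lock_page(page);

		/* Skipping a page is fine, but the reference taken by the
		 * lookup must still be dropped; this put_page() is the one
		 * the original loop was missing.
		 */
		if (PageWriteback(page) || !PageDirty(page)) {
			unlock_page(page);
			put_page(page);
			continue;
		}

		example_write_back_one(page);
	}
}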
Signed-off-by: David Howells --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index c83c1a0e851fb3..e919e64cd4e0df 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -513,6 +513,7 @@ static int afs_writepages_region(struct address_space *mapping, if (PageWriteback(page) || !PageDirty(page)) { unlock_page(page); + put_page(page); continue; } From 5611ef280d814042825ee17688f5751266fc538b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 1368/1911] afs: Fix page overput in afs_fill_page() afs_fill_page() loads the page it wants to fill into the afs_read request without incrementing its refcount - but then calls afs_put_read() to clean up afterwards, which then releases a ref on the page. Fix this by getting a ref on the page before calling afs_vnode_fetch_data(). This causes sync after a write to hang in afs_writepages_region() because find_get_pages_tag() gets confused and doesn't return. Fixes: 196ee9cd2d04 ("afs: Make afs_fs_fetch_data() take a list of pages") Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index e919e64cd4e0df..3ac52f6a96ff66 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -101,6 +101,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pos = pos; req->nr_pages = 1; req->pages[0] = page; + get_page(page); i_size = i_size_read(&vnode->vfs_inode); if (pos + PAGE_SIZE > i_size) From 6186f0788b31f44affceeedc7b48eb10faea120d Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 1369/1911] afs: Populate group ID from vnode status The group was hard coded to GLOBAL_ROOT_GID; use the group ID that was received from the server. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e4897a048d2ee..299dbaeb2e2abd 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -70,7 +70,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; From 627f46943ff90bcc32ddeb675d881c043c6fa2ae Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 1370/1911] afs: Adjust mode bits processing Mode bits for an afs file should not be enforced in the usual way. For files, the absence of user bits can restrict file access with respect to what is granted by the server. These bits apply regardless of the owner or the current uid; the rest of the mode bits (group, other) are ignored. 
Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/security.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc8962..bfa9d342838355 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; From bcd89270d93b7edebb5de5e5e7dca1a77a33496e Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 1371/1911] afs: Deal with an empty callback array Servers may send a callback array that is the same size as the FID array, or an empty array. If the callback count is 0, the code would attempt to read (fid_count * 12) bytes of data, which would fail and result in an unmarshalling error. This would lead to stale data for remotely modified files or directories. Store the callback array size in the internal afs_call structure and use that to determine the amount of data to read. Signed-off-by: Marc Dionne --- fs/afs/cmservice.c | 11 +++++------ fs/afs/internal.h | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 2edbdcbf6432ad..3062cceb5c2aeb 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5dfa56903a2d4b..8499870147effe 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -90,7 +90,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ From 
6db3ac3c4bc552837d232ec794559a2fae2815a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 1372/1911] afs: Handle better the server returning excess or short data When an AFS server is given an FS.FetchData{,64} request to read data from a file, it is permitted by the protocol to return more or less than was requested. kafs currently relies on the latter behaviour in readpage{,s} to handle a partial page at the end of the file (we just ask for a whole page and clear space beyond the short read). However, we don't handle all cases. Add: (1) Handle excess data by discarding it rather than aborting. Note that we use a common static buffer to discard into so that the decryption algorithm advances the PCBC state. (2) Handle a short read that affects more than just the last page. Note that if a read comes up unexpectedly short of long, it's possible that the server's copy of the file changed - in which case the data version number will have been incremented and the callback will have been broken - in which case all the pages currently attached to the inode will be zapped anyway at some point. Signed-off-by: David Howells --- fs/afs/file.c | 7 +++++-- fs/afs/fsclient.c | 49 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index ba7b71fba34bcc..a38e1c30d1106a 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -184,10 +184,13 @@ int afs_page_filler(void *data, struct page *page) if (!req) goto enomem; + /* We request a full page. If the page is a partial one at the + * end of the file, the server will return a short read and the + * unmarshalling code will clear the unfilled space. + */ atomic_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; - req->len = min_t(size_t, i_size_read(inode) - req->pos, - PAGE_SIZE); + req->len = PAGE_SIZE; req->nr_pages = 1; req->pages[0] = page; get_page(page); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ac8e766978dc44..bf8904a1a58fb6 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,12 @@ #include "internal.h" #include "afs_fs.h" +/* + * We need somewhere to discard into in case the server helpfully returns more + * than we asked for in FS.FetchData{,64}. + */ +static u8 afs_discard_buffer[64]; + /* * decode an AFSFid block */ @@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->actual_len |= ntohl(call->tmp); _debug("DATA length: %llu", req->actual_len); - /* Check that the server didn't want to send us extra. We - * might want to just discard instead, but that requires - * cooperation from AF_RXRPC. 
- */ - if (req->actual_len > req->len) - return -EBADMSG; req->remain = req->actual_len; call->offset = req->pos & (PAGE_SIZE - 1); @@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->unmarshall++; begin_page: + ASSERTCMP(req->index, <, req->nr_pages); if (req->remain > PAGE_SIZE - call->offset) size = PAGE_SIZE - call->offset; else @@ -390,18 +391,37 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->page_done) req->page_done(call, req); if (req->remain > 0) { - req->index++; call->offset = 0; + req->index++; + if (req->index >= req->nr_pages) + goto begin_discard; goto begin_page; } } + goto no_more_data; + + /* Discard any excess data the server gave us */ + begin_discard: + case 4: + size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + call->count = size; + _debug("extract discard %u/%llu %zu/%u", + req->remain, req->actual_len, call->offset, call->count); + + call->offset = 0; + ret = afs_extract_data(call, afs_discard_buffer, call->count, true); + req->remain -= call->offset; + if (ret < 0) + return ret; + if (req->remain > 0) + goto begin_discard; no_more_data: call->offset = 0; - call->unmarshall++; + call->unmarshall = 5; /* extract the metadata */ - case 4: + case 5: ret = afs_extract_data(call, call->buffer, (21 + 3 + 6) * 4, false); if (ret < 0) @@ -416,16 +436,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->offset = 0; call->unmarshall++; - case 5: + case 6: break; } - if (call->count < PAGE_SIZE) { - buffer = kmap(req->pages[req->index]); - memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(req->pages[req->index]); + for (; req->index < req->nr_pages; req->index++) { + if (call->count < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + call->count, PAGE_SIZE); if (req->page_done) req->page_done(call, req); + call->count = 0; } _leave(" = 0 [done]"); From 3448e6521755862446aed28e29abf12565d8844e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 1373/1911] afs: Kill struct afs_read::pg_offset Kill struct afs_read::pg_offset as nothing uses it. It's unnecessary as pos can be masked off. Signed-off-by: David Howells --- fs/afs/internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8499870147effe..7784a8bc375c74 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -135,7 +135,6 @@ struct afs_read { atomic_t usage; unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ - unsigned int pg_offset; /* Offset in page we're at */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); struct page *pages[]; From e8e581a88c5f5fc7cf1f636d122b77fbcfc8c2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 1374/1911] afs: Handle a short write to an AFS page Handle the situation where afs_write_begin() is told to expect that a full-page write will be made, but this doesn't happen (EFAULT, CTRL-C, etc.), and so afs_write_end() sees that a partial write took place. Currently, no attempt is made to deal with the discrepancy. Fix this by loading the gap from the server.
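The repair path can be sketched in a few lines of standalone C (the helper names and the fetch callback are hypothetical; the real change follows in the diff below): when the copy into a not-yet-uptodate page comes up short, only the missing span is read back from the server before the page is marked usable.

    #include <stddef.h>

    /* Stand-in for the FetchData RPC: read 'len' bytes at file position 'pos'. */
    typedef int (*fetch_fn)(void *buf, long long pos, size_t len);

    /* A write into the page at file position page_pos promised 'len' bytes at
     * offset 'off' but only 'copied' arrived.  Load just the gap; bytes beyond
     * the server-side EOF are expected to come back as zeroes. */
    static int fill_write_gap(char *page, long long page_pos,
                              size_t off, size_t len, size_t copied,
                              fetch_fn fetch)
    {
        if (copied >= len)
            return 0;               /* nothing missing */
        return fetch(page + off + copied,
                     page_pos + off + copied,
                     len - copied);
    }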
Reported-by: Al Viro Signed-off-by: David Howells --- fs/afs/fsclient.c | 4 +++- fs/afs/internal.h | 2 +- fs/afs/write.c | 28 +++++++++++++++++++--------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index bf8904a1a58fb6..6f917dd1238c06 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -393,8 +393,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->remain > 0) { call->offset = 0; req->index++; - if (req->index >= req->nr_pages) + if (req->index >= req->nr_pages) { + call->unmarshall = 4; goto begin_discard; + } goto begin_page; } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7784a8bc375c74..dc2cb486e12798 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -130,7 +130,7 @@ struct afs_call_type { */ struct afs_read { loff_t pos; /* Where to start reading */ - loff_t len; /* How much to read */ + loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ atomic_t usage; unsigned int remain; /* Amount remaining */ diff --git a/fs/afs/write.c b/fs/afs/write.c index 3ac52f6a96ff66..ea66890fc18887 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, struct page *page) + loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; - loff_t i_size; int ret; _enter(",,%llu", (unsigned long long)pos); @@ -99,16 +98,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, atomic_set(&req->usage, 1); req->pos = pos; + req->len = len; req->nr_pages = 1; req->pages[0] = page; get_page(page); - i_size = i_size_read(&vnode->vfs_inode); - if (pos + PAGE_SIZE > i_size) - req->len = i_size - pos; - else - req->len = PAGE_SIZE; - ret = afs_vnode_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { @@ -164,7 +158,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { - ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); + ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); @@ -258,7 +252,9 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct key *key = file->private_data; loff_t i_size, maybe_i_size; + int ret; _enter("{%x:%u},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); @@ -274,6 +270,20 @@ int afs_write_end(struct file *file, struct address_space *mapping, spin_unlock(&vnode->writeback_lock); } + if (!PageUptodate(page)) { + if (copied < len) { + /* Try and load any missing data from the server. The + * unmarshalling routine will take care of clearing any + * bits that are beyond the EOF. + */ + ret = afs_fill_page(vnode, key, pos + copied, + len - copied, page); + if (ret < 0) + return ret; + } + SetPageUptodate(page); + } + set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); From 58fed94dfb17e89556b5705f20f90e5b2971b6a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 1375/1911] afs: Flush outstanding writes when an fd is closed Flush outstanding writes in afs when an fd is closed. 
This is what NFS and CIFS do. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/file.c | 1 + fs/afs/internal.h | 1 + fs/afs/write.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index a38e1c30d1106a..b5829443ff6933 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index dc2cb486e12798..af1d91ec7f2c62 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -720,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index ea66890fc18887..f1450ea0940685 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -757,6 +757,20 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +/* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. + */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal From 944c74f472f926785b1948efa0e73e2f1b3b539b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 1376/1911] afs: Distinguish mountpoints from symlinks by file mode alone In AFS, mountpoints appear as symlinks with mode 0644 and normal symlinks have mode 0777, so use this to distinguish them rather than reading the content and parsing it. In the case of a mountpoint, the symlink body is a formatted string indicating the location of the target volume. Note that with this, kAFS no longer 'pre-fetches' the contents of symlinks, so afs_readpage() may fail with an access-denial because when the VFS calls d_automount(), it wraps the call in an credentials override that sets the initial creds - thereby preventing access to the caller's keyrings and the authentication keys held therein. To this end, a patch reverting that change to the VFS is required also. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/inode.c | 29 +++++++++++++------------- fs/afs/internal.h | 1 - fs/afs/mntpt.c | 53 ----------------------------------------------- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 299dbaeb2e2abd..ade6ec3873cf45 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_fop = &afs_dir_file_operations; break; case AFS_FTYPE_SYMLINK: - inode->i_mode = S_IFLNK | vnode->status.mode; - inode->i_op = &page_symlink_inode_operations; + /* Symlinks with a mode of 0644 are actually mountpoints. 
*/ + if ((vnode->status.mode & 0777) == 0644) { + inode->i_flags |= S_AUTOMOUNT; + + spin_lock(&vnode->lock); + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + spin_unlock(&vnode->lock); + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_mntpt_inode_operations; + inode->i_fop = &afs_mntpt_file_operations; + } else { + inode->i_mode = S_IFLNK | vnode->status.mode; + inode->i_op = &page_symlink_inode_operations; + } inode_nohighmem(inode); break; default: @@ -79,18 +92,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; - - /* check to see whether a symbolic link is really a mountpoint */ - if (vnode->status.type == AFS_FTYPE_SYMLINK) { - afs_mntpt_check_symlink(vnode, key); - - if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) { - inode->i_mode = S_IFDIR | vnode->status.mode; - inode->i_op = &afs_mntpt_inode_operations; - inode->i_fop = &afs_mntpt_file_operations; - } - } - return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index af1d91ec7f2c62..39de154fb42ed8 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -559,7 +559,6 @@ extern const struct inode_operations afs_autocell_inode_operations; extern const struct file_operations afs_mntpt_file_operations; extern struct vfsmount *afs_d_automount(struct path *); -extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index d4fb0afc0097d4..bd3b65cde282a2 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; -/* - * check a symbolic link to see whether it actually encodes a mountpoint - * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately - */ -int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) -{ - struct page *page; - size_t size; - char *buf; - int ret; - - _enter("{%x:%u,%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - - /* read the contents of the symlink into the pagecache */ - page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, - afs_page_filler, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - - ret = -EIO; - if (PageError(page)) - goto out_free; - - buf = kmap(page); - - /* examine the symlink's contents */ - size = vnode->status.size; - _debug("symlink to %*.*s", (int) size, (int) size, buf); - - if (size > 2 && - (buf[0] == '%' || buf[0] == '#') && - buf[size - 1] == '.' - ) { - _debug("symlink is a mountpoint"); - spin_lock(&vnode->lock); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - vnode->vfs_inode.i_flags |= S_AUTOMOUNT; - spin_unlock(&vnode->lock); - } - - ret = 0; - - kunmap(page); -out_free: - put_page(page); -out: - _leave(" = %d", ret); - return ret; -} - /* * no valid lookup procedure on this sort of dir */ From 1d7e4ebf291d3103466006926329e39aa355944f Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 1377/1911] afs: inode: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. 
The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ade6ec3873cf45..e083e086b7ca8b 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -445,7 +445,7 @@ void afs_evict_inode(struct inode *inode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) call_rcu(&permits->rcu, afs_zap_permits); From df8a09d1b8f9e693ec3f6b7e0162fc817f2cf0db Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 1378/1911] afs: security: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index bfa9d342838355..ecb86a6701801c 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) From 8a79790bf0b7da216627ffb85f52cfb4adbf1e4e Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 1379/1911] afs: Migrate vlocation fields to 64-bit get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs's vlocation record to use ktime_get_real_seconds() instead, for the fields time_of_death and update_at. 
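The overflow being avoided is easy to demonstrate in standalone C (illustration only; the wrap shown for the 32-bit case is what happens on the usual two's-complement platforms):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* 0x7fffffff seconds == 2038-01-19 03:14:07 UTC, the last second a
         * signed 32-bit seconds counter can represent. */
        int64_t next = (int64_t)INT32_MAX + 1;

        int32_t t32 = (int32_t)next;    /* truncates: jumps back to 1901 */
        int64_t t64 = next;             /* keeps counting */

        printf("32-bit seconds: %d\n", (int)t32);
        printf("64-bit seconds: %lld\n", (long long)t64);
        return 0;
    }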
Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/callback.c | 7 ++++--- fs/afs/internal.h | 7 ++++--- fs/afs/server.c | 6 +++--- fs/afs/vlocation.c | 16 +++++++++------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index b29447e03ede0d..25d404d22caebc 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 39de154fb42ed8..97a16ce200be8b 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -249,7 +250,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -260,7 +261,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -273,7 +274,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd5505..c001b1f2455fbf 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index d7d8dd8c0b3187..37b7c3b342a6b5 100644 --- 
a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... */ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); From 56e714312e7dbd6bb83b2f78d3ec19a404c7649f Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 1380/1911] afs: Prevent callback expiry timer overflow get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs_vnode record to use ktime_get_real_seconds() instead, for the fields cb_expires and cb_expires_at. 
Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- fs/afs/inode.c | 7 ++++--- fs/afs/internal.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6f917dd1238c06..c05452a0939870 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -145,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index e083e086b7ca8b..4079c832ff27e7 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -246,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -324,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 97a16ce200be8b..832555003d0396 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -377,8 +377,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ From 29f069853287dcb46eaf45a50dbf1232c1444ac6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 1381/1911] afs: Fix AFS read bug Fix a bug in AFS read whereby the request page afs_read::index isn't incremented after calling ->page_done() if ->remain reaches 0, indicating that the data read is complete. Without this a NULL pointer exception happens when ->page_done() is called twice for the last page because the page clearing loop will call it also and afs_readpages_page_done() clears the current entry in the page list. 
BUG: unable to handle kernel NULL pointer dereference at (null) IP: afs_readpages_page_done+0x21/0xa4 [kafs] PGD 0 Oops: 0002 [#1] SMP Modules linked in: kafs(E) CPU: 2 PID: 3002 Comm: md5sum Tainted: G E 4.10.0-fscache #485 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804017d86c0 task.stack: ffff8803fc1d8000 RIP: 0010:afs_readpages_page_done+0x21/0xa4 [kafs] RSP: 0018:ffff8803fc1db978 EFLAGS: 00010282 RAX: ffff880405d39af8 RBX: 0000000000000000 RCX: ffff880407d83ed4 RDX: 0000000000000000 RSI: ffff880405d39a00 RDI: ffff880405c6f400 RBP: ffff8803fc1db988 R08: 0000000000000000 R09: 0000000000000001 R10: ffff8803fc1db820 R11: ffff88040cf56000 R12: ffff8804088f1780 R13: ffff8804017d86c0 R14: ffff8804088f1780 R15: 0000000000003840 FS: 00007f8154469700(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000004016ec000 CR4: 00000000001406e0 Call Trace: afs_deliver_fs_fetch_data+0x5b9/0x60e [kafs] ? afs_make_call+0x316/0x4e8 [kafs] ? afs_make_call+0x359/0x4e8 [kafs] afs_deliver_to_call+0x173/0x2e8 [kafs] ? afs_make_call+0x316/0x4e8 [kafs] afs_make_call+0x37a/0x4e8 [kafs] ? wake_up_q+0x4f/0x4f ? __init_waitqueue_head+0x36/0x49 afs_fs_fetch_data+0x21c/0x227 [kafs] ? afs_fs_fetch_data+0x21c/0x227 [kafs] afs_vnode_fetch_data+0xf3/0x1d2 [kafs] afs_readpages+0x314/0x3fd [kafs] __do_page_cache_readahead+0x208/0x2c5 ondemand_readahead+0x3a2/0x3b7 ? ondemand_readahead+0x3a2/0x3b7 page_cache_async_readahead+0x5e/0x67 generic_file_read_iter+0x23b/0x70c ? __inode_security_revalidate+0x2f/0x62 __vfs_read+0xc4/0xe8 vfs_read+0xd1/0x15a SyS_read+0x4c/0x89 do_syscall_64+0x80/0x191 entry_SYSCALL64_slow_path+0x25/0x25 Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c05452a0939870..4314f9e63a2c68 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -390,9 +390,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (call->offset == PAGE_SIZE) { if (req->page_done) req->page_done(call, req); + req->index++; if (req->remain > 0) { call->offset = 0; - req->index++; if (req->index >= req->nr_pages) { call->unmarshall = 4; goto begin_discard; From 6a0e3999e5cb3daa0468073fcdee0767422a4056 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 1382/1911] afs: Make struct afs_read::remain 64-bit Make struct afs_read::remain 64-bit so that it can handle huge transfers if we ever request them or the server decides to give us a bit extra data (the other fields there are already 64-bit). 
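Why 32 bits is not enough for the amount remaining can be shown in a few lines of standalone C (illustration only):

    #include <stdio.h>

    int main(void)
    {
        long long actual_len = 5LL * 1024 * 1024 * 1024;  /* 5 GiB from the server */

        unsigned int remain32 = actual_len;   /* silently reduced mod 2^32: 1 GiB */
        long long    remain64 = actual_len;   /* preserved */

        printf("32-bit remain: %u\n", remain32);
        printf("64-bit remain: %lld\n", remain64);
        return 0;
    }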
Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 8 ++++---- fs/afs/internal.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4314f9e63a2c68..0778c5b6b59b4b 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -321,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) void *buffer; int ret; - _enter("{%u,%zu/%u;%u/%llu}", + _enter("{%u,%zu/%u;%llu/%llu}", call->unmarshall, call->offset, call->count, req->remain, req->actual_len); @@ -379,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* extract the returned data */ case 3: - _debug("extract data %u/%llu %zu/%u", + _debug("extract data %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); buffer = kmap(req->pages[req->index]); @@ -405,9 +405,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* Discard any excess data the server gave us */ begin_discard: case 4: - size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); call->count = size; - _debug("extract discard %u/%llu %zu/%u", + _debug("extract discard %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); call->offset = 0; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 832555003d0396..a6901360fb81d4 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -133,8 +133,8 @@ struct afs_read { loff_t pos; /* Where to start reading */ loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ + loff_t remain; /* Amount remaining */ atomic_t usage; - unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); From 2f5705a5c805e7f761f2228820656bb9363a3d8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 1383/1911] afs: Use a bvec rather than a kvec in afs_send_pages() Use a bvec rather than a kvec in afs_send_pages() as we don't then have to call kmap() in advance. This allows us to pass the array of contiguous pages that we extracted through to rxrpc in one go rather than passing a single page at a time. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 97 +++++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 419ef05dcb5ec7..bf45307ff201d0 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } +#define AFS_BVEC_MAX 8 + +/* + * Load the given bvec with the next few pages. 
+ */ +static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, + struct bio_vec *bv, pgoff_t first, pgoff_t last, + unsigned offset) +{ + struct page *pages[AFS_BVEC_MAX]; + unsigned int nr, n, i, to, bytes = 0; + + nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); + n = find_get_pages_contig(call->mapping, first, nr, pages); + ASSERTCMP(n, ==, nr); + + msg->msg_flags |= MSG_MORE; + for (i = 0; i < nr; i++) { + to = PAGE_SIZE; + if (first + i >= last) { + to = call->last_to; + msg->msg_flags &= ~MSG_MORE; + } + bv[i].bv_page = pages[i]; + bv[i].bv_len = to - offset; + bv[i].bv_offset = offset; + bytes += to - offset; + offset = 0; + } + + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); +} + /* * attach the data from a bunch of pages on an inode to a call */ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { - struct page *pages[8]; - unsigned count, n, loop, offset, to; + struct bio_vec bv[AFS_BVEC_MAX]; + unsigned int bytes, nr, loop, offset; pgoff_t first = call->first, last = call->last; int ret; - _enter(""); - offset = call->first_offset; call->first_offset = 0; do { - _debug("attach %lx-%lx", first, last); - - count = last - first + 1; - if (count > ARRAY_SIZE(pages)) - count = ARRAY_SIZE(pages); - n = find_get_pages_contig(call->mapping, first, count, pages); - ASSERTCMP(n, ==, count); - - loop = 0; - do { - struct bio_vec bvec = {.bv_page = pages[loop], - .bv_offset = offset}; - msg->msg_flags = 0; - to = PAGE_SIZE; - if (first + loop >= last) - to = call->last_to; - else - msg->msg_flags = MSG_MORE; - bvec.bv_len = to - offset; - offset = 0; - - _debug("- range %u-%u%s", - offset, to, msg->msg_flags ? " [more]" : ""); - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, - &bvec, 1, to - offset); - - /* have to change the state *before* sending the last - * packet as RxRPC might give us the reply before it - * returns from sending the request */ - if (first + loop >= last) - call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, - msg, to - offset); - if (ret < 0) - break; - } while (++loop < count); - first += count; - - for (loop = 0; loop < count; loop++) - put_page(pages[loop]); + afs_load_bvec(call, msg, bv, first, last, offset); + offset = 0; + bytes = msg->msg_iter.count; + nr = msg->msg_iter.nr_segs; + + /* Have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request. + */ + if (first + nr >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, bytes); + for (loop = 0; loop < nr; loop++) + put_page(bv[loop].bv_page); if (ret < 0) break; + + first += nr; } while (first <= last); - _leave(" = %d", ret); return ret; } From 146a1192783697810b63a1e41c4d59fc93387340 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 1384/1911] afs: Fix the maths in afs_fs_store_data() afs_fs_store_data() works out of the size of the write it's going to make, but it uses 32-bit unsigned subtraction in one place that gets automatically cast to loff_t. However, if to < offset, then the number goes negative, but as the result isn't signed, this doesn't get sign-extended to 64-bits when placed in a loff_t. Fix by casting the operands to loff_t. 
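The arithmetic can be reproduced in standalone C, with long long standing in for loff_t (illustration only):

    #include <stdio.h>

    int main(void)
    {
        unsigned int offset = 200, to = 100;    /* to < offset */

        long long bad  = to - offset;                        /* 4294967196 */
        long long good = (long long)to - (long long)offset;  /* -100 */

        printf("unsigned subtraction, then widened: %lld\n", bad);
        printf("widened first, then subtracted:     %lld\n", good);
        return 0;
    }

The subtraction on the first line is performed in unsigned int, so the wrapped value is already non-negative by the time it is widened to 64 bits; casting the operands first keeps the sign.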
Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 0778c5b6b59b4b..d9234b767287ac 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1236,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; From 1157f153f37a8586765034470e4f00a4a6c4ce6f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 1385/1911] afs: Invalid op ID should abort with RXGEN_OPCODE When we are given an invalid operation ID, we should abort that with RXGEN_OPCODE rather than RX_INVALID_OPERATION. Also map RXGEN_OPCODE to -ENOTSUPP. Signed-off-by: David Howells --- fs/afs/misc.c | 2 ++ fs/afs/rxrpc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3ab..100b207efc9ead 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf45307ff201d0..bf7761fe6ef5c7 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -465,7 +465,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); goto do_abort; From 70af0e3bd65142f9e674961c975451638a7ce1d5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 1386/1911] afs: Better abort and net error handling If we receive a network error, a remote abort or a protocol error whilst we're still transmitting data, make sure we return an appropriate error to the caller rather than ESHUTDOWN or ECONNABORTED. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf7761fe6ef5c7..22d26b3690709b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -340,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -388,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. 
+ */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -412,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return afs_wait_for_call_to_complete(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_put_call(call); _leave(" = %d", ret); @@ -459,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -478,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -489,8 +505,9 @@ static void afs_deliver_to_call(struct afs_call *call) _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -538,6 +555,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD, -ret, abort_why); + } else if (call->error < 0) { + ret = call->error; } _debug("call complete"); From ab94f5d0dd6fd82e7eeca5e7c8096eaea0a0261f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 1387/1911] afs: Populate and use client modification time The inode timestamps should be set from the client time in the status received from the server, rather than the server time which is meant for internal server use. Set AFS_SET_MTIME and populate the mtime for operations that take an input status, such as file/dir creation and StoreData. If an input time is not provided the server will set the vnode times based on the current server time. In a situation where the server has some skew with the client, this could lead to the client seeing a timestamp in the future for a file that it just created or wrote. 
Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/fsclient.c | 18 +++++++++--------- fs/afs/inode.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d9234b767287ac..19f76ae36982df 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -111,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -734,8 +734,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -1003,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1203,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1280,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4079c832ff27e7..aae55dd151087e 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -85,7 +85,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_uid = vnode->status.owner; inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; From 68ae849d7e674b83610bc7fdf74b21621a09b9ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 1388/1911] afs: Don't set PG_error on local EINTR or ENOMEM when filling a page Don't set PG_error on a page if we get local EINTR or ENOMEM when filling a page for writing. 
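A minimal sketch of the classification being made (hypothetical helper; the error list mirrors the one used in the diff below, and ERESTARTSYS is defined here only because it is a kernel-internal value):

    #include <errno.h>
    #include <stdbool.h>

    #ifndef ERESTARTSYS
    #define ERESTARTSYS 512     /* kernel-internal, not in userspace errno.h */
    #endif

    /* Local or transient failures mean the data simply is not there yet, so
     * the page should not be branded with a hard I/O error. */
    static bool fill_error_is_local(int error)
    {
        switch (error) {
        case -EINTR:
        case -ENOMEM:
        case -ERESTARTSYS:
        case -EAGAIN:
            return true;
        default:
            return false;       /* a real I/O error: PG_error is warranted */
        }
    }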
Signed-off-by: David Howells --- fs/afs/file.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index b5829443ff6933..0d5b8508869bf0 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -212,7 +212,13 @@ int afs_page_filler(void *data, struct page *page) fscache_uncache_page(vnode->cache, page); #endif BUG_ON(PageFsCache(page)); - goto error; + + if (ret == -EINTR || + ret == -ENOMEM || + ret == -ERESTARTSYS || + ret == -EAGAIN) + goto error; + goto io_error; } SetPageUptodate(page); @@ -231,10 +237,12 @@ int afs_page_filler(void *data, struct page *page) _leave(" = 0"); return 0; +io_error: + SetPageError(page); + goto error; enomem: ret = -ENOMEM; error: - SetPageError(page); unlock_page(page); _leave(" = %d", ret); return ret; From 6d06b0d25209c80e99c1e89700f1e09694a3766b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 1389/1911] afs: Fix page leak in afs_write_begin() afs_write_begin() leaks a ref and a lock on a page if afs_fill_page() fails. Fix the leak by unlocking and releasing the page in the error path. Signed-off-by: David Howells --- fs/afs/write.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index f1450ea0940685..6e13e96c3db0d3 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -154,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -167,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); From 7286a35e893176169b09715096a4aca557e2ccd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 1390/1911] afs: Fix afs_kill_pages() Fix afs_kill_pages() in two ways: (1) If a writeback has been partially flushed, then if we try and kill the pages it contains, some of them may no longer be undergoing writeback and end_page_writeback() will assert. Fix this by checking to see whether the page in question is actually undergoing writeback before ending that writeback. (2) The loop that scans for pages to kill doesn't increase the first page index, and so the loop may not terminate, but it will try to process the same pages over and over again. Fix this by increasing the first page index to one after the last page we processed. 
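The termination part of the fix follows the usual batched page-scan pattern; a standalone sketch (with a fake lookup helper in place of find_get_pages_contig()) shows why advancing the start index matters:

    #include <stdio.h>

    #define BATCH 3

    /* Stand-in for the pagevec lookup: return up to BATCH indices in [first, last]. */
    static int lookup_pages(unsigned long first, unsigned long last,
                            unsigned long *out)
    {
        int n = 0;

        while (n < BATCH && first + n <= last) {
            out[n] = first + n;
            n++;
        }
        return n;
    }

    int main(void)
    {
        unsigned long first = 0, last = 7, pages[BATCH];
        int n, i;

        while (first <= last && (n = lookup_pages(first, last, pages)) > 0) {
            for (i = 0; i < n; i++) {
                printf("killing page %lu\n", pages[i]);
                /* Without this, the same batch would be fetched forever. */
                if (pages[i] >= first)
                    first = pages[i] + 1;
            }
        }
        return 0;
    }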
Signed-off-by: David Howells --- fs/afs/write.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 6e13e96c3db0d3..134de066789862 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -321,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); From 445783d0ec173a52bef2e9b129de7d716a19b9fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 1391/1911] afs: Fix an off-by-one error in afs_send_pages() afs_send_pages() should only put the call into the AFS_CALL_AWAIT_REPLY state if it has sent all the pages - but the check it makes is incorrect and sometimes it will finish the loop early. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 22d26b3690709b..b12da6aa54129b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -315,7 +315,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) * packet as RxRPC might give us the reply before it * returns from sending the request. */ - if (first + nr >= last) + if (first + nr - 1 >= last) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, bytes); From 954cd6dc02a65065aecb7150962c0870c5b0e322 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 1392/1911] afs: Fix abort on signal while waiting for call completion Fix the way in which a call that's in progress and being waited for is aborted in the case that EINTR is detected. We should be sending RX_USER_ABORT rather than RX_CALL_DEAD as the abort code. Note that since the only two ways out of the loop are if the call completes or if a signal happens, the kill-the-call clause after the loop has finished can only happen in the case of EINTR. This means that we only have one abort case to deal with, not two, and the "KWC" case can never happen and so can be deleted. Note further that simply aborting the call isn't necessarily the best thing here since at this point: the request has been entirely sent and it's likely the server will do the operation anyway - whether we abort it or not. In future, we should punt the handling of the remainder of the call off to a background thread. 
Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rxrpc.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index b12da6aa54129b..8f76b13d55494b 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -517,7 +517,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -536,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -550,15 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); - } else if (call->error < 0) { - ret = call->error; + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_put_call(call); _leave(" = %d", ret); From 65a151094edeb04e8f5f6f1502028e2383e81bb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 1393/1911] afs: ->writepage() shouldn't call clear_page_dirty_for_io() The ->writepage() op shouldn't call clear_page_dirty_for_io() as that has already been called by the caller. Fix afs_writepage() by moving the call out of afs_write_back_from_locked_page() to afs_writepages_region() where it is needed. Signed-off-by: David Howells --- fs/afs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 134de066789862..e5f150bccfb5d1 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -231,7 +231,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, if (wb->state == AFS_WBACK_PENDING) wb->state = AFS_WBACK_CONFLICTING; spin_unlock(&vnode->writeback_lock); - if (PageDirty(page)) { + if (clear_page_dirty_for_io(page)) { ret = afs_write_back_from_locked_page(wb, page); if (ret < 0) { afs_put_writeback(candidate); @@ -353,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, _enter(",%lx", primary_page->index); count = 1; - if (!clear_page_dirty_for_io(primary_page)) - BUG(); if (test_set_page_writeback(primary_page)) BUG(); @@ -542,6 +540,8 @@ static int afs_writepages_region(struct address_space *mapping, wb->state = AFS_WBACK_WRITING; spin_unlock(&wb->vnode->writeback_lock); + if (!clear_page_dirty_for_io(page)) + BUG(); ret = afs_write_back_from_locked_page(wb, page); unlock_page(page); put_page(page); From c5051c7bc777dffa5661569dec5997f432b9a34a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 1394/1911] afs: Don't wait for page writeback with the page lock held Drop the page lock before waiting for page writeback. 
Signed-off-by: David Howells --- fs/afs/write.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index e5f150bccfb5d1..2d2fccd5044bcd 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -518,17 +518,16 @@ static int afs_writepages_region(struct address_space *mapping, */ lock_page(page); - if (page->mapping != mapping) { + if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); put_page(page); continue; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || !PageDirty(page)) { + if (PageWriteback(page)) { unlock_page(page); + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); put_page(page); continue; } From e4c5d3762e2d6d274bd1cc948c47063becfa2103 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 18:44:45 +0200 Subject: [PATCH 1395/1911] nvme-loop: fix a possible use-after-free when destroying the admin queue We need to destroy the nvmet sq and let it finish gracefully before continuing to clean up the queue. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d1f06e7768ff1d..74e04a0855d452 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -288,9 +288,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) From d11ea004a458b982e19b188c386e25a9b66ec446 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 6 Mar 2017 18:46:20 +0200 Subject: [PATCH 1396/1911] nvmet: confirm sq percpu has scheduled and switched to atomic percpu_ref_kill is not enough to prevent subsequent percpu_ref_tryget_live from failing. Hence call percpu_ref_kill_and_confirm to make it safe.
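Condensed to its essentials, the teardown then has two distinct waits. The kernel-style sketch below shows the idea only and is not the driver's exact code: the struct is reduced, and the release callback that completes free_done is not shown.

    #include <linux/completion.h>
    #include <linux/kernel.h>
    #include <linux/percpu-refcount.h>

    struct sq_sketch {
        struct percpu_ref ref;
        struct completion confirm_done;
        struct completion free_done;    /* completed by the ref's release callback */
    };

    static void sq_confirm(struct percpu_ref *ref)
    {
        struct sq_sketch *sq = container_of(ref, struct sq_sketch, ref);

        complete(&sq->confirm_done);    /* the ref has now switched to atomic mode */
    }

    static void sq_destroy_sketch(struct sq_sketch *sq)
    {
        percpu_ref_kill_and_confirm(&sq->ref, sq_confirm);
        wait_for_completion(&sq->confirm_done);  /* no tryget_live can succeed now */
        wait_for_completion(&sq->free_done);     /* last reference is gone */
        percpu_ref_exit(&sq->ref);
    }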
Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 11 ++++++++++- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 11b0a0a5f661b5..798653b329b28c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -425,6 +425,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ctrl->sqs[qid] = sq; } +static void nvmet_confirm_sq(struct percpu_ref *ref) +{ + struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); + + complete(&sq->confirm_done); +} + void nvmet_sq_destroy(struct nvmet_sq *sq) { /* @@ -433,7 +440,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) */ if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) nvmet_async_events_free(sq->ctrl); - percpu_ref_kill(&sq->ref); + percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); + wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); @@ -461,6 +469,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) return ret; } init_completion(&sq->free_done); + init_completion(&sq->confirm_done); return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 1370eee0a3c0f6..f7ff15f17ca97d 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -73,6 +73,7 @@ struct nvmet_sq { u16 qid; u16 size; struct completion free_done; + struct completion confirm_done; }; /** From b25634e2a051bef4b2524b11adddfbfa6448f6cd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 9 Mar 2017 13:45:52 +0200 Subject: [PATCH 1397/1911] nvmet-rdma: Fix a possible uninitialized variable dereference When handling a new recv command, we grab a new rsp resource and check for the queue state being live. In case the queue is not in live state, we simply restore the rsp back to the free list. However in this flow we didn't set rsp->queue yet, so we cannot dereference it. Instead, make sure to initialize rsp->queue (and other rsp members) as soon as possible so we won't reference uninitialized variables. 
Reported-by: Yi Zhang Reported-by: Raju Rangoju Reviewed-by: Christoph Hellwig Tested-by: Raju Rangoju Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9aa1da3778b3ac..ecc4fe86256124 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, { u16 status; - cmd->queue = queue; - cmd->n_rdma = 0; - cmd->req.port = queue->port; - - ib_dma_sync_single_for_cpu(queue->dev->device, cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, DMA_FROM_DEVICE); @@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; rsp->req.cmd = cmd->nvme_cmd; + rsp->req.port = queue->port; + rsp->n_rdma = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; From b334e19ae9381f12a7521976883022385d2b7eef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jan 2017 16:40:01 +0100 Subject: [PATCH 1398/1911] Kbuild: use cc-disable-warning consistently for maybe-uninitialized In commit a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning for "make W=1""), I reverted another change that happened to fix a problem with old compilers, and now we get this report again with old compilers (prior to gcc-4.8) and GCOV enabled: cc1: warnings being treated as errors drivers/gpu/drm/i915/intel_ringbuffer.c: In function 'intel_ring_setup_status_page': drivers/gpu/drm/i915/intel_ringbuffer.c:438: error: 'mmio.reg' may be used uninitialized in this function At top level: >> cc1: error: unrecognized command line option "-Wno-maybe-uninitialized" The problem is that we turn off the warning conditionally in a number of places as we should, but one of them does it unconditionally. Instead, change it to call cc-disable-warning as we do elsewhere. The original patch that caused it was merged into linux-4.7, then 4.8 removed the change and 4.9 brought it back, so we probably want a backport to 4.9 once this is merged. Use a ':=' assignment instead of '=' to force the cc-disable-warning call to only be evaluated once instead of every time. Cc: stable@vger.kernel.org Fixes: a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning for "make W=1"") Fixes: e72e2dfe7c16 ("gcov: disable -Wmaybe-uninitialized warning") Reported-by: kbuild test robot Signed-off-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 165cf9783a5dbb..b8a6b862ed7376 100644 --- a/Makefile +++ b/Makefile @@ -372,7 +372,7 @@ LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = -CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) From 8f3dbfd79ed9ef9770305a7cc4e13dfd31ad2cd0 Mon Sep 17 00:00:00 2001 From: Kris Murphy Date: Thu, 16 Mar 2017 10:51:28 -0500 Subject: [PATCH 1399/1911] openvswitch: Add missing case OVS_TUNNEL_KEY_ATTR_PAD Added a case for OVS_TUNNEL_KEY_ATTR_PAD to the switch statement in ip_tun_from_nlattr in order to prevent the default case returning an error. 
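The problem is generic to netlink parsers: once 64-bit attributes are emitted with the aligned helpers, a zero-length *_ATTR_PAD attribute may appear in the stream, and any switch whose default case rejects unknown attributes must ignore it explicitly. A minimal sketch of the pattern (the FOO_* attribute names are made up for illustration):

	nla_for_each_nested(a, attr, rem) {
		switch (nla_type(a)) {
		case FOO_ATTR_ID:
			id = nla_get_be64(a);
			break;
		case FOO_ATTR_PAD:
			/* alignment padding, not an error */
			break;
		default:
			return -EINVAL;	/* genuinely unknown attribute */
		}
	}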
Fixes: b46f6ded906e ("libnl: nla_put_be64(): align on a 64-bit area") Signed-off-by: Kris Murphy Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index a08ff834676ba9..1105a838bab83f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -665,6 +665,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_VXLAN_OPT; opts_type = type; break; + case OVS_TUNNEL_KEY_ATTR_PAD: + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); From 59cf8bed44a79ec42303151dd014fdb6434254bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:34:02 -0700 Subject: [PATCH 1400/1911] Input: iforce - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory that lie beyond the end of the endpoint array should a malicious device lack the expected endpoints. Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index d96aa27dfcdc97..db64adfbe1aff0 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -141,6 +141,9 @@ static int iforce_usb_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 2) + return -ENODEV; + epirq = &interface->endpoint[0].desc; epout = &interface->endpoint[1].desc; From ac2ee9ba953afe88f7a673e1c0c839227b1d7891 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:35:12 -0700 Subject: [PATCH 1401/1911] Input: cm109 - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: c04148f915e5 ("Input: add driver for USB VoIP phones with CM109...") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.28 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/cm109.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 9cc6d057c302a1..23c191a2a0715a 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -700,6 +700,10 @@ static int cm109_usb_probe(struct usb_interface *intf, int error = -ENOMEM; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) From 1916d319271664241b7aa0cd2b05e32bdb310ce9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:36:13 -0700 Subject: [PATCH 1402/1911] Input: ims-pcu - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack control-interface endpoints. 
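All of the probe() fixes in this group rely on the same invariant: intf->cur_altsetting->endpoint[] holds exactly desc.bNumEndpoints entries, and everything in it comes from the (possibly malicious) device, so the count has to be checked before indexing into the array. The shape of the check, as a sketch (foo_probe() and the local names are illustrative, not a real driver):

	static int foo_probe(struct usb_interface *intf, const struct usb_device_id *id)
	{
		struct usb_host_interface *alt = intf->cur_altsetting;
		struct usb_endpoint_descriptor *ep_irq, *ep_out;

		/* this driver dereferences endpoint[0] and endpoint[1] below */
		if (alt->desc.bNumEndpoints < 2)
			return -ENODEV;

		ep_irq = &alt->endpoint[0].desc;
		ep_out = &alt->endpoint[1].desc;

		if (!usb_endpoint_is_int_in(ep_irq))
			return -ENODEV;

		/* ... normal probe work ... */
		return 0;
	}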
Fixes: 628329d52474 ("Input: add IMS Passenger Control Unit driver") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ims-pcu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 9c0ea36913b4a9..f4e8fbec6a942a 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1667,6 +1667,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc return -EINVAL; alt = pcu->ctrl_intf->cur_altsetting; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + pcu->ep_ctrl = &alt->endpoint[0].desc; pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl); From 5cc4a1a9f5c179795c8a1f2b0f4361829d6a070e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:37:01 -0700 Subject: [PATCH 1403/1911] Input: yealink - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: aca951a22a1d ("[PATCH] input-driver-yealink-P1K-usb-phone") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.14 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/yealink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c index 79c964c075f140..6e7ff9561d9261 100644 --- a/drivers/input/misc/yealink.c +++ b/drivers/input/misc/yealink.c @@ -875,6 +875,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) int ret, pipe, i; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; From ba340d7b83703768ce566f53f857543359aa1b98 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:39:29 -0700 Subject: [PATCH 1404/1911] Input: hanwang - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: bba5394ad3bd ("Input: add support for Hanwang tablets") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.37 Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/hanwang.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c index cd852059b99e81..df4bea96d7ed7d 100644 --- a/drivers/input/tablet/hanwang.c +++ b/drivers/input/tablet/hanwang.c @@ -340,6 +340,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id int error; int i; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL); input_dev = input_allocate_device(); if (!hanwang || !input_dev) { From cb1b494663e037253337623bf1ef2df727883cb7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:41:55 -0700 Subject: [PATCH 1405/1911] Input: kbtab - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. 
Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/kbtab.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index e850d7e8afbc4d..4d9d64908b595f 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -122,6 +122,9 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i struct input_dev *input_dev; int error = -ENOMEM; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL); input_dev = input_allocate_device(); if (!kbtab || !input_dev) From 92461f5d723037530c1f36cce93640770037812c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:43:09 -0700 Subject: [PATCH 1406/1911] Input: sur40 - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory that lie beyond the end of the endpoint array should a malicious device lack the expected endpoints. Fixes: bdb5c57f209c ("Input: add sur40 driver for Samsung SUR40... ") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 3.13 Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/sur40.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index aefb6e11f88a08..4c0eecae065c11 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -527,6 +527,9 @@ static int sur40_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 5) + return -ENODEV; + /* Use endpoint #4 (0x86). */ endpoint = &iface_desc->endpoint[4].desc; if (endpoint->bEndpointAddress != TOUCH_ENDPOINT) From acfa28b3649ec07775efaac0c00de2db39d71634 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 1 Mar 2017 18:02:28 -0500 Subject: [PATCH 1407/1911] ARM: dts: NSP: GPIO reboot open-source The libgpio code pre-sets the GPIO values for the gpio-reset in the device tree. This results in the device being reset during bringup. To prevent this pre-setting, use the "open-source" flag in the device tree. 
Signed-off-by: Jon Mason Fixes: b1aaf88 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625hr DTS file") Fixes: 10baed1 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625xmc DTS file") Fixes: 088e3148 ("ARM: dts: NSP: Add new DT file for bcm958522er") Fixes: e3227c1 ("ARM: dts: NSP: Add new DT file for bcm958525er") Fixes: 2f8bc00 ("ARM: dts: NSP: Add new DT file for bcm958622hr") Fixes: d454c37 ("ARM: dts: NSP: Add new DT file for bcm958623hr") Fixes: f27eacf ("ARM: dts: NSP: Add new DT file for bcm988312hr") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm958522er.dts | 1 + arch/arm/boot/dts/bcm958525er.dts | 1 + arch/arm/boot/dts/bcm958525xmc.dts | 1 + arch/arm/boot/dts/bcm958622hr.dts | 1 + arch/arm/boot/dts/bcm958623hr.dts | 1 + arch/arm/boot/dts/bcm958625hr.dts | 1 + arch/arm/boot/dts/bcm988312hr.dts | 1 + 7 files changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts index 3f04a40eb90cc9..df05e7f568af3e 100644 --- a/arch/arm/boot/dts/bcm958522er.dts +++ b/arch/arm/boot/dts/bcm958522er.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts index 9fd542200d3d52..4a3ab19c62819f 100644 --- a/arch/arm/boot/dts/bcm958525er.dts +++ b/arch/arm/boot/dts/bcm958525er.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts index 41e7fd350fcd1b..81f78435d8c76c 100644 --- a/arch/arm/boot/dts/bcm958525xmc.dts +++ b/arch/arm/boot/dts/bcm958525xmc.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 31 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts index 477c4860db5223..c88b8fefcb2f13 100644 --- a/arch/arm/boot/dts/bcm958622hr.dts +++ b/arch/arm/boot/dts/bcm958622hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index c0a499d5ba447d..d503fa0dde310f 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index f7eb5854a22448..cc0363b843c1a0 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm988312hr.dts b/arch/arm/boot/dts/bcm988312hr.dts index 16666324fda8b5..74e15a3cd9f8ef 100644 --- a/arch/arm/boot/dts/bcm988312hr.dts +++ b/arch/arm/boot/dts/bcm988312hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; From bf33f87dd04c371ea33feb821b60d63d754e3124 Mon Sep 17 00:00:00 2001 From: peter chang Date: Wed, 15 Feb 2017 14:11:54 -0800 Subject: [PATCH 1408/1911] scsi: sg: check length passed to SG_NEXT_CMD_LEN The user can control the size of the next command passed along, 
but the value passed to the ioctl isn't checked against the usable max command size. Cc: Signed-off-by: Peter Chang Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index e831e01f9fa685..849ff8104be27f 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -996,6 +996,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) result = get_user(val, ip); if (result) return result; + if (val > SG_MAX_CDB_SIZE) + return -ENOMEM; sfp->next_cmd_len = (val > 0) ? val : 0; return 0; case SG_GET_VERSION_NUM: From 271df90e4e530c17f237b27034d6341cb2c2f536 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 16 Mar 2017 16:40:19 -0700 Subject: [PATCH 1409/1911] z3fold: fix spinlock unlocking in page reclaim Commmit 5a27aa822029 ("z3fold: add kref refcounting") introduced a bug in z3fold_reclaim_page() with function exit that may leave pool->lock spinlock held. Here comes the trivial fix. Fixes: 5a27aa822029 ("z3fold: add kref refcounting") Link: http://lkml.kernel.org/r/20170311222239.7b83d8e7ef1914e05497649f@gmail.com Reported-by: Alexey Khoroshilov Signed-off-by: Vitaly Wool Cc: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/z3fold.c b/mm/z3fold.c index 8970a2fd3b1a53..f9492bccfd794a 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -667,6 +667,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) z3fold_page_unlock(zhdr); spin_lock(&pool->lock); if (kref_put(&zhdr->refcount, release_z3fold_page)) { + spin_unlock(&pool->lock); atomic64_dec(&pool->pages_nr); return 0; } From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 Mar 2017 16:40:21 -0700 Subject: [PATCH 1410/1911] kasan: add a prototype of task_struct to avoid warning Add a prototype of task_struct to fix below warning on arm64. In file included from arch/arm64/kernel/probes/kprobes.c:19:0: include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static inline void kasan_unpoison_task_stack(struct task_struct *task) {} As same as other types (kmem_cache, page, and vm_struct) this adds a prototype of task_struct data structure on top of kasan.h. [arnd] A related warning was fixed before, but now appears in a different line in the same file in v4.11-rc2. The patch from Masami Hiramatsu still seems appropriate, so let's take his version. 
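The fix itself is ordinary header hygiene: a declaration that only passes a pointer to a struct needs a forward declaration, not the full definition, so the header stays self-contained. Sketch of the resulting layout (declarations mirror what the diff below adds and relies on):

	struct task_struct;	/* forward declaration is enough for pointer parameters */

	#ifdef CONFIG_KASAN
	void kasan_unpoison_task_stack(struct task_struct *task);
	#else
	static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
	#endif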
Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove from ") Link: https://patchwork.kernel.org/patch/9569839/ Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu Acked-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c1510..5734480c959094 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Mar 2017 16:40:24 -0700 Subject: [PATCH 1411/1911] mm, x86: fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Ackedy-by: Dave Jiang Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Garnier Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5f5..50d35e3185f553 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024ab..585ee0d42d18fc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif From 171012f561274784160f666f8398af8b42216e1f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 16 Mar 2017 16:40:27 -0700 Subject: [PATCH 1412/1911] mm: don't warn when vmalloc() fails due to a fatal signal When vmalloc() fails it prints a very lengthy message with all the details about memory consumption assuming that it happened due to OOM. However, vmalloc() can also fail due to fatal signal pending. 
In such case the message is quite confusing because it suggests that it is OOM but the numbers suggest otherwise. The messages can also pollute console considerably. Don't warn when vmalloc() fails due to fatal signal pending. Link: http://lkml.kernel.org/r/20170313114425.72724-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Reviewed-by: Matthew Wilcox Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0dd80222b20bbd..0b057628a7ba5c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (fatal_signal_pending(current)) { area->nr_pages = i; - goto fail; + goto fail_no_warn; } if (node == NUMA_NO_NODE) @@ -1709,6 +1709,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); +fail_no_warn: vfree(area->addr); return NULL; } From 55adc1d05dca9e949cdf46c747cb1e91c0e9143d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:30 -0700 Subject: [PATCH 1413/1911] mm: add private lock to serialize memory hotplug operations Commit bfc8c90139eb ("mem-hotplug: implement get/put_online_mems") introduced new functions get/put_online_mems() and mem_hotplug_begin/end() in order to allow similar semantics for memory hotplug like for cpu hotplug. The corresponding functions for cpu hotplug are get/put_online_cpus() and cpu_hotplug_begin/done() for cpu hotplug. The commit however missed to introduce functions that would serialize memory hotplug operations like they are done for cpu hotplug with cpu_maps_update_begin/done(). This basically leaves mem_hotplug.active_writer unprotected and allows concurrent writers to modify it, which may lead to problems as outlined by commit f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}"). That commit was extended again with commit b5d24fda9c3d ("mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done}") which serializes memory hotplug operations for some call sites by using the device_hotplug lock. In addition with commit 3fc21924100b ("mm: validate device_hotplug is held for memory hotplug") a sanity check was added to mem_hotplug_begin() to verify that the device_hotplug lock is held. This in turn triggers the following warning on s390: WARNING: CPU: 6 PID: 1 at drivers/base/core.c:643 assert_held_device_hotplug+0x4a/0x58 Call Trace: assert_held_device_hotplug+0x40/0x58) mem_hotplug_begin+0x34/0xc8 add_memory_resource+0x7e/0x1f8 add_memory+0xda/0x130 add_memory_merged+0x15c/0x178 sclp_detect_standby_memory+0x2ae/0x2f8 do_one_initcall+0xa2/0x150 kernel_init_freeable+0x228/0x2d8 kernel_init+0x2a/0x140 kernel_thread_starter+0x6/0xc One possible fix would be to add more lock_device_hotplug() and unlock_device_hotplug() calls around each call site of mem_hotplug_begin/end(). But that would give the device_hotplug lock additional semantics it better should not have (serialize memory hotplug operations). Instead add a new memory_add_remove_lock which has the similar semantics like cpu_add_remove_lock for cpu hotplug. To keep things hopefully a bit easier the lock will be locked and unlocked within the mem_hotplug_begin/end() functions. 
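In short: the serialization that cpu hotplug gets from cpu_maps_update_begin/done() is provided here by a private mutex taken inside the helpers themselves, so callers need no extra locking. A sketch of the resulting shape (the full diff follows):

	/* Serializes write accesses to mem_hotplug.active_writer. */
	static DEFINE_MUTEX(memory_add_remove_lock);

	void mem_hotplug_begin(void)
	{
		mutex_lock(&memory_add_remove_lock);
		mem_hotplug.active_writer = current;
		/* ... take mem_hotplug.lock for writing, as before ... */
	}

	void mem_hotplug_done(void)
	{
		/* ... drop mem_hotplug.lock, as before ... */
		mem_hotplug.active_writer = NULL;
		mutex_unlock(&memory_add_remove_lock);
	}

	/* a writer such as devm_memremap_pages() then simply does: */
	mem_hotplug_begin();
	arch_add_memory(nid, align_start, align_size, true);
	mem_hotplug_done();	/* no lock_device_hotplug()/unlock_device_hotplug() wrapper needed */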
Link: http://lkml.kernel.org/r/20170314125226.16779-2-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Reported-by: Sebastian Ott Acked-by: Dan Williams Acked-by: Rafael J. Wysocki Cc: Michal Hocko Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 ---- mm/memory_hotplug.c | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 06123234f1189c..07e85e5229da84 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); - lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); - unlock_device_hotplug(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); @@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; - lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); - unlock_device_hotplug(); if (error) goto err_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 295479b792ec48..6fa7208bcd564e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -125,9 +125,12 @@ void put_online_mems(void) } +/* Serializes write accesses to mem_hotplug.active_writer. */ +static DEFINE_MUTEX(memory_add_remove_lock); + void mem_hotplug_begin(void) { - assert_held_device_hotplug(); + mutex_lock(&memory_add_remove_lock); mem_hotplug.active_writer = current; @@ -147,6 +150,7 @@ void mem_hotplug_done(void) mem_hotplug.active_writer = NULL; mutex_unlock(&mem_hotplug.lock); memhp_lock_release(); + mutex_unlock(&memory_add_remove_lock); } /* add this memory to iomem resource */ From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:33 -0700 Subject: [PATCH 1414/1911] drivers core: remove assert_held_device_hotplug() The last caller of assert_held_device_hotplug() is gone, so remove it again. Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Acked-by: Dan Williams Cc: Michal Hocko Cc: "Rafael J. 
Wysocki" Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 5 ----- include/linux/device.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 684bda4d14a187..6bb60fb6a30b7b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } -void assert_held_device_hotplug(void) -{ - lockdep_assert_held(&device_hotplug_lock); -} - #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928dfe..9ef518af5515a0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); From 966fa72a716ceafc69de901a31f7cc1f52b35f81 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 14 Mar 2017 08:17:00 +0530 Subject: [PATCH 1415/1911] kernfs: Check KERNFS_HAS_RELEASE before calling kernfs_release_file() Recently started seeing a kernel oops when a module tries removing a memory mapped sysfs bin_attribute. On closer investigation the root cause seems to be kernfs_release_file() trying to call kernfs_op.release() callback that's NULL for such sysfs bin_attributes. The oops occurs when kernfs_release_file() is called from kernfs_drain_open_files() to cleanup any open handles with active memory mappings. The patch fixes this by checking for flag KERNFS_HAS_RELEASE before calling kernfs_release_file() in function kernfs_drain_open_files(). On ppc64-le arch with cxl module the oops back-trace is of the form below: [ 861.381126] Unable to handle kernel paging request for instruction fetch [ 861.381360] Faulting instruction address: 0x00000000 [ 861.381428] Oops: Kernel access of bad area, sig: 11 [#1] .... [ 861.382481] NIP: 0000000000000000 LR: c000000000362c60 CTR: 0000000000000000 .... 
Call Trace: [c000000f1680b750] [c000000000362c34] kernfs_drain_open_files+0x104/0x1d0 (unreliable) [c000000f1680b790] [c00000000035fa00] __kernfs_remove+0x260/0x2c0 [c000000f1680b820] [c000000000360da0] kernfs_remove_by_name_ns+0x60/0xe0 [c000000f1680b8b0] [c0000000003638f4] sysfs_remove_bin_file+0x24/0x40 [c000000f1680b8d0] [c00000000062a164] device_remove_bin_file+0x24/0x40 [c000000f1680b8f0] [d000000009b7b22c] cxl_sysfs_afu_remove+0x144/0x170 [cxl] [c000000f1680b940] [d000000009b7c7e4] cxl_remove+0x6c/0x1a0 [cxl] [c000000f1680b990] [c00000000052f694] pci_device_remove+0x64/0x110 [c000000f1680b9d0] [c0000000006321d4] device_release_driver_internal+0x1f4/0x2b0 [c000000f1680ba20] [c000000000525cb0] pci_stop_bus_device+0xa0/0xd0 [c000000f1680ba60] [c000000000525e80] pci_stop_and_remove_bus_device+0x20/0x40 [c000000f1680ba90] [c00000000004a6c4] pci_hp_remove_devices+0x84/0xc0 [c000000f1680bad0] [c00000000004a688] pci_hp_remove_devices+0x48/0xc0 [c000000f1680bb10] [c0000000009dfda4] eeh_reset_device+0xb0/0x290 [c000000f1680bbb0] [c000000000032b4c] eeh_handle_normal_event+0x47c/0x530 [c000000f1680bc60] [c000000000032e64] eeh_handle_event+0x174/0x350 [c000000f1680bd10] [c000000000033228] eeh_event_handler+0x1e8/0x1f0 [c000000f1680bdc0] [c0000000000d384c] kthread+0x14c/0x190 [c000000f1680be30] [c00000000000b5a0] ret_from_kernel_thread+0x5c/0xbc Fixes: f83f3c515654 ("kernfs: fix locking around kernfs_ops->release() callback") Signed-off-by: Vaibhav Jain Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8e4dc7ab584c2d..ac2dfe0c5a9c85 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn) if (kn->flags & KERNFS_HAS_MMAP) unmap_mapping_range(inode->i_mapping, 0, 0, 1); - kernfs_release_file(kn, of); + if (kn->flags & KERNFS_HAS_RELEASE) + kernfs_release_file(kn, of); } mutex_unlock(&kernfs_open_file_mutex); From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: [PATCH 1416/1911] net/mlx4_core: Avoid delays during VF driver device shutdown Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e8c105164931f3..0e0fa703056595 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 21377c315083b6..703205475524d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7e66e4f62858f3..1beb1ec2fbdf33 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ From bc9ab9231ec8c08352ea860480523d88a221a68f Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 13 Mar 2017 19:07:16 -0400 Subject: [PATCH 1417/1911] net: ethernet: aquantia: set net_device mtu when mtu is changed When the aquantia device mtu is changed the net_device structure is not updated. As a result the ip command does not properly reflect the mtu change. Commit 5513e16421cb incorrectly assumed that __dev_set_mtu() was making the assignment ndev->mtu = new_mtu; This is not true in the case where the driver has a ndo_change_mtu routine. Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu") Cc: Pavel Belous Signed-off-by: David Arcari Tested-by: Pavel Belous Reviewed-by: Jarod Wilson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index dad63623be6a93..d05fbfdce5e52e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -98,6 +98,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) if (err < 0) goto err_exit; + ndev->mtu = new_mtu; if (netif_running(ndev)) { aq_ndev_close(ndev); From 61733c91c454a61be0ffc93fe46a5d5f2f048c1c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Mar 2017 16:49:10 -0700 Subject: [PATCH 1418/1911] net: mpls: Fix nexthop alive tracking on down events Alive tracking of nexthops can account for a link twice if the carrier goes down followed by an admin down of the same link rendering multipath routes useless. This is similar to 79099aab38c8 for UNREGISTER events and DOWN events. Fix by tracking number of alive nexthops in mpls_ifdown similar to the logic in mpls_ifup. Checking the flags per nexthop once after all events have been processed is simpler than trying to maintian a running count through all event combinations. Also, WRITE_ONCE is used instead of ACCESS_ONCE to set rt_nhn_alive per a comment from checkpatch: WARNING: Prefer WRITE_ONCE(, ) over ACCESS_ONCE() = Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Acked-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 33211f9a265608..6414079aa7297e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1269,6 +1269,8 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; + unsigned int alive; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1278,9 +1280,11 @@ static void mpls_ifdown(struct net_device *dev, int event) if (!rt) continue; + alive = 0; change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) - continue; + goto next; + switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -1288,13 +1292,16 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - if (event != NETDEV_UNREGISTER) - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); +next: + if (!(nh->nh_flags & nh_flags)) + alive++; } endfor_nexthops(rt); + + WRITE_ONCE(rt->rt_nhn_alive, alive); } } From 4ee39733fbecf04cf9f346de2d64788c35028079 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 15 Mar 2017 18:14:33 -0700 Subject: [PATCH 1419/1911] net: ipv6: set route type for anycast routes Anycast routes have the RTF_ANYCAST flag set, but when dumping routes for userspace the route type is not set to RTN_ANYCAST. Make it so. Fixes: 58c4fb86eabcb ("[IPV6]: Flag RTF_ANYCAST for anycast routes") CC: Hideaki YOSHIFUJI Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 35c58b669ebdfd..9db1418993f2b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3423,6 +3423,8 @@ static int rt6_fill_node(struct net *net, } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; + else if (rt->rt6i_flags & RTF_ANYCAST) + rtm->rtm_type = RTN_ANYCAST; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else From 687e0687f71ec00e0132a21fef802dee88c2f1ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Mar 2017 17:55:45 +0100 Subject: [PATCH 1420/1911] USB: usbtmc: add missing endpoint sanity check USBTMC devices are required to have a bulk-in and a bulk-out endpoint, but the driver failed to verify this, something which could lead to the endpoint addresses being taken from uninitialised memory. Make sure to zero all private data as part of allocation, and add the missing endpoint sanity check. Note that this also addresses a more recently introduced issue, where the interrupt-in-presence flag would also be uninitialised whenever the optional interrupt-in endpoint is not present. This in turn could lead to an interrupt urb being allocated, initialised and submitted based on uninitialised values. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Fixes: 5b775f672cc9 ("USB: add USB test and measurement class driver") Cc: stable # 2.6.28 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index f03692ec552056..5e3446db4513ac 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1381,7 +1381,7 @@ static int usbtmc_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "%s called\n", __func__); - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -1444,6 +1444,13 @@ static int usbtmc_probe(struct usb_interface *intf, break; } } + + if (!data->bulk_out || !data->bulk_in) { + dev_err(&intf->dev, "bulk endpoints not found\n"); + retcode = -ENODEV; + goto err_put; + } + /* Find int endpoint */ for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) { endpoint = &iface_desc->endpoint[n].desc; @@ -1512,6 +1519,7 @@ static int usbtmc_probe(struct usb_interface *intf, sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp); sysfs_remove_group(&intf->dev.kobj, &data_attr_grp); usbtmc_free_int(data); +err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } From 2e47c53503eb9faff42b3cfa144a833344dd1f89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Mar 2017 17:55:46 +0100 Subject: [PATCH 1421/1911] USB: usbtmc: fix probe error path Make sure to initialise the return value to avoid having allocation failures going unnoticed when allocating interrupt-endpoint resources. This prevents use-after-free or worse when the device is later unbound. 
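The bug class is a status variable that is only assigned on some of the paths that jump to a common error label. A minimal, self-contained sketch of it (struct foo and foo_probe() are illustrative, not the usbtmc code):

	struct foo {
		struct urb *urb;
	};

	static int foo_probe(struct usb_interface *intf, const struct usb_device_id *id)
	{
		struct foo *data;
		int retcode = 0;

		data = kzalloc(sizeof(*data), GFP_KERNEL);
		if (!data)
			return -ENOMEM;

		data->urb = usb_alloc_urb(0, GFP_KERNEL);
		if (!data->urb) {
			retcode = -ENOMEM;	/* the assignment this patch adds */
			goto err_free;
		}

		return 0;

	err_free:
		kfree(data);
		return retcode;	/* would be 0 ("success") without the assignment above */
	}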
Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Cc: stable # 4.6 Cc: Dave Penkler Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 5e3446db4513ac..8fb309a0ff6b5d 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1476,8 +1476,10 @@ static int usbtmc_probe(struct usb_interface *intf, if (data->iin_ep_present) { /* allocate int urb */ data->iin_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->iin_urb) + if (!data->iin_urb) { + retcode = -ENOMEM; goto error_register; + } /* Protect interrupt in endpoint data until iin_urb is freed */ kref_get(&data->kref); @@ -1485,8 +1487,10 @@ static int usbtmc_probe(struct usb_interface *intf, /* allocate buffer for interrupt in */ data->iin_buffer = kmalloc(data->iin_wMaxPacketSize, GFP_KERNEL); - if (!data->iin_buffer) + if (!data->iin_buffer) { + retcode = -ENOMEM; goto error_register; + } /* fill interrupt urb */ usb_fill_int_urb(data->iin_urb, data->usb_dev, From cdd7928df0d2efaa3270d711963773a08a4cc8ab Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Mar 2017 12:09:56 +0100 Subject: [PATCH 1422/1911] ACM gadget: fix endianness in notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gadget code exports the bitfield for serial status changes over the wire in its internal endianness. The fix is to convert to little endian before sending it over the wire. Signed-off-by: Oliver Neukum Tested-by: 家瑋 CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_acm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index a30766ca422644..5e3828d9dac7f3 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm) { struct usb_composite_dev *cdev = acm->port.func.config->cdev; int status; + __le16 serial_state; spin_lock(&acm->lock); if (acm->notify_req) { dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n", acm->port_num, acm->serial_state); + serial_state = cpu_to_le16(acm->serial_state); status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE, - 0, &acm->serial_state, sizeof(acm->serial_state)); + 0, &serial_state, sizeof(acm->serial_state)); } else { acm->pending = true; status = 0; From 9501df3cd9204f5859f649182431616a31ee88a1 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 15 Mar 2017 23:38:07 -0400 Subject: [PATCH 1423/1911] ibmvnic: Free tx/rx scrq pointer array when releasing sub-crqs The pointer array for the tx/rx sub crqs should be free'ed when releasing the tx/rx sub crqs. Signed-off-by: Nathan Fontenot Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5f11b4dc95d2d1..b23d6545f83562 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1257,6 +1257,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->tx_scrq[i]); } + kfree(adapter->tx_scrq); adapter->tx_scrq = NULL; } @@ -1269,6 +1270,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->rx_scrq[i]); } + kfree(adapter->rx_scrq); adapter->rx_scrq = NULL; } } From 4d4a6ac73e7466c2085c307fac41f74ce4568a45 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:10 +0000 Subject: [PATCH 1424/1911] rxrpc: Ignore BUSY packets on old calls If we receive a BUSY packet for a call we think we've just completed, the packet is handed off to the connection processor to deal with - but the connection processor doesn't expect a BUSY packet and so flags a protocol error. Fix this by simply ignoring the BUSY packet for the moment. The symptom of this may appear as a system call failing with EPROTO. This may be triggered by pressing ctrl-C under some circumstances. This comes about we abort calls due to interruption by a signal (which we shouldn't do, but that's going to be a large fix and mostly in fs/afs/). What happens is that we abort the call and may also abort follow up calls too (this needs offloading somehoe). So we see a transmission of something like the following sequence of packets: DATA for call N ABORT call N DATA for call N+1 ABORT call N+1 in very quick succession on the same channel. However, the peer may have deferred the processing of the ABORT from the call N to a background thread and thus sees the DATA message from the call N+1 coming in before it has cleared the channel. Thus it sends a BUSY packet[*]. [*] Note that some implementations (OpenAFS, for example) mark the BUSY packet with one plus the callNumber of the call prior to call N. Ordinarily, this would be call N, but there's no requirement for the calls on a channel to be numbered strictly sequentially (the number is required to increase). This is wrong and means that the callNumber in the BUSY packet should be ignored (it really ought to be N+1 since that's what it's in response to). Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 3f9d8d7ec6323a..b099b64366f356 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, rxrpc_conn_retransmit_call(conn, skb); return 0; + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &wtmp, sizeof(wtmp)) < 0) From d12c917691b45d9dffcfe7c2362d25caa40905fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 16 Mar 2017 10:32:42 -0700 Subject: [PATCH 1425/1911] bridge: resolve a false alarm of lockdep Andrei reported a false alarm of lockdep at net/bridge/br_fdb.c:109, this is because in Andrei's case, a spin_bug() was already triggered before this, therefore the debug_locks is turned off, lockdep_is_held() is no longer accurate after that. 
We should use lockdep_assert_held_once() instead of lockdep_is_held() to respect debug_locks. Fixes: 410b3d48f5111 ("bridge: fdb: add proper lock checks in searching functions") Reported-by: Andrei Vagin Signed-off-by: Cong Wang Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- net/bridge/br_private.h | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4f598dc2d9168c..6e08b7199dd744 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - WARN_ON_ONCE(!br_hash_lock_held(br)); + lockdep_assert_held_once(&br->hash_lock); rcu_read_lock(); fdb = fdb_find_rcu(head, addr, vid); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2288fca7756c51..61368186edea53 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); -static inline bool br_hash_lock_held(struct net_bridge *br) -{ -#ifdef CONFIG_LOCKDEP - return lockdep_is_held(&br->hash_lock); -#else - return true; -#endif -} - /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, From e14b4db7a567ff507453ecd9c64da51bbc2b6d23 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Mar 2017 12:21:32 -0700 Subject: [PATCH 1426/1911] netvsc: fix race during initialization When device is being setup on boot, there is a small race where network device callback is registered, but the netvsc_device pointer is not set yet. This can cause a NULL ptr dereference if packet arrives during this window. Fixes: 46b4f7f5d1f7 ("netvsc: eliminate per-device outstanding send counter") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4c1d8cca247b92..8dd0b87703288c 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1231,8 +1231,11 @@ void netvsc_channel_cb(void *context) return; net_device = net_device_to_netvsc_device(ndev); - if (unlikely(net_device->destroy) && - netvsc_channel_idle(net_device, q_idx)) + if (unlikely(!net_device)) + return; + + if (unlikely(net_device->destroy && + netvsc_channel_idle(net_device, q_idx))) return; /* commit_rd_index() -> hv_signal_on_read() needs this. */ From 7b2db29fbb4e766fcd02207eb2e2087170bd6ebc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Mar 2017 10:19:36 -0800 Subject: [PATCH 1427/1911] usb: hub: Fix crash after failure to read BOS descriptor If usb_get_bos_descriptor() returns an error, usb->bos will be NULL. Nevertheless, it is dereferenced unconditionally in hub_set_initial_usb2_lpm_policy() if usb2_hw_lpm_capable is set. This results in a crash. usb 5-1: unable to get BOS descriptor ... Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = ffffffc00165f000 [00000008] *pgd=000000000174f003, *pud=000000000174f003, *pmd=0000000001750003, *pte=00e8000001751713 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: uinput uvcvideo videobuf2_vmalloc cmac [ ... 
] CPU: 5 PID: 3353 Comm: kworker/5:3 Tainted: G B 4.4.52 #480 Hardware name: Google Kevin (DT) Workqueue: events driver_set_config_work task: ffffffc0c3690000 ti: ffffffc0ae9a8000 task.ti: ffffffc0ae9a8000 PC is at hub_port_init+0xc3c/0xd10 LR is at hub_port_init+0xc3c/0xd10 ... Call trace: [] hub_port_init+0xc3c/0xd10 [] usb_reset_and_verify_device+0x15c/0x82c [] usb_reset_device+0xe4/0x298 [] rtl8152_probe+0x84/0x9b0 [r8152] [] usb_probe_interface+0x244/0x2f8 [] driver_probe_device+0x180/0x3b4 [] __device_attach_driver+0xb4/0xe0 [] bus_for_each_drv+0xb4/0xe4 [] __device_attach+0xd0/0x158 [] device_initial_probe+0x24/0x30 [] bus_probe_device+0x50/0xe4 [] device_add+0x414/0x738 [] usb_set_configuration+0x89c/0x914 [] driver_set_config_work+0xc0/0xf0 [] process_one_work+0x390/0x6b8 [] worker_thread+0x480/0x610 [] kthread+0x164/0x178 [] ret_from_fork+0x10/0x40 Since we don't know anything about LPM capabilities without BOS descriptor, don't attempt to enable LPM if it is not available. Fixes: 890dae886721 ("xhci: Enable LPM support only for hardwired ...") Cc: stable Cc: Mathias Nyman Signed-off-by: Guenter Roeck Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f0dd08198d7426..5286bf67869a83 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4275,7 +4275,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; - if (!udev->usb2_hw_lpm_capable) + if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) From db7f00b8dba6d687b6ab1f2e9309acfd214fcb4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2017 15:43:19 -0700 Subject: [PATCH 1428/1911] tcp: tcp_get_info() should read tcp_time_stamp later Commit b369e7fd41f7 ("tcp: make TCP_INFO more consistent") moved lock_sock_fast() earlier in tcp_get_info() This has the minor effect that jiffies value being sampled at the beginning of tcp_get_info() is more likely to be off by one, and we report big tcpi_last_data_sent values (like 0xFFFFFFFF). Since we lock the socket, fetching tcp_time_stamp right before doing the jiffies_to_msecs() calls is enough to remove these wrong values. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf4555581282c6..1e319a525d51b0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2770,7 +2770,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp, intv; + u32 now, intv; u64 rate64; bool slow; u32 rate; @@ -2839,6 +2839,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; + now = tcp_time_stamp; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); From b767ad726c2aa6219318bf0da83fbe690e653d9a Mon Sep 17 00:00:00 2001 From: Aleksey Makarov Date: Wed, 1 Mar 2017 18:23:02 +0300 Subject: [PATCH 1429/1911] Revert "tty: serial: pl011: add ttyAMA for matching pl011 console" The original patch makes the condition always true, so it is wrong. It masks (but not fixes) the bug described in the commit message but introduces a regression (no console is selected by SPCR) in regular (no 'console=ttyAMA') case. s/||/&&/ would not fix the problem as the root cause was identified incorrectly. This reverts commit aea9a80ba98a0c9b4de88850260e9fbdcc98360b. Signed-off-by: Aleksey Makarov Signed-off-by: Greg Kroah-Hartman Acked-by: Sudeep Holla Tested-by: Jayachandran C Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8789ea423ccfd1..56f92d7348bf45 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2373,7 +2373,7 @@ static int __init pl011_console_match(struct console *co, char *name, int idx, if (strcmp(name, "qdf2400_e44") == 0) { pr_info_once("UART: Working around QDF2400 SoC erratum 44"); qdf2400_e44_present = true; - } else if (strcmp(name, "pl011") != 0 || strcmp(name, "ttyAMA") != 0) { + } else if (strcmp(name, "pl011") != 0) { return -ENODEV; } From 5362544bebe85071188dd9e479b5a5040841c895 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 14:55:19 +0100 Subject: [PATCH 1430/1911] tty: don't panic on OOM in tty_set_ldisc() If tty_ldisc_open() fails in tty_set_ldisc(), it tries to go back to the old discipline or N_TTY. But that can fail as well, in such case it panics. This is not a graceful way to handle OOM. Leave ldisc==NULL if all attempts fail instead. Also use existing tty_ldisc_reinit() helper function instead of tty_ldisc_restore(). Also don't WARN/BUG in tty_ldisc_reinit() if N_TTY fails, which would have the same net effect of bringing kernel down on OOM. Instead print a single line message about what has happened. 
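The resulting control flow in tty_set_ldisc() reads more clearly as a summary than in the diff below: try the requested discipline, fall back to the previous one, then to N_TTY, and if even that fails leave tty->ldisc NULL and report it rather than panic. Extracted from the patch, not additional code:

	retval = tty_ldisc_reinit(tty, disc);
	if (retval < 0) {
		/* back to the old discipline, or N_TTY if we can't */
		if (tty_ldisc_reinit(tty, old_disc) < 0) {
			pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n");
			if (tty_ldisc_reinit(tty, N_TTY) < 0)
				/* at this point tty->ldisc == NULL */
				pr_err("tty: reinitializing N_TTY failed\n");
		}
	}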
Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 85 ++++++++--------------------------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 68947f6de5ad63..c3956ca022e465 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -488,41 +488,6 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) tty_ldisc_debug(tty, "%p: closed\n", ld); } -/** - * tty_ldisc_restore - helper for tty ldisc change - * @tty: tty to recover - * @old: previous ldisc - * - * Restore the previous line discipline or N_TTY when a line discipline - * change fails due to an open error - */ - -static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) -{ - struct tty_ldisc *new_ldisc; - int r; - - /* There is an outstanding reference here so this is safe */ - old = tty_ldisc_get(tty, old->ops->num); - WARN_ON(IS_ERR(old)); - tty->ldisc = old; - tty_set_termios_ldisc(tty, old->ops->num); - if (tty_ldisc_open(tty, old) < 0) { - tty_ldisc_put(old); - /* This driver is always present */ - new_ldisc = tty_ldisc_get(tty, N_TTY); - if (IS_ERR(new_ldisc)) - panic("n_tty: get"); - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, N_TTY); - r = tty_ldisc_open(tty, new_ldisc); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty), r); - } -} - /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -536,12 +501,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval; - struct tty_ldisc *old_ldisc, *new_ldisc; - - new_ldisc = tty_ldisc_get(tty, disc); - if (IS_ERR(new_ldisc)) - return PTR_ERR(new_ldisc); + int retval, old_disc; tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -554,7 +514,8 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) } /* Check the no-op case */ - if (tty->ldisc->ops->num == disc) + old_disc = tty->ldisc->ops->num; + if (old_disc == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -563,34 +524,25 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - old_ldisc = tty->ldisc; - - /* Shutdown the old discipline. */ - tty_ldisc_close(tty, old_ldisc); - - /* Now set up the new line discipline. */ - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, disc); - - retval = tty_ldisc_open(tty, new_ldisc); + retval = tty_ldisc_reinit(tty, disc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - tty_ldisc_put(new_ldisc); - tty_ldisc_restore(tty, old_ldisc); + if (tty_ldisc_reinit(tty, old_disc) < 0) { + pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); + if (tty_ldisc_reinit(tty, N_TTY) < 0) { + /* At this point we have tty->ldisc == NULL. */ + pr_err("tty: reinitializing N_TTY failed\n"); + } + } } - if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { + if (tty->ldisc && tty->ldisc->ops->num != old_disc && + tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc, or we hold two references to - the old ldisc (if it was restored as part of error cleanup - above). 
In either case, releasing a single reference from - the old ldisc is correct. */ - new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -598,7 +550,6 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) already running */ tty_buffer_restart_work(tty->port); err: - tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -659,10 +610,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) { - BUG_ON(disc == N_TTY); + if (IS_ERR(ld)) return PTR_ERR(ld); - } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -674,10 +623,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } From a4a3e061149f09c075f108b6f1cf04d9739a6bc2 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 13:46:12 +0100 Subject: [PATCH 1431/1911] tty: fix data race in tty_ldisc_ref_wait() tty_ldisc_ref_wait() checks tty->ldisc under tty->ldisc_sem. But if ldisc==NULL it releases them sem and reloads tty->ldisc without holding the sem. This is wrong and can lead to returning non-NULL ldisc without protection. Don't reload tty->ldisc second time. Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index c3956ca022e465..b0500a0a87b861 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = { struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { + struct tty_ldisc *ld; + ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); - if (!tty->ldisc) + ld = tty->ldisc; + if (!ld) ldsem_up_read(&tty->ldisc_sem); - return tty->ldisc; + return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); From 8971e1c79d3f6c9a5e6f7a65c50c41f434a4dae6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Mar 2017 16:02:35 +1100 Subject: [PATCH 1432/1911] powerpc/pseries: Don't give a warning when HPT resizing isn't available As of commit 438cc81a41e8 ("powerpc/pseries: Automatically resize HPT for memory hot add/remove"), when running on the pseries platform, we always attempt to use the PAPR extension to resize the hashed page table (HPT) when we add or remove memory. This is fine, but when the extension is not available we'll give a harmless, but scary warning. Instead check if the firmware supports HPT resizing before populating the mmu_hash_ops.resize_hpt pointer. 
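In practice this is a feature-gated assignment, as the sketch below shows (condensed from the one-line change in the diff that follows; firmware_has_feature(), FW_FEATURE_HPT_RESIZE and pseries_lpar_resize_hpt are taken from the patch context):

	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;

With the hook left unset on firmware that lacks the extension, memory hot add/remove never attempts the resize, which is what makes the warning go away.
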
Signed-off-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 251060cf171364..8b1fe895daa3f0 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -751,7 +751,9 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } void radix_init_pseries(void) From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:39 -0700 Subject: [PATCH 1433/1911] x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm If one thread mmaps a perf event while another thread in the same mm is in some context where active_mm != mm (which can happen in the scheduler, for example), refresh_pce() would write the wrong value to CR4.PCE. This broke some PAPI tests. Reported-and-tested-by: Vince Weaver Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped") Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23a6..e07b36c5588afa 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:40 -0700 Subject: [PATCH 1434/1911] x86/perf: Clarify why x86_pmu_event_mapped() isn't racy Naively, it looks racy, but ->mmap_sem saves it. Add a comment and a lockdep assertion. Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e07b36c5588afa..183a972f9210fb 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event) if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. 
Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_sem + * for write. If this changes, we'll need a different solution. + */ + lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); } From 2cd29f2387be70de9feb4c9f8dbc7c0bd55748ce Mon Sep 17 00:00:00 2001 From: Robert Middleton Date: Wed, 15 Mar 2017 16:56:47 -0400 Subject: [PATCH 1435/1911] gpio:mcp23s08 Fixed missing interrupts When an interrupt occurs on an MCP23S08 chip, the INTF register will only contain one bit as causing the interrupt. If more than two pins change at the same time on the chip, this causes one of the pins to not be reported. This patch fixes the logic for checking if a pin has changed, so that multiple pins will always cause more than one change. Cc: stable@vger.kernel.org Signed-off-by: Robert Middleton Tested-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mcp23s08.c | 65 +++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index bdb692345428cc..2a57d024481db8 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value) static irqreturn_t mcp23s08_irq(int irq, void *data) { struct mcp23s08 *mcp = data; - int intcap, intf, i; + int intcap, intf, i, gpio, gpio_orig, intcap_mask; unsigned int child_irq; + bool intf_set, intcap_changed, gpio_bit_changed, + defval_changed, gpio_set; mutex_lock(&mcp->lock); if (mcp_read(mcp, MCP_INTF, &intf) < 0) { @@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data) } mcp->cache[MCP_INTCAP] = intcap; + + /* This clears the interrupt(configurable on S18) */ + if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) { + mutex_unlock(&mcp->lock); + return IRQ_HANDLED; + } + gpio_orig = mcp->cache[MCP_GPIO]; + mcp->cache[MCP_GPIO] = gpio; mutex_unlock(&mcp->lock); + if (mcp->cache[MCP_INTF] == 0) { + /* There is no interrupt pending */ + return IRQ_HANDLED; + } + + dev_dbg(mcp->chip.parent, + "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n", + intcap, intf, gpio_orig, gpio); for (i = 0; i < mcp->chip.ngpio; i++) { - if ((BIT(i) & mcp->cache[MCP_INTF]) && - ((BIT(i) & intcap & mcp->irq_rise) || - (mcp->irq_fall & ~intcap & BIT(i)) || - (BIT(i) & mcp->cache[MCP_INTCON]))) { + /* We must check all of the inputs on the chip, + * otherwise we may not notice a change on >=2 pins. + * + * On at least the mcp23s17, INTCAP is only updated + * one byte at a time(INTCAPA and INTCAPB are + * not written to at the same time - only on a per-bank + * basis). + * + * INTF only contains the single bit that caused the + * interrupt per-bank. On the mcp23s17, there is + * INTFA and INTFB. If two pins are changed on the A + * side at the same time, INTF will only have one bit + * set. If one pin on the A side and one pin on the B + * side are changed at the same time, INTF will have + * two bits set. Thus, INTF can't be the only check + * to see if the input has changed. 
+ */ + + intf_set = BIT(i) & mcp->cache[MCP_INTF]; + if (i < 8 && intf_set) + intcap_mask = 0x00FF; + else if (i >= 8 && intf_set) + intcap_mask = 0xFF00; + else + intcap_mask = 0x00; + + intcap_changed = (intcap_mask & + (BIT(i) & mcp->cache[MCP_INTCAP])) != + (intcap_mask & (BIT(i) & gpio_orig)); + gpio_set = BIT(i) & mcp->cache[MCP_GPIO]; + gpio_bit_changed = (BIT(i) & gpio_orig) != + (BIT(i) & mcp->cache[MCP_GPIO]); + defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) && + ((BIT(i) & mcp->cache[MCP_GPIO]) != + (BIT(i) & mcp->cache[MCP_DEFVAL])); + + if (((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_rise) && gpio_set) || + ((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_fall) && !gpio_set) || + defval_changed) { child_irq = irq_find_mapping(mcp->chip.irqdomain, i); handle_nested_irq(child_irq); } From 4938ca90166d6d3061793789e2eef42cd934fa97 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Thu, 9 Mar 2017 10:09:44 +0800 Subject: [PATCH 1436/1911] drm/i915/gvt: handle force-nonpriv registers, cmd parser part this patch adds force non-priv registers check in LRI cmds handler v4: transform is_force_nonpriv_mmio() from macro to inline fuction to eliminate checkpatch warning v3: per zhenyu's comment, fix some style warnings v2: per zhenyu's comment, refine the code to remove cascaded ifs Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.h | 3 +++ 3 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7ae6e2b241c829..919c83abaeb148 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -817,6 +817,25 @@ static bool is_shadowed_mmio(unsigned int offset) return ret; } +static inline bool is_force_nonpriv_mmio(unsigned int offset) +{ + return (offset >= 0x24d0 && offset < 0x2500); +} + +static int force_nonpriv_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + struct intel_gvt *gvt = s->vgpu->gvt; + unsigned int data = cmd_val(s, index + 1); + + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", + offset, data); + return -EINVAL; + } + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -841,6 +860,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_force_nonpriv_mmio(offset) && + force_nonpriv_reg_handler(s, offset, index)) + return -EINVAL; + if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8e43395c748a15..de975f40aebf4d 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2988,3 +2988,20 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, write_vreg(vgpu, offset, p_data, bytes); return 0; } + +/** + * intel_gvt_in_force_nonpriv_whitelist - if a mmio is in whitelist to be + * force-nopriv register + * + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * True if the register is in force-nonpriv whitelist; + * False if outside; + */ +bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt 
*gvt, + unsigned int offset) +{ + return in_whitelist(offset); +} diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 3bc620f56f351e..a3a027025cd0a4 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -107,4 +107,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); + +bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, + unsigned int offset); #endif From 695fbc08d80f93ecca18a1abd8f52c2ab77fdc8d Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 10 Mar 2017 04:26:53 -0500 Subject: [PATCH 1437/1911] drm/i915/gvt: replace the gvt_err with gvt_vgpu_err gvt_err should be used only for the very few critical error message during host i915 drvier initialization. This patch 1. removes the redundant gvt_err; 2. creates a new gvt_vgpu_err to show errors caused by vgpu; 3. replaces the most gvt_err with gvt_vgpu_err; 4. leaves very few gvt_err for dumping gvt error during host gvt initialization. v2. change name to gvt_vgpu_err and add vgpu id to the message. (Kevin) add gpu id to gvt_vgpu_err. (Zhi) v3. remove gpu id from gvt_vgpu_err caller. (Zhi) v4. add vgpu check to the gvt_vgpu_err macro. (Zhiyuan) v5. add comments for v3 and v4. v6. split the big patch into two, with this patch only for checking gvt_vgpu_err. (Zhenyu) v7. rebase to staging branch v8. rebase to fix branch Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 8 +-- drivers/gpu/drm/i915/gvt/cmd_parser.c | 84 +++++++++++++++----------- drivers/gpu/drm/i915/gvt/debug.h | 8 +++ drivers/gpu/drm/i915/gvt/edid.c | 13 ++-- drivers/gpu/drm/i915/gvt/execlist.c | 29 ++++----- drivers/gpu/drm/i915/gvt/gtt.c | 74 +++++++++++------------ drivers/gpu/drm/i915/gvt/handlers.c | 28 ++++----- drivers/gpu/drm/i915/gvt/kvmgt.c | 33 ++++++---- drivers/gpu/drm/i915/gvt/mmio.c | 38 ++++++------ drivers/gpu/drm/i915/gvt/opregion.c | 10 +-- drivers/gpu/drm/i915/gvt/render.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 11 ++-- 12 files changed, 180 insertions(+), 158 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 3b6caaca975135..325618d969feed 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -242,7 +242,7 @@ static int alloc_resource(struct intel_vgpu *vgpu, const char *item; if (!param->low_gm_sz || !param->high_gm_sz || !param->fence_sz) { - gvt_err("Invalid vGPU creation params\n"); + gvt_vgpu_err("Invalid vGPU creation params\n"); return -EINVAL; } @@ -285,9 +285,9 @@ static int alloc_resource(struct intel_vgpu *vgpu, return 0; no_enough_resource: - gvt_err("vgpu%d: fail to allocate resource %s\n", vgpu->id, item); - gvt_err("vgpu%d: request %luMB avail %luMB max %luMB taken %luMB\n", - vgpu->id, BYTES_TO_MB(request), BYTES_TO_MB(avail), + gvt_vgpu_err("fail to allocate resource %s\n", item); + gvt_vgpu_err("request %luMB avail %luMB max %luMB taken %luMB\n", + BYTES_TO_MB(request), BYTES_TO_MB(avail), BYTES_TO_MB(max), BYTES_TO_MB(taken)); return -ENOSPC; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 919c83abaeb148..2ca0506d8d8c59 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -843,20 +843,19 @@ static int cmd_reg_handler(struct 
parser_exec_state *s, struct intel_gvt *gvt = vgpu->gvt; if (offset + 4 > gvt->device_info.mmio_size) { - gvt_err("%s access to (%x) outside of MMIO range\n", + gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", cmd, offset); return -EINVAL; } if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { - gvt_err("vgpu%d: %s access to non-render register (%x)\n", - s->vgpu->id, cmd, offset); + gvt_vgpu_err("%s access to non-render register (%x)\n", + cmd, offset); return 0; } if (is_shadowed_mmio(offset)) { - gvt_err("vgpu%d: found access of shadowed MMIO %x\n", - s->vgpu->id, offset); + gvt_vgpu_err("found access of shadowed MMIO %x\n", offset); return 0; } @@ -1152,6 +1151,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + struct intel_vgpu *vgpu = s->vgpu; u32 dword0 = cmd_val(s, 0); u32 dword1 = cmd_val(s, 1); u32 dword2 = cmd_val(s, 2); @@ -1190,7 +1190,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, break; default: - gvt_err("unknown plane code %d\n", plane); + gvt_vgpu_err("unknown plane code %d\n", plane); return -EINVAL; } @@ -1297,25 +1297,26 @@ static int update_plane_mmio_from_mi_display_flip( static int cmd_handler_mi_display_flip(struct parser_exec_state *s) { struct mi_display_flip_command_info info; + struct intel_vgpu *vgpu = s->vgpu; int ret; int i; int len = cmd_length(s); ret = decode_mi_display_flip(s, &info); if (ret) { - gvt_err("fail to decode MI display flip command\n"); + gvt_vgpu_err("fail to decode MI display flip command\n"); return ret; } ret = check_mi_display_flip(s, &info); if (ret) { - gvt_err("invalid MI display flip command\n"); + gvt_vgpu_err("invalid MI display flip command\n"); return ret; } ret = update_plane_mmio_from_mi_display_flip(s, &info); if (ret) { - gvt_err("fail to update plane mmio\n"); + gvt_vgpu_err("fail to update plane mmio\n"); return ret; } @@ -1373,7 +1374,8 @@ static inline int cmd_address_audit(struct parser_exec_state *s, int ret; if (op_size > max_surface_size) { - gvt_err("command address audit fail name %s\n", s->info->name); + gvt_vgpu_err("command address audit fail name %s\n", + s->info->name); return -EINVAL; } @@ -1390,7 +1392,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s, } return 0; err: - gvt_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n", + gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n", s->info->name, guest_gma, op_size); pr_err("cmd dump: "); @@ -1435,8 +1437,10 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s) static inline int unexpected_cmd(struct parser_exec_state *s) { - gvt_err("vgpu%d: Unexpected %s in command buffer!\n", - s->vgpu->id, s->info->name); + struct intel_vgpu *vgpu = s->vgpu; + + gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name); + return -EINVAL; } @@ -1539,7 +1543,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, while (gma != end_gma) { gpa = intel_vgpu_gma_to_gpa(mm, gma); if (gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid gma address: %lx\n", gma); + gvt_vgpu_err("invalid gma address: %lx\n", gma); return -EFAULT; } @@ -1580,6 +1584,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) uint32_t bb_size = 0; uint32_t cmd_len = 0; bool met_bb_end = false; + struct intel_vgpu *vgpu = s->vgpu; u32 cmd; /* get the start gm address of the batch buffer */ @@ -1588,7 +1593,7 @@ static uint32_t 
find_bb_size(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -1597,7 +1602,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) gma, gma + 4, &cmd); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -1622,6 +1627,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) static int perform_bb_shadow(struct parser_exec_state *s) { struct intel_shadow_bb_entry *entry_obj; + struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; uint32_t bb_size; void *dst = NULL; @@ -1656,7 +1662,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false); if (ret) { - gvt_err("failed to set shadow batch to CPU\n"); + gvt_vgpu_err("failed to set shadow batch to CPU\n"); goto unmap_src; } @@ -1668,7 +1674,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) gma, gma + bb_size, dst); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -1699,15 +1705,16 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) { bool second_level; int ret = 0; + struct intel_vgpu *vgpu = s->vgpu; if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) { - gvt_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); + gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); return -EINVAL; } second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1; if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) { - gvt_err("Jumping to 2nd level BB from RB is not allowed\n"); + gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n"); return -EINVAL; } @@ -1725,7 +1732,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (batch_buffer_needs_scan(s)) { ret = perform_bb_shadow(s); if (ret < 0) - gvt_err("invalid shadow batch buffer\n"); + gvt_vgpu_err("invalid shadow batch buffer\n"); } else { /* emulate a batch buffer end to do return right */ ret = cmd_handler_mi_batch_buffer_end(s); @@ -2452,6 +2459,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) int ret = 0; cycles_t t0, t1, t2; struct parser_exec_state s_before_advance_custom; + struct intel_vgpu *vgpu = s->vgpu; t0 = get_cycles(); @@ -2459,7 +2467,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -2475,7 +2483,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (info->handler) { ret = info->handler(s); if (ret < 0) { - gvt_err("%s handler error\n", info->name); + gvt_vgpu_err("%s handler error\n", info->name); return ret; } } @@ -2486,7 +2494,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (!(info->flag & F_IP_ADVANCE_CUSTOM)) { ret = cmd_advance_default(s); if (ret) { - gvt_err("%s IP advance error\n", info->name); + gvt_vgpu_err("%s IP advance error\n", info->name); return ret; } } @@ -2509,6 +2517,7 @@ static int command_scan(struct parser_exec_state *s, unsigned long gma_head, gma_tail, gma_bottom; int ret = 0; + 
struct intel_vgpu *vgpu = s->vgpu; gma_head = rb_start + rb_head; gma_tail = rb_start + rb_tail; @@ -2520,7 +2529,7 @@ static int command_scan(struct parser_exec_state *s, if (s->buf_type == RING_BUFFER_INSTRUCTION) { if (!(s->ip_gma >= rb_start) || !(s->ip_gma < gma_bottom)) { - gvt_err("ip_gma %lx out of ring scope." + gvt_vgpu_err("ip_gma %lx out of ring scope." "(base:0x%lx, bottom: 0x%lx)\n", s->ip_gma, rb_start, gma_bottom); @@ -2528,7 +2537,7 @@ static int command_scan(struct parser_exec_state *s, return -EINVAL; } if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) { - gvt_err("ip_gma %lx out of range." + gvt_vgpu_err("ip_gma %lx out of range." "base 0x%lx head 0x%lx tail 0x%lx\n", s->ip_gma, rb_start, rb_head, rb_tail); @@ -2538,7 +2547,7 @@ static int command_scan(struct parser_exec_state *s, } ret = cmd_parser_exec(s); if (ret) { - gvt_err("cmd parser error\n"); + gvt_vgpu_err("cmd parser error\n"); parser_exec_state_dump(s); break; } @@ -2662,7 +2671,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_head, gma_top, workload->shadow_ring_buffer_va); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } copy_len = gma_top - gma_head; @@ -2674,7 +2683,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_head, gma_tail, workload->shadow_ring_buffer_va + copy_len); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } ring->tail += workload->rb_len; @@ -2685,16 +2694,17 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { int ret; + struct intel_vgpu *vgpu = workload->vgpu; ret = shadow_workload_ring_buffer(workload); if (ret) { - gvt_err("fail to shadow workload ring_buffer\n"); + gvt_vgpu_err("fail to shadow workload ring_buffer\n"); return ret; } ret = scan_workload(workload); if (ret) { - gvt_err("scan workload error\n"); + gvt_vgpu_err("scan workload error\n"); return ret; } return 0; @@ -2704,6 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ctx_size = wa_ctx->indirect_ctx.size; unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; + struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; struct drm_i915_gem_object *obj; int ret = 0; void *map; @@ -2717,14 +2728,14 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) /* get the va of the shadow batch buffer */ map = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(map)) { - gvt_err("failed to vmap shadow indirect ctx\n"); + gvt_vgpu_err("failed to vmap shadow indirect ctx\n"); ret = PTR_ERR(map); goto put_obj; } ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) { - gvt_err("failed to set shadow indirect ctx to CPU\n"); + gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n"); goto unmap_src; } @@ -2733,7 +2744,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) guest_gma, guest_gma + ctx_size, map); if (ret) { - gvt_err("fail to copy guest indirect ctx\n"); + gvt_vgpu_err("fail to copy guest indirect ctx\n"); goto unmap_src; } @@ -2767,13 +2778,14 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ret; + struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; if (wa_ctx->indirect_ctx.size == 0) return 0; ret = shadow_indirect_ctx(wa_ctx); if 
(ret) { - gvt_err("fail to shadow indirect ctx\n"); + gvt_vgpu_err("fail to shadow indirect ctx\n"); return ret; } @@ -2781,7 +2793,7 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) ret = scan_wa_ctx(wa_ctx); if (ret) { - gvt_err("scan wa ctx error\n"); + gvt_vgpu_err("scan wa ctx error\n"); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index 68cba7bd980af8..b0cff4dc268479 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -27,6 +27,14 @@ #define gvt_err(fmt, args...) \ DRM_ERROR("gvt: "fmt, ##args) +#define gvt_vgpu_err(fmt, args...) \ +do { \ + if (IS_ERR_OR_NULL(vgpu)) \ + DRM_DEBUG_DRIVER("gvt: "fmt, ##args); \ + else \ + DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ +} while (0) + #define gvt_dbg_core(fmt, args...) \ DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index bda85dff7b2a99..f1648fe5e5eaac 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -52,16 +52,16 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) unsigned char chr = 0; if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) { - gvt_err("Driver tries to read EDID without proper sequence!\n"); + gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n"); return 0; } if (edid->current_edid_read >= EDID_SIZE) { - gvt_err("edid_get_byte() exceeds the size of EDID!\n"); + gvt_vgpu_err("edid_get_byte() exceeds the size of EDID!\n"); return 0; } if (!edid->edid_available) { - gvt_err("Reading EDID but EDID is not available!\n"); + gvt_vgpu_err("Reading EDID but EDID is not available!\n"); return 0; } @@ -72,7 +72,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) chr = edid_data->edid_block[edid->current_edid_read]; edid->current_edid_read++; } else { - gvt_err("No EDID available during the reading?\n"); + gvt_vgpu_err("No EDID available during the reading?\n"); } return chr; } @@ -223,7 +223,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; break; default: - gvt_err("Unknown/reserved GMBUS cycle detected!\n"); + gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n"); break; } /* @@ -292,8 +292,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, */ } else { memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes); - gvt_err("vgpu%d: warning: gmbus3 read with nothing returned\n", - vgpu->id); + gvt_vgpu_err("warning: gmbus3 read with nothing returned\n"); } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 46eb9fd3c03f6b..f1f426a97aa9d4 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -172,6 +172,7 @@ static int emulate_execlist_ctx_schedule_out( struct intel_vgpu_execlist *execlist, struct execlist_ctx_descriptor_format *ctx) { + struct intel_vgpu *vgpu = execlist->vgpu; struct intel_vgpu_execlist_slot *running = execlist->running_slot; struct intel_vgpu_execlist_slot *pending = execlist->pending_slot; struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0]; @@ -183,7 +184,7 @@ static int emulate_execlist_ctx_schedule_out( gvt_dbg_el("schedule out context id %x\n", ctx->context_id); if (WARN_ON(!same_context(ctx, execlist->running_context))) { - gvt_err("schedule out context is not running context," + gvt_vgpu_err("schedule out context is not running 
context," "ctx id %x running ctx id %x\n", ctx->context_id, execlist->running_context->context_id); @@ -254,7 +255,7 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot( status.udw = vgpu_vreg(vgpu, status_reg + 4); if (status.execlist_queue_full) { - gvt_err("virtual execlist slots are full\n"); + gvt_vgpu_err("virtual execlist slots are full\n"); return NULL; } @@ -270,11 +271,12 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, struct execlist_ctx_descriptor_format *ctx0, *ctx1; struct execlist_context_status_format status; + struct intel_vgpu *vgpu = execlist->vgpu; gvt_dbg_el("emulate schedule-in\n"); if (!slot) { - gvt_err("no available execlist slot\n"); + gvt_vgpu_err("no available execlist slot\n"); return -EINVAL; } @@ -375,7 +377,6 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); if (IS_ERR(vma)) { - gvt_err("Cannot pin\n"); return; } @@ -428,7 +429,6 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, 0, CACHELINE_BYTES, 0); if (IS_ERR(vma)) { - gvt_err("Cannot pin indirect ctx obj\n"); return; } @@ -561,6 +561,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) { struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; + struct intel_vgpu *vgpu = workload->vgpu; int page_table_level; u32 pdp[8]; @@ -569,7 +570,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ page_table_level = 4; } else { - gvt_err("Advanced Context mode(SVM) is not supported!\n"); + gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); return -EINVAL; } @@ -583,7 +584,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, pdp, page_table_level, 0); if (IS_ERR(mm)) { - gvt_err("fail to create mm object.\n"); + gvt_vgpu_err("fail to create mm object.\n"); return PTR_ERR(mm); } } @@ -609,7 +610,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid guest context LRCA: %x\n", desc->lrca); + gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); return -EINVAL; } @@ -724,8 +725,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) continue; if (!desc[i]->privilege_access) { - gvt_err("vgpu%d: unexpected GGTT elsp submission\n", - vgpu->id); + gvt_vgpu_err("unexpected GGTT elsp submission\n"); return -EINVAL; } @@ -735,15 +735,13 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) } if (!valid_desc_bitmap) { - gvt_err("vgpu%d: no valid desc in a elsp submission\n", - vgpu->id); + gvt_vgpu_err("no valid desc in a elsp submission\n"); return -EINVAL; } if (!test_bit(0, (void *)&valid_desc_bitmap) && test_bit(1, (void *)&valid_desc_bitmap)) { - gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n", - vgpu->id); + gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n"); return -EINVAL; } @@ -752,8 +750,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) ret = submit_context(vgpu, ring_id, &valid_desc[i], emulate_schedule_in); if (ret) { - gvt_err("vgpu%d: fail to schedule workload\n", - vgpu->id); + gvt_vgpu_err("fail to schedule 
workload\n"); return ret; } emulate_schedule_in = false; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6a5ff23ded907c..da73127158241b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -49,8 +49,8 @@ bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) { if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) { - gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n", - vgpu->id, addr, size); + gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n", + addr, size); return false; } return true; @@ -430,7 +430,7 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p, mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to translate gfn: 0x%lx\n", gfn); + gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn); return -ENXIO; } @@ -611,7 +611,7 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu, daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, daddr)) { - gvt_err("fail to map dma addr\n"); + gvt_vgpu_err("fail to map dma addr\n"); return -EINVAL; } @@ -735,7 +735,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( if (reclaim_one_mm(vgpu->gvt)) goto retry; - gvt_err("fail to allocate ppgtt shadow page\n"); + gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); return ERR_PTR(-ENOMEM); } @@ -750,14 +750,14 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page( */ ret = init_shadow_page(vgpu, &spt->shadow_page, type); if (ret) { - gvt_err("fail to initialize shadow page for spt\n"); + gvt_vgpu_err("fail to initialize shadow page for spt\n"); goto err; } ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page, gfn, ppgtt_write_protection_handler, NULL); if (ret) { - gvt_err("fail to initialize guest page for spt\n"); + gvt_vgpu_err("fail to initialize guest page for spt\n"); goto err; } @@ -776,8 +776,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( if (p) return shadow_page_to_ppgtt_spt(p); - gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n", - vgpu->id, mfn); + gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn); return NULL; } @@ -827,8 +826,8 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, } s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) { - gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n", - vgpu->id, ops->get_pfn(e)); + gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", + ops->get_pfn(e)); return -ENXIO; } return ppgtt_invalidate_shadow_page(s); @@ -836,6 +835,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { + struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry e; unsigned long index; int ret; @@ -854,7 +854,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_shadow_entry(spt, &e, index) { if (!gtt_type_is_pt(get_next_pt_type(e.type))) { - gvt_err("GVT doesn't support pse bit for now\n"); + gvt_vgpu_err("GVT doesn't support pse bit for now\n"); return -EINVAL; } ret = ppgtt_invalidate_shadow_page_by_shadow_entry( @@ -868,8 +868,8 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) ppgtt_free_shadow_page(spt); return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n", - spt->vgpu->id, spt, 
e.val64, e.type); + gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", + spt, e.val64, e.type); return ret; } @@ -914,8 +914,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( } return s; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, s, we->val64, we->type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + s, we->val64, we->type); return ERR_PTR(ret); } @@ -953,7 +953,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_guest_entry(spt, &ge, i) { if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { - gvt_err("GVT doesn't support pse bit now\n"); + gvt_vgpu_err("GVT doesn't support pse bit now\n"); ret = -EINVAL; goto fail; } @@ -969,8 +969,8 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) } return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, ge.val64, ge.type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + spt, ge.val64, ge.type); return ret; } @@ -999,7 +999,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, struct intel_vgpu_ppgtt_spt *s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e)); if (!s) { - gvt_err("fail to find guest page\n"); + gvt_vgpu_err("fail to find guest page\n"); ret = -ENXIO; goto fail; } @@ -1011,8 +1011,8 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, ppgtt_set_shadow_entry(spt, &e, index); return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, e.val64, e.type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + spt, e.val64, e.type); return ret; } @@ -1046,8 +1046,8 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, } return 0; fail: - gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id, - spt, we->val64, we->type); + gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", + spt, we->val64, we->type); return ret; } @@ -1250,8 +1250,8 @@ static int ppgtt_handle_guest_write_page_table( } return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n", - vgpu->id, spt, we->val64, we->type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", + spt, we->val64, we->type); return ret; } @@ -1493,7 +1493,7 @@ static int shadow_mm(struct intel_vgpu_mm *mm) spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { - gvt_err("fail to populate guest root pointer\n"); + gvt_vgpu_err("fail to populate guest root pointer\n"); ret = PTR_ERR(spt); goto fail; } @@ -1566,7 +1566,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, ret = gtt->mm_alloc_page_table(mm); if (ret) { - gvt_err("fail to allocate page table for mm\n"); + gvt_vgpu_err("fail to allocate page table for mm\n"); goto fail; } @@ -1584,7 +1584,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, } return mm; fail: - gvt_err("fail to create mm\n"); + gvt_vgpu_err("fail to create mm\n"); if (mm) intel_gvt_mm_unreference(mm); return ERR_PTR(ret); @@ -1760,7 +1760,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) mm->page_table_level, gma, gpa); return gpa; err: - gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma); + gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); return INTEL_GVT_INVALID_ADDR; } @@ -1836,8 +1836,7 @@ static int 
emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, if (ops->test_present(&e)) { ret = gtt_entry_p2m(vgpu, &e, &m); if (ret) { - gvt_err("vgpu%d: fail to translate guest gtt entry\n", - vgpu->id); + gvt_vgpu_err("fail to translate guest gtt entry\n"); return ret; } } else { @@ -1893,14 +1892,14 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); if (!scratch_pt) { - gvt_err("fail to allocate scratch page\n"); + gvt_vgpu_err("fail to allocate scratch page\n"); return -ENOMEM; } daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) { - gvt_err("fail to dmamap scratch_pt\n"); + gvt_vgpu_err("fail to dmamap scratch_pt\n"); __free_page(virt_to_page(scratch_pt)); return -ENOMEM; } @@ -2003,7 +2002,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, NULL, 1, 0); if (IS_ERR(ggtt_mm)) { - gvt_err("fail to create mm for ggtt.\n"); + gvt_vgpu_err("fail to create mm for ggtt.\n"); return PTR_ERR(ggtt_mm); } @@ -2076,7 +2075,6 @@ static int setup_spt_oos(struct intel_gvt *gvt) for (i = 0; i < preallocated_oos_pages; i++) { oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL); if (!oos_page) { - gvt_err("fail to pre-allocate oos page\n"); ret = -ENOMEM; goto fail; } @@ -2166,7 +2164,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT, pdp, page_table_level, 0); if (IS_ERR(mm)) { - gvt_err("fail to create mm\n"); + gvt_vgpu_err("fail to create mm\n"); return PTR_ERR(mm); } } @@ -2196,7 +2194,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); if (!mm) { - gvt_err("fail to find ppgtt instance.\n"); + gvt_vgpu_err("fail to find ppgtt instance.\n"); return -EINVAL; } intel_gvt_mm_unreference(mm); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index de975f40aebf4d..eaff45d417e8a0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -181,11 +181,9 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); if (!vgpu->mmio.disable_warn_untrack) { - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence %d, access fence %d\n", - vgpu->id, vgpu_fence_sz(vgpu), - fence_num); + gvt_vgpu_err("found oob fence register access\n"); + gvt_vgpu_err("total fence %d, access fence %d\n", + vgpu_fence_sz(vgpu), fence_num); } memset(p_data, 0, bytes); return -EINVAL; @@ -249,7 +247,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, break; default: /*should not hit here*/ - gvt_err("invalid forcewake offset 0x%x\n", offset); + gvt_vgpu_err("invalid forcewake offset 0x%x\n", offset); return -EINVAL; } } else { @@ -530,7 +528,7 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu, fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_2; fdi_iir_check_bits = FDI_RX_SYMBOL_LOCK; } else { - gvt_err("Invalid train pattern %d\n", train_pattern); + gvt_vgpu_err("Invalid train pattern %d\n", train_pattern); return -EINVAL; } @@ -588,7 +586,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX) index = FDI_RX_IMR_TO_PIPE(offset); else { - gvt_err("Unsupport registers %x\n", offset); + gvt_vgpu_err("Unsupport registers %x\n", offset); return -EINVAL; } @@ -818,7 +816,7 @@ static 
int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, u32 data; if (!dpy_is_valid_port(port_index)) { - gvt_err("GVT(%d): Unsupported DP port access!\n", vgpu->id); + gvt_vgpu_err("Unsupported DP port access!\n"); return 0; } @@ -1016,8 +1014,7 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu, if (i == num) { if (num == SBI_REG_MAX) { - gvt_err("vgpu%d: SBI caching meets maximum limits\n", - vgpu->id); + gvt_vgpu_err("SBI caching meets maximum limits\n"); return; } display->sbi.number++; @@ -1097,7 +1094,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, break; } if (invalid_read) - gvt_err("invalid pvinfo read: [%x:%x] = %x\n", + gvt_vgpu_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); vgpu->pv_notified = true; return 0; @@ -1125,7 +1122,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) case 1: /* Remove this in guest driver. */ break; default: - gvt_err("Invalid PV notification %d\n", notification); + gvt_vgpu_err("Invalid PV notification %d\n", notification); } return ret; } @@ -1181,7 +1178,7 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); break; default: - gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", + gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); break; } @@ -1415,7 +1412,8 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (execlist->elsp_dwords.index == 3) { ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) - gvt_err("fail submit workload on ring %d\n", ring_id); + gvt_vgpu_err("fail submit workload on ring %d\n", + ring_id); } ++execlist->elsp_dwords.index; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 84d801638edece..cd218b07c6f62e 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -426,7 +426,7 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { - struct intel_vgpu *vgpu; + struct intel_vgpu *vgpu = NULL; struct intel_vgpu_type *type; struct device *pdev; void *gvt; @@ -437,7 +437,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); if (!type) { - gvt_err("failed to find type %s to create\n", + gvt_vgpu_err("failed to find type %s to create\n", kobject_name(kobj)); ret = -EINVAL; goto out; @@ -446,7 +446,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) vgpu = intel_gvt_ops->vgpu_create(gvt, type); if (IS_ERR_OR_NULL(vgpu)) { ret = vgpu == NULL ? 
-EFAULT : PTR_ERR(vgpu); - gvt_err("failed to create intel vgpu: %d\n", ret); + gvt_vgpu_err("failed to create intel vgpu: %d\n", ret); goto out; } @@ -526,7 +526,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, &vgpu->vdev.iommu_notifier); if (ret != 0) { - gvt_err("vfio_register_notifier for iommu failed: %d\n", ret); + gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", + ret); goto out; } @@ -534,7 +535,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vgpu->vdev.group_notifier); if (ret != 0) { - gvt_err("vfio_register_notifier for group failed: %d\n", ret); + gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", + ret); goto undo_iommu; } @@ -635,7 +637,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, if (index >= VFIO_PCI_NUM_REGIONS) { - gvt_err("invalid index: %u\n", index); + gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -669,7 +671,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_VGA_REGION_INDEX: case VFIO_PCI_ROM_REGION_INDEX: default: - gvt_err("unsupported region: %u\n", index); + gvt_vgpu_err("unsupported region: %u\n", index); } return ret == 0 ? count : ret; @@ -861,7 +863,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, trigger = eventfd_ctx_fdget(fd); if (IS_ERR(trigger)) { - gvt_err("eventfd_ctx_fdget failed\n"); + gvt_vgpu_err("eventfd_ctx_fdget failed\n"); return PTR_ERR(trigger); } vgpu->vdev.msi_trigger = trigger; @@ -1120,7 +1122,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS, &data_size); if (ret) { - gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); + gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); return -EINVAL; } if (data_size) { @@ -1310,7 +1312,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvm = vgpu->vdev.kvm; if (!kvm || kvm->mm != current->mm) { - gvt_err("KVM is required to use Intel vGPU\n"); + gvt_vgpu_err("KVM is required to use Intel vGPU\n"); return -ESRCH; } @@ -1337,8 +1339,10 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { + struct intel_vgpu *vgpu = info->vgpu; + if (!info) { - gvt_err("kvmgt_guest_info invalid\n"); + gvt_vgpu_err("kvmgt_guest_info invalid\n"); return false; } @@ -1383,12 +1387,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) unsigned long iova, pfn; struct kvmgt_guest_info *info; struct device *dev; + struct intel_vgpu *vgpu; int rc; if (!handle_valid(handle)) return INTEL_GVT_INVALID_ADDR; info = (struct kvmgt_guest_info *)handle; + vgpu = info->vgpu; iova = gvt_cache_find(info->vgpu, gfn); if (iova != INTEL_GVT_INVALID_ADDR) return iova; @@ -1397,13 +1403,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) dev = mdev_dev(info->vgpu->vdev.mdev); rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (rc != 1) { - gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc); + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", + gfn, rc); return INTEL_GVT_INVALID_ADDR; } /* transfer to host iova for GFX to use DMA */ rc = gvt_dma_map_iova(info->vgpu, pfn, &iova); if (rc) { - gvt_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); + 
gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); vfio_unpin_pages(dev, &gfn, 1); return INTEL_GVT_INVALID_ADDR; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 60b698cb836592..1ba3bdb0934166 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -142,10 +142,10 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); if (ret) { - gvt_err("vgpu%d: guest page read error %d, " + gvt_vgpu_err("guest page read error %d, " "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - vgpu->id, ret, - gp->gfn, pa, *(u32 *)p_data, bytes); + ret, gp->gfn, pa, *(u32 *)p_data, + bytes); } mutex_unlock(&gvt->lock); return ret; @@ -200,14 +200,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); if (!vgpu->mmio.disable_warn_untrack) { - gvt_err("vgpu%d: read untracked MMIO %x(%dB) val %x\n", - vgpu->id, offset, bytes, *(u32 *)p_data); + gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n", + offset, bytes, *(u32 *)p_data); if (offset == 0x206c) { - gvt_err("------------------------------------------\n"); - gvt_err("vgpu%d: likely triggers a gfx reset\n", - vgpu->id); - gvt_err("------------------------------------------\n"); + gvt_vgpu_err("------------------------------------------\n"); + gvt_vgpu_err("likely triggers a gfx reset\n"); + gvt_vgpu_err("------------------------------------------\n"); vgpu->mmio.disable_warn_untrack = true; } } @@ -220,8 +219,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, mutex_unlock(&gvt->lock); return 0; err: - gvt_err("vgpu%d: fail to emulate MMIO read %08x len %d\n", - vgpu->id, offset, bytes); + gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", + offset, bytes); mutex_unlock(&gvt->lock); return ret; } @@ -259,10 +258,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if (gp) { ret = gp->handler(gp, pa, p_data, bytes); if (ret) { - gvt_err("vgpu%d: guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - vgpu->id, ret, - gp->gfn, pa, *(u32 *)p_data, bytes); + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, gp->gfn, pa, + *(u32 *)p_data, bytes); } mutex_unlock(&gvt->lock); return ret; @@ -329,8 +329,8 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, /* all register bits are RO. 
*/ if (ro_mask == ~(u64)0) { - gvt_err("vgpu%d: try to write RO reg %x\n", - vgpu->id, offset); + gvt_vgpu_err("try to write RO reg %x\n", + offset); ret = 0; goto out; } @@ -360,8 +360,8 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mutex_unlock(&gvt->lock); return 0; err: - gvt_err("vgpu%d: fail to emulate MMIO write %08x len %d\n", - vgpu->id, offset, bytes); + gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, + bytes); mutex_unlock(&gvt->lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 5d1caf9daba9bd..311799136d7f6e 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -67,14 +67,15 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va + i * PAGE_SIZE); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to get MFN from VA\n"); + gvt_vgpu_err("fail to get MFN from VA\n"); return -EINVAL; } ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, vgpu_opregion(vgpu)->gfn[i], mfn, 1, map); if (ret) { - gvt_err("fail to map GFN to MFN, errno: %d\n", ret); + gvt_vgpu_err("fail to map GFN to MFN, errno: %d\n", + ret); return ret; } } @@ -287,7 +288,7 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; if (!(swsci & SWSCI_SCI_SELECT)) { - gvt_err("vgpu%d: requesting SMI service\n", vgpu->id); + gvt_vgpu_err("requesting SMI service\n"); return 0; } /* ignore non 0->1 trasitions */ @@ -300,9 +301,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) func = GVT_OPREGION_FUNC(*scic); subfunc = GVT_OPREGION_SUBFUNC(*scic); if (!querying_capabilities(*scic)) { - gvt_err("vgpu%d: requesting runtime service: func \"%s\"," + gvt_vgpu_err("requesting runtime service: func \"%s\"," " subfunc \"%s\"\n", - vgpu->id, opregion_func_name(func), opregion_subfunc_name(subfunc)); /* diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 73f052a4f4244d..95ee091ce085de 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -167,7 +167,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) I915_WRITE_FW(reg, 0x1); if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) - gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id); + gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id); else vgpu_vreg(vgpu, regs[ring_id]) = 0; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d3a56c9490257d..d29e4352d529dc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) (u32)((workload->ctx_desc.lrca + i) << GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("Invalid guest context descriptor\n"); + gvt_vgpu_err("Invalid guest context descriptor\n"); return -EINVAL; } @@ -176,6 +176,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -189,7 +190,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) 
rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { - gvt_err("fail to allocate gem request\n"); + gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); goto out; } @@ -322,7 +323,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) (u32)((workload->ctx_desc.lrca + i) << GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid guest context descriptor\n"); + gvt_vgpu_err("invalid guest context descriptor\n"); return; } @@ -417,6 +418,7 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; + struct intel_vgpu *vgpu = NULL; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -459,7 +461,8 @@ static int workload_thread(void *priv) mutex_unlock(&gvt->lock); if (ret) { - gvt_err("fail to dispatch workload, skip\n"); + vgpu = workload->vgpu; + gvt_vgpu_err("fail to dispatch workload, skip\n"); goto complete; } From 3f765a341798ebd4e0ece7cce34399a8fd4a7f9f Mon Sep 17 00:00:00 2001 From: Yulei Zhang Date: Mon, 13 Mar 2017 23:21:27 +0800 Subject: [PATCH 1438/1911] drm/i915/gvt: correct the ggtt valid bit check in pipe control command GGTT valid bit in pipe control command move to DWORD1 after SNB, so change the valid check code correspondingly. v2: per Zhenyu's comment, replace the bit check with MACRO define PIPE_CONTROL_GLOBAL_GTT_IVB Signed-off-by: Yulei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2ca0506d8d8c59..2b92cc8a7d1aa5 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1030,7 +1030,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s) ret = cmd_reg_handler(s, 0x2358, 1, "pipe_ctrl"); else if (post_sync == 1) { /* check ggtt*/ - if ((cmd_val(s, 2) & (1 << 2))) { + if ((cmd_val(s, 1) & PIPE_CONTROL_GLOBAL_GTT_IVB)) { gma = cmd_val(s, 2) & GENMASK(31, 3); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, 3)) << 32; From 3dce2aca02929f180ab66171b333fa48fe485a03 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Mar 2017 22:08:08 +0000 Subject: [PATCH 1439/1911] drm/i915/gvt: Remove bogus retry around i915_wait_request commit 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly") includes some nonsense to retry a indefinite wait - i915_wait_request() does not return until the request is completed when used from an uninterruptible context. Fixes: 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly" Signed-off-by: Chris Wilson Cc: Chuanxiao Dong Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d29e4352d529dc..dd8f8cc69a76fc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -468,19 +468,7 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); -retry: - i915_wait_request(workload->req, - 0, MAX_SCHEDULE_TIMEOUT); - /* I915 has replay mechanism and a request will be replayed - * if there is i915 reset. So the seqno will be updated anyway. 
- * If the seqno is not updated yet after waiting, which means - * the replay may still be in progress and we can wait again. - */ - if (!i915_gem_request_completed(workload->req)) { - gvt_dbg_sched("workload %p not completed, wait again\n", - workload); - goto retry; - } + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); complete: gvt_dbg_sched("will complete workload %p, status: %d\n", From cf2135ca3d50d6468e9216fef3d0d33c31af635b Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 9 Mar 2017 23:07:12 +0800 Subject: [PATCH 1440/1911] drm/i915/gvt: add enable_execlists check before enable gvt The GVT-g needs execlists to be enabled otherwise gvt should be disabled. Add a check for enable_execlists before enabling gvt. v2: use DRM_INFO in response to the user action Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/intel_gvt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index d23c0fcff7516a..8c04eca84351cb 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -77,6 +77,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) goto bail; } + if (!i915.enable_execlists) { + DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n"); + goto bail; + } + /* * We're not in host or fail to find a MPT module, disable GVT-g */ From 5180edc2421117766fcb9c2d2dc6bfaeefdeb709 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 16 Mar 2017 09:45:09 +0800 Subject: [PATCH 1441/1911] drm/i915/kvmgt: fix suspicious rcu dereference usage The srcu read lock must be held while accessing kvm memslots. This patch fix below warning for function kvmgt_rw_gpa(). [ 165.345093] [ ERR: suspicious RCU usage. 
] [ 165.416538] Call Trace: [ 165.418989] dump_stack+0x85/0xc2 [ 165.422310] lockdep_rcu_suspicious+0xd7/0x110 [ 165.426769] kvm_read_guest_page+0x195/0x1b0 [kvm] [ 165.431574] kvm_read_guest+0x50/0x90 [kvm] [ 165.440492] kvmgt_rw_gpa+0x43/0xa0 [kvmgt] [ 165.444683] kvmgt_read_gpa+0x11/0x20 [kvmgt] [ 165.449061] gtt_get_entry64+0x4d/0xc0 [i915] [ 165.453438] ppgtt_populate_shadow_page_by_guest_entry+0x380/0xdc0 [i915] [ 165.460254] shadow_mm+0xd1/0x460 [i915] [ 165.472488] intel_vgpu_create_mm+0x1ab/0x210 [i915] [ 165.477472] intel_vgpu_g2v_create_ppgtt_mm+0x5f/0xc0 [i915] [ 165.483154] pvinfo_mmio_write+0x19b/0x1d0 [i915] [ 165.499068] intel_vgpu_emulate_mmio_write+0x3f9/0x600 [i915] [ 165.504827] intel_vgpu_rw+0x114/0x150 [kvmgt] [ 165.509281] intel_vgpu_write+0x16f/0x1a0 [kvmgt] [ 165.513993] vfio_mdev_write+0x20/0x30 [vfio_mdev] [ 165.518793] vfio_device_fops_write+0x24/0x30 [vfio] [ 165.523770] __vfs_write+0x28/0x120 [ 165.540529] vfs_write+0xce/0x1f0 v2: fix Cc format for stable Signed-off-by: Changbin Du Cc: # v4.10+ Reviewed-by: Xiao Guangrong Reviewed-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index cd218b07c6f62e..1ea3eb270de8f3 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1424,7 +1424,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, { struct kvmgt_guest_info *info; struct kvm *kvm; - int ret; + int idx, ret; bool kthread = current->mm == NULL; if (!handle_valid(handle)) @@ -1436,8 +1436,10 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, if (kthread) use_mm(kvm->mm); + idx = srcu_read_lock(&kvm->srcu); ret = write ? kvm_write_guest(kvm, gpa, buf, len) : kvm_read_guest(kvm, gpa, buf, len); + srcu_read_unlock(&kvm->srcu, idx); if (kthread) unuse_mm(kvm->mm); From 17f1b1a6d4e5f41df161eb2a90af22c003a243fc Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 15 Mar 2017 23:16:01 -0400 Subject: [PATCH 1442/1911] drm/i915/gvt: scan shadow indirect context image when valid The shadow indirect context image should be only scanned when valid. So far, Only RCS ring has the shadow indirect context image. This patch limits the scan logic only for RCS ring. v2. refine description of the subject v3. fix alignment. (Zhenyu) Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index dd8f8cc69a76fc..907e6bc794f61b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -203,9 +203,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (ret) goto out; - ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); - if (ret) - goto out; + if ((workload->ring_id == RCS) && + (workload->wa_ctx.indirect_ctx.size != 0)) { + ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); + if (ret) + goto out; + } ret = populate_shadow_context(workload); if (ret) From 3cd23b828b37bd5ccf4b40132f12dbf20d7afdb6 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 16 Mar 2017 09:47:58 +0800 Subject: [PATCH 1443/1911] drm/i915/gvt: GVT pin/unpin shadow context When handling guest request, GVT needs to populate/update shadow_ctx with guest context. This behavior needs to make sure the shadow_ctx is pinned. 
The current implementation is relying on i195 allocate request to pin but this way cannot guarantee the i915 not to unpin the shadow_ctx when GVT update the guest context from shadow_ctx. So GVT should pin/unpin the shadow_ctx by itself. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 907e6bc794f61b..39a83eb7aeccaf 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -175,6 +175,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; int ret; @@ -188,6 +189,21 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&dev_priv->drm.struct_mutex); + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. + */ + ret = engine->context_pin(engine, shadow_ctx); + if (ret) { + gvt_vgpu_err("fail to pin shadow context\n"); + workload->status = ret; + mutex_unlock(&dev_priv->drm.struct_mutex); + return ret; + } + rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); @@ -231,6 +247,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (!IS_ERR_OR_NULL(rq)) i915_add_request_no_flush(rq); + else + engine->context_unpin(engine, shadow_ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } @@ -380,6 +399,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * For the workload w/o request, directly complete the workload. */ if (workload->req) { + struct drm_i915_private *dev_priv = + workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = + dev_priv->engine[workload->ring_id]; wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -392,6 +415,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) INTEL_GVT_EVENT_MAX) intel_vgpu_trigger_virtual_event(vgpu, event); } + mutex_lock(&dev_priv->drm.struct_mutex); + /* unpin shadow ctx as the shadow_ctx update is done */ + engine->context_unpin(engine, workload->vgpu->shadow_ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); } gvt_dbg_sched("ring id %d complete workload %p status %d\n", From 2958b9013fcbabeeba221161d0712f5259f1e15d Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 17 Mar 2017 12:11:20 +0800 Subject: [PATCH 1444/1911] drm/i915/gvt: Fix gvt scheduler interval time Fix to correctly assign 1ms for gvt scheduler interval time, as previous code using HZ is pretty broken. And use no delay for start gvt scheduler function. 
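A minimal userspace sketch of the broken arithmetic, independent of the patch: for the common HZ values below 1000, the old "1 * HZ / 1000" expression truncates to 0 jiffies, while a rounding-up millisecond conversion never drops below one jiffy. The round_up_ms_to_jiffies() helper here is only a stand-in for the real msecs_to_jiffies().

#include <stdio.h>

static unsigned long round_up_ms_to_jiffies(unsigned int ms, unsigned int hz)
{
        /* round up and clamp to at least one jiffy */
        unsigned long j = ((unsigned long)ms * hz + 999) / 1000;

        return j ? j : 1;
}

int main(void)
{
        unsigned int hz_values[] = { 100, 250, 300, 1000 };
        unsigned int i;

        for (i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
                unsigned int hz = hz_values[i];

                printf("HZ=%4u: 1 * HZ / 1000 = %u jiffies, rounded up = %lu jiffies\n",
                       hz, 1 * hz / 1000, round_up_ms_to_jiffies(1, hz));
        }
        return 0;
}

With HZ=100, 250 or 300 the old definition yields a zero-length time slice, which is why the patch switches to msecs_to_jiffies(1).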
Fixes: 4b63960ebd3f ("drm/i915/gvt: vGPU schedule policy framework") Cc: Zhi Wang Cc: stable@vger.kernel.org # v4.10+ Acked-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 06c9584ac5f033..34b9acdf34791c 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -101,7 +101,7 @@ struct tbs_sched_data { struct list_head runq_head; }; -#define GVT_DEFAULT_TIME_SLICE (1 * HZ / 1000) +#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1)) static void tbs_sched_func(struct work_struct *work) { @@ -223,7 +223,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) return; list_add_tail(&vgpu_data->list, &sched_data->runq_head); - schedule_delayed_work(&sched_data->work, sched_data->period); + schedule_delayed_work(&sched_data->work, 0); } static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) From e7ede72a6d40cb3a30c087142d79381ca8a31dab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Mar 2017 22:53:37 +0100 Subject: [PATCH 1445/1911] perf symbols: Fix symbols__fixup_end heuristic for corner cases The current symbols__fixup_end() heuristic for the last entry in the rb tree is suboptimal as it leads to not being able to recognize the symbol in the call graph in a couple of corner cases, for example: i) If the symbol has a start address (f.e. exposed via kallsyms) that is at a page boundary, then the roundup(curr->start, 4096) for the last entry will result in curr->start == curr->end with a symbol length of zero. ii) If the symbol has a start address that is shortly before a page boundary, then also here, curr->end - curr->start will just be very few bytes, where it's unrealistic that we could perform a match against. Instead, change the heuristic to roundup(curr->start, 4096) + 4096, so that we can catch such corner cases and have a better chance to find that specific symbol. It's still just best effort as the real end of the symbol is unknown to us (and could even be at a larger offset than the current range), but better than the current situation. Alexei reported that he recently run into case i) with a JITed eBPF program (these are all page aligned) as the last symbol which wasn't properly shown in the call graph (while other eBPF program symbols in the rb tree were displayed correctly). Since this is a generic issue, lets try to improve the heuristic a bit. 
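A standalone sketch of the two corner cases described above, assuming only the usual roundup() definition; it is not part of the patch, just a worked illustration of the old versus new end heuristic. The example addresses are made up.

#include <stdio.h>

#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

int main(void)
{
        unsigned long starts[] = {
                0xffffffffc0002000UL,   /* case i: start on a page boundary */
                0xffffffffc0002ff0UL,   /* case ii: start just before a boundary */
        };
        unsigned int i;

        for (i = 0; i < 2; i++) {
                unsigned long start = starts[i];
                unsigned long old_end = roundup(start, 4096UL);
                unsigned long new_end = roundup(start, 4096UL) + 4096;

                printf("start=%#lx: old symbol length=%lu, new symbol length=%lu\n",
                       start, old_end - start, new_end - start);
        }
        return 0;
}

Case i gives a zero-length symbol and case ii a 16-byte one under the old heuristic; with the extra 4096 both get a full-page window to match against.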
Reported-and-Tested-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Fixes: 2e538c4a1847 ("perf tools: Improve kernel/modules symbol lookup") Link: http://lkml.kernel.org/r/bb5c80d27743be6f12afc68405f1956a330e1bc9.1489614365.git.daniel@iogearbox.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 70e389bc4af71a..9b4d8ba22fed85 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols) /* Last entry */ if (curr->end == curr->start) - curr->end = roundup(curr->start, 4096); + curr->end = roundup(curr->start, 4096) + 4096; } void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) From cf8c73afb3abf0f8905efbaddd4ce11a0deec9da Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 17 Mar 2017 10:22:51 +0800 Subject: [PATCH 1446/1911] drm/amd/amdgpu: add POLARIS12 PCI ID Signed-off-by: Evan Quan Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f7adbace428a49..b76cd699eb0d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -421,6 +421,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0, 0, 0} From 822f5845f710e57d7e2df1fd1ee00d6e19d334fe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 7 Feb 2017 13:08:23 -0600 Subject: [PATCH 1447/1911] efi/esrt: Cleanup bad memory map log messages The Intel Compute Stick STCK1A8LFC and Weibu F3C platforms both log 2 error messages during boot: efi: requested map not found. esrt: ESRT header is not in the memory map. Searching the web, this seems to affect many other platforms too. Since these messages are logged as errors, they appear on-screen during the boot process even when using the "quiet" boot parameter used by distros. Demote the ESRT error to a warning so that it does not appear on-screen, and delete the error logging from efi_mem_desc_lookup; both callsites of that function log more specific messages upon failure. Out of curiosity I looked closer at the Weibu F3C. There is no entry in the UEFI-provided memory map which corresponds to the ESRT pointer, but hacking the code to map it anyway, the ESRT does appear to be valid with 2 entries. 
Signed-off-by: Daniel Drake Cc: Matt Fleming Acked-by: Peter Jones Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 1 - drivers/firmware/efi/esrt.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e7d404059b7316..b372aad3b449c3 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -389,7 +389,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) return 0; } } - pr_err_once("requested map not found.\n"); return -ENOENT; } diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 08b026864d4e7d..8554d7aec31c64 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -254,7 +254,7 @@ void __init efi_esrt_init(void) rc = efi_mem_desc_lookup(efi.esrt, &md); if (rc < 0) { - pr_err("ESRT header is not in the memory map.\n"); + pr_warn("ESRT header is not in the memory map.\n"); return; } From 38a33101dd5fb8f07244e7e6dd04747a6cd2e3fb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 9 Mar 2017 11:36:36 +0800 Subject: [PATCH 1448/1911] NFS: fix the fault nrequests decreasing for nfs_inode COPY The nfs_commit_file for NFSv4.2's COPY operation goes through the commit path for normal WRITE, but without increase nrequests, so, the nrequests decreased in nfs_commit_release_pages is fault. After that, the nrequests will be wrong. [ 5670.299881] ------------[ cut here ]------------ [ 5670.300295] WARNING: CPU: 0 PID: 27656 at fs/nfs/inode.c:127 nfs_clear_inode+0x66/0x90 [nfs] [ 5670.300558] Modules linked in: nfsv4(E) nfs(E) fscache(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event ppdev f2fs coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_ens1371 intel_rapl_perf gameport snd_ac97_codec vmw_balloon ac97_bus snd_seq snd_pcm joydev snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm e1000 crc32c_intel mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: fscache] [ 5670.302925] CPU: 0 PID: 27656 Comm: umount.nfs4 Tainted: G W E 4.11.0-rc1+ #519 [ 5670.303292] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 5670.304094] Call Trace: [ 5670.304510] dump_stack+0x63/0x86 [ 5670.304917] __warn+0xcb/0xf0 [ 5670.305276] warn_slowpath_null+0x1d/0x20 [ 5670.305661] nfs_clear_inode+0x66/0x90 [nfs] [ 5670.306093] nfs4_evict_inode+0x61/0x70 [nfsv4] [ 5670.306480] evict+0xbb/0x1c0 [ 5670.306888] dispose_list+0x4d/0x70 [ 5670.307233] evict_inodes+0x178/0x1a0 [ 5670.307579] generic_shutdown_super+0x44/0xf0 [ 5670.307985] nfs_kill_super+0x21/0x40 [nfs] [ 5670.308325] deactivate_locked_super+0x43/0x70 [ 5670.308698] deactivate_super+0x5a/0x60 [ 5670.309036] cleanup_mnt+0x3f/0x90 [ 5670.309407] __cleanup_mnt+0x12/0x20 [ 5670.309837] task_work_run+0x80/0xa0 [ 5670.310162] exit_to_usermode_loop+0x89/0x90 [ 5670.310497] syscall_return_slowpath+0xaa/0xb0 [ 5670.310875] entry_SYSCALL_64_fastpath+0xa7/0xa9 [ 5670.311197] RIP: 0033:0x7f1bb3617fe7 [ 5670.311545] RSP: 002b:00007ffecbabb828 EFLAGS: 00000206 ORIG_RAX: 00000000000000a6 [ 5670.311906] RAX: 0000000000000000 RBX: 0000000001dca1f0 RCX: 00007f1bb3617fe7 [ 5670.312239] RDX: 000000000000000c RSI: 0000000000000001 RDI: 0000000001dc83c0 [ 5670.312653] RBP: 0000000001dc83c0 R08: 0000000000000001 R09: 0000000000000000 [ 5670.312998] R10: 0000000000000755 R11: 0000000000000206 R12: 00007ffecbabc66a [ 5670.313335] R13: 0000000001dc83a0 R14: 0000000000000000 R15: 0000000000000000 [ 5670.313758] ---[ end trace bf4bfe7764e4eb40 ]--- Cc: linux-kernel@vger.kernel.org Fixes: 67911c8f18 ("NFS: Add nfs_commit_file()") Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e75b056f46f435..abb2c8a3be42e4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { nfs_context_set_write_error(req->wb_context, status); - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(", error = %d\n", status); goto next; } @@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) * returned by the server against all stored verfs. */ if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(" OK\n"); goto next; } From eed50879d64ab1b9f76445dbab822e43a098b309 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 11 Mar 2017 15:52:47 -0500 Subject: [PATCH 1449/1911] xprtrdma: Squelch kbuild sparse complaint New complaint from kbuild for 4.9.y: net/sunrpc/xprtrdma/verbs.c:489:19: sparse: incompatible types in comparison expression (different type sizes) verbs.c: 489 max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); I can't reproduce this running sparse here. Likewise, "make W=1 net/sunrpc/xprtrdma/verbs.o" never indicated any issue. A little poking suggests that because the range of its values is small, gcc can make the actual width of RPCRDMA_MAX_SEND_SGES smaller than the width of an unsigned integer. 
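A userspace sketch of the min()/min_t() distinction behind this fix; the two macros below are simplified stand-ins for the kernel versions, and MAX_SEND_SGES is a made-up enum standing in for RPCRDMA_MAX_SEND_SGES.

#include <stdio.h>

#define min(x, y) ({                                    \
        typeof(x) _min1 = (x);                          \
        typeof(y) _min2 = (y);                          \
        (void) (&_min1 == &_min2); /* complains when the types differ */ \
        _min1 < _min2 ? _min1 : _min2; })

#define min_t(type, x, y) ({                            \
        type _min1 = (x);                               \
        type _min2 = (y);                               \
        _min1 < _min2 ? _min1 : _min2; })

/* small-range enum standing in for RPCRDMA_MAX_SEND_SGES */
enum { MAX_SEND_SGES = 16 };

int main(void)
{
        int device_max_sge = 32;        /* stand-in for attrs.max_sge */

        /*
         * min(device_max_sge, MAX_SEND_SGES) is the form that can trip
         * a type-size check, depending on how the compiler sizes the
         * enum; min_t() avoids it by forcing one explicit type on both
         * operands, which is what the patch does.
         */
        unsigned int max_sge = min_t(unsigned int, device_max_sge,
                                     MAX_SEND_SGES);

        printf("negotiated max_sge = %u\n", max_sge);
        return 0;
}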
Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81cd31acf690f4..3b332b395045b5 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_cq *sendcq, *recvcq; int rc; - max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; From 05fae7bbc237bc7de0ee9c3dcf85b2572a80e3b5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 10:48:13 +0800 Subject: [PATCH 1450/1911] nfs: make nfs4_cb_sv_ops static Fixes the following sparse warning: fs/nfs/callback.c:235:21: warning: symbol 'nfs4_cb_sv_ops' was not declared. Should it be static? Signed-off-by: Jason Yan Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca6a..5c8a096d763e0d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -231,12 +231,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_module = THIS_MODULE, }; -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; From 63513232f8cd219dcaa5eafae028740ed3067d83 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 13 Mar 2017 10:36:19 -0400 Subject: [PATCH 1451/1911] NFS prevent double free in async nfs4_exchange_id Since rpc_task is async, the release function should be called which will free the impl_id, scope, and owner. 
Trond pointed at 2 more problems: -- use of client pointer after free in the nfs4_exchangeid_release() function -- cl_count mismatch if rpc_run_task() isn't run Fixes: 8d89bd70bc9 ("NFS setup async exchange_id") Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # 4.9 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c1f5369cd33988..c780d98035ccf7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7425,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - nfs_put_client(cdata->args.client); if (cdata->xprt) { xprt_put(cdata->xprt); rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); } + nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7536,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_impl_id; - } + if (IS_ERR(task)) + return PTR_ERR(task); if (!xprt) { status = rpc_wait_for_completion_task(task); @@ -7567,6 +7565,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, kfree(calldata->res.server_owner); out_calldata: kfree(calldata); + nfs_put_client(clp); goto out; } From 033853325fe3bdc70819a8b97915bd3bca41d3af Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 8 Mar 2017 14:39:15 -0500 Subject: [PATCH 1452/1911] NFSv4.1 respect server's max size in CREATE_SESSION Currently client doesn't respect max sizes server returns in CREATE_SESSION. nfs4_session_set_rwsize() gets called and server->rsize, server->wsize are 0 so they never get set to the sizes returned by the server. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5ae9d64ea08bc8..8346ccbf2d52e5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } From a33e4b036d4612f62220f37a9fa29d273b6fd0ca Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:48 -0500 Subject: [PATCH 1453/1911] pNFS: return status from nfs4_pnfs_ds_connect The nfs4_pnfs_ds_connect path can call rpc_create which can fail or it can wait on another context to reach the same failure. This checks that the rpc_create succeeded and returns the error to the caller. When an error is returned, both the files and flexfiles layouts will return NULL from _prepare_ds(). The flexfiles layout will also return the layout with the error NFS4ERR_NXIO. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 25 ++++++++++++++++++++++- fs/nfs/filelayout/filelayoutdev.c | 7 ++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- fs/nfs/internal.h | 2 ++ fs/nfs/pnfs.h | 2 +- fs/nfs/pnfs_nfs.c | 15 ++++++++++++-- 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 91a8d610ba0fa6..390ada8741bcbf 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat return NULL; } -static bool nfs_client_init_is_complete(const struct nfs_client *clp) +/* + * Return true if @clp is done initializing, false if still working on it. + * + * Use nfs_client_init_status to check if it was successful. + */ +bool nfs_client_init_is_complete(const struct nfs_client *clp) { return clp->cl_cons_state <= NFS_CS_READY; } +EXPORT_SYMBOL_GPL(nfs_client_init_is_complete); + +/* + * Return 0 if @clp was successfully initialized, -errno otherwise. + * + * This must be called *after* nfs_client_init_is_complete() returns true, + * otherwise it will pop WARN_ON_ONCE and return -EINVAL + */ +int nfs_client_init_status(const struct nfs_client *clp) +{ + /* called without checking nfs_client_init_is_complete */ + if (clp->cl_cons_state > NFS_CS_READY) { + WARN_ON_ONCE(1); + return -EINVAL; + } + return clp->cl_cons_state; +} +EXPORT_SYMBOL_GPL(nfs_client_init_status); int nfs_wait_client_init_complete(const struct nfs_client *clp) { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index f956ca20a8a359..1881206261791d 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_pnfs_ds *ret = ds; struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); + int status; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", @@ -277,9 +278,13 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds->ds_clp) goto out_test_devid; - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, 4, s->nfs_client->cl_minorversion); + if (status) { + ret = NULL; + goto out; + } out_test_devid: if (ret->ds_clp == NULL || diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e5a6f248697b36..544e7725e67912 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; + int status; if (!ff_layout_mirror_valid(lseg, mirror, true)) { pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", @@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. 
*/ - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, mirror->mirror_ds->ds_versions[0].minor_version); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 09ca5095c04e42..7b38fedb7e0328 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +extern bool nfs_client_init_is_complete(const struct nfs_client *clp); +extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 63f77b49a586a5..590e1e35781f0b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 9414b492439fbf..a7691b927af6fa 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -745,9 +745,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, /* * Create an rpc connection to the nfs4_pnfs_ds data server. * Currently only supports IPv4 and IPv6 addresses. - * If connection fails, make devid unavailable. + * If connection fails, make devid unavailable and return a -errno. */ -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { @@ -772,6 +772,17 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, } else { nfs4_wait_ds_connect(ds); } + + /* + * At this point the ds->ds_clp should be ready, but it might have + * hit an error. + */ + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(1); + return -EINVAL; + } + + return nfs_client_init_status(ds->ds_clp); } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From da066f3f039eba3e72e97b2ccad0dd8b45ba84bd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:49 -0500 Subject: [PATCH 1454/1911] pNFS/flexfiles: never nfs4_mark_deviceid_unavailable The flexfiles layout should never mark a device unavailable. Move nfs4_mark_deviceid_unavailable out of nfs4_pnfs_ds_connect and call directly from files layout where it's still needed. The flexfiles driver still handles marked devices in error paths, but will now print a rate limited warning. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayoutdev.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.h | 14 ++++++++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/pnfs_nfs.c | 24 +++++++++++++++-------- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 1881206261791d..d913e818858f3f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,6 +282,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) dataserver_retrans, 4, s->nfs_client->cl_minorversion); if (status) { + nfs4_mark_deviceid_unavailable(devid); ret = NULL; goto out; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f4f39b0ab09b25..98b34c9b0564b3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { - return nfs4_test_deviceid_unavailable(node); + /* + * Flexfiles should never mark a DS unavailable, but if it does + * print a (ratelimited) warning as this can affect performance. + */ + if (nfs4_test_deviceid_unavailable(node)) { + u32 *p = (u32 *)node->deviceid.data; + + pr_warn_ratelimited("NFS: flexfiles layout referencing an " + "unavailable device [%x%x%x%x]\n", + p[0], p[1], p[2], p[3]); + return true; + } + return false; } static inline int diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 544e7725e67912..85fde93dff774e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -421,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].wsize = max_payload; goto out; } +out_fail: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); -out_fail: if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); ds = NULL; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a7691b927af6fa..7250b95549ecc7 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -751,9 +751,11 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - int err = 0; + int err; +again: + err = 0; + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { if (version == 3) { err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans); @@ -766,23 +768,29 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, err = -EPROTONOSUPPORT; } - if (err) - nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); } else { nfs4_wait_ds_connect(ds); + + /* what was waited on didn't connect AND didn't mark unavail */ + if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) + goto again; } /* * At this point the ds->ds_clp should be ready, but it might have * hit an error. 
*/ - if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { - WARN_ON_ONCE(1); - return -EINVAL; + if (!err) { + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(ds->ds_clp || + !nfs4_test_deviceid_unavailable(devid)); + return -EINVAL; + } + err = nfs_client_init_status(ds->ds_clp); } - return nfs_client_init_status(ds->ds_clp); + return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From 49d4a334727057af57048ded99697d17b016d91b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 6 Mar 2017 18:20:56 -0800 Subject: [PATCH 1455/1911] Btrfs: fix regression in lock_delalloc_pages The bug is a regression after commit (da2c7009f6ca "btrfs: teach __process_pages_contig about PAGE_LOCK operation") and commit (76c0021db8fd "Btrfs: use helper to simplify lock/unlock pages"). So if the dirty pages which are under writeback got truncated partially before we lock the dirty pages, we couldn't find all pages mapping to the delalloc range, and the bug didn't return an error so it kept going on and found that the delalloc range got truncated and got to unlock the dirty pages, and then the ASSERT could caught the error, and showed ----------------------------------------------------------------------------- assertion failed: page_ops & PAGE_LOCK, file: fs/btrfs/extent_io.c, line: 1716 ----------------------------------------------------------------------------- This fixes the bug by returning the proper -EAGAIN. Cc: David Sterba Reported-by: Dave Jones Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 28e81922a21c1e..8df797432740df 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping, * can we find nothing at @index. */ ASSERT(page_ops & PAGE_LOCK); - return ret; + err = -EAGAIN; + goto out; } for (i = 0; i < ret; i++) { From e1699d2d7bf6e6cce3e1baff19f9dd4595a58664 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 10 Mar 2017 16:45:44 -0500 Subject: [PATCH 1456/1911] btrfs: add missing memset while reading compressed inline extents This is a story about 4 distinct (and very old) btrfs bugs. Commit c8b978188c ("Btrfs: Add zlib compression support") added three data corruption bugs for inline extents (bugs #1-3). Commit 93c82d5750 ("Btrfs: zero page past end of inline file items") fixed bug #1: uncompressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read. The fix was to add a memset in btrfs_get_extent to zero out the hole. Commit 166ae5a418 ("btrfs: fix inline compressed read err corruption") fixed bug #2: compressed inline extents which contained non-zero bytes might be replaced with zero bytes in some cases. This patch removed an unhelpful memset from uncompress_inline, but the case where memset is required was missed. There is also a memset in the decompression code, but this only covers decompressed data that is shorter than the ram_bytes from the extent ref record. This memset doesn't cover the region between the end of the decompressed data and the end of the page. It has also moved around a few times over the years, so there's no single patch to refer to. This patch fixes bug #3: compressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read (i.e. bug #3 is the same as bug #1, but s/uncompressed/compressed/). 
The fix is the same: zero out the hole in the compressed case too, by putting a memset back in uncompress_inline, but this time with correct parameters. The last and oldest bug, bug #0, is the cause of the offending inline extent/hole/extent pattern. Bug #0 is a subtle and mostly-harmless quirk of behavior somewhere in the btrfs write code. In a few special cases, an inline extent and hole are allowed to persist where they normally would be combined with later extents in the file. A fast reproducer for bug #0 is presented below. A few offending extents are also created in the wild during large rsync transfers with the -S flag. A Linux kernel build (git checkout; make allyesconfig; make -j8) will produce a handful of offending files as well. Once an offending file is created, it can present different content to userspace each time it is read. Bug #0 is at least 4 and possibly 8 years old. I verified every vX.Y kernel back to v3.5 has this behavior. There are fossil records of this bug's effects in commits all the way back to v2.6.32. I have no reason to believe bug #0 wasn't present at the beginning of btrfs compression support in v2.6.29, but I can't easily test kernels that old to be sure. It is not clear whether bug #0 is worth fixing. A fix would likely require injecting extra reads into currently write-only paths, and most of the exceptional cases caused by bug #0 are already handled now. Whether we like them or not, bug #0's inline extents followed by holes are part of the btrfs de-facto disk format now, and we need to be able to read them without data corruption or an infoleak. So enough about bug #0, let's get back to bug #3 (this patch). An example of on-disk structure leading to data corruption found in the wild: item 61 key (606890 INODE_ITEM 0) itemoff 9662 itemsize 160 inode generation 50 transid 50 size 47424 nbytes 49141 block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0 flags 0x0(none) item 62 key (606890 INODE_REF 603050) itemoff 9642 itemsize 20 inode ref index 3 namelen 10 name: DB_File.so item 63 key (606890 EXTENT_DATA 0) itemoff 8280 itemsize 1362 inline extent data size 1341 ram 4085 compress(zlib) item 64 key (606890 EXTENT_DATA 4096) itemoff 8227 itemsize 53 extent data disk byte 5367308288 nr 20480 extent data offset 0 nr 45056 ram 45056 extent compression(zlib) Different data appears in userspace during each read of the 11 bytes between 4085 and 4096. The extent in item 63 is not long enough to fill the first page of the file, so a memset is required to fill the space between item 63 (ending at 4085) and item 64 (beginning at 4096) with zero. Here is a reproducer from Liu Bo, which demonstrates another method of creating the same inline extent and hole pattern: Using 'page_poison=on' kernel command line (or enable CONFIG_PAGE_POISONING) run the following: # touch foo # chattr +c foo # xfs_io -f -c "pwrite -W 0 1000" foo # xfs_io -f -c "falloc 4 8188" foo # od -x foo # echo 3 >/proc/sys/vm/drop_caches # od -x foo This produce the following on my box: Correct output: file contains 1000 data bytes followed by zeros: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 0000 0000 0000 0000 0001760 0000 0000 0000 0000 0000 0000 0000 0000 * 0020000 Actual output: the data after the first 1000 bytes will be different each run: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 6c63 7400 635f 006d 0001760 5f74 6f43 7400 435f 0053 5f74 7363 7400 0002000 435f 0056 5f74 6164 7400 645f 0062 5f74 (...) 
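A standalone sketch of the zeroing arithmetic, using the 4085-byte inline extent from the on-disk example above; PAGE_SIZE, pg_offset and max_size here are local stand-ins rather than the kernel definitions, and the 0xcd fill imitates page poisoning.

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL

int main(void)
{
        unsigned char page[PAGE_SIZE];
        unsigned long pg_offset = 0;
        unsigned long max_size = 4085;  /* ram bytes of the inline extent */

        /* pretend the decompressor filled [pg_offset, pg_offset + max_size) */
        memset(page, 0xcd, sizeof(page));

        /* the region the patch now clears in uncompress_inline() */
        if (max_size + pg_offset < PAGE_SIZE)
                memset(page + pg_offset + max_size, 0,
                       PAGE_SIZE - max_size - pg_offset);

        printf("zeroed %lu bytes: [%lu, %lu)\n",
               PAGE_SIZE - max_size - pg_offset,
               pg_offset + max_size, (unsigned long)PAGE_SIZE);
        return 0;
}

For this layout the cleared region is the 11 bytes in [4085, 4096), exactly the hole between the inline extent and the next on-disk extent at offset 4096.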
Signed-off-by: Zygo Blaxell Reviewed-by: Liu Bo Reviewed-by: Chris Mason Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b2bc07aad1ae98..e57191072aa384 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path, max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); + + /* + * decompression code contains a memset to fill in any space between the end + * of the uncompressed data and the end of max_size in case the decompressed + * data ends up shorter than ram_bytes. That doesn't cover the hole between + * the end of an inline extent and the beginning of the next block, so we + * cover that region here. + */ + + if (max_size + pg_offset < PAGE_SIZE) { + char *map = kmap(page); + memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset); + kunmap(page); + } kfree(tmp); return ret; } From e7348396c6d51b57c95c6646c390cd078e038e19 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Fri, 17 Mar 2017 14:10:57 -0700 Subject: [PATCH 1457/1911] Input: ALPS - fix V8+ protocol handling (73 03 28) Devices identified as E7="73 03 28" use slightly modified version of V8 protocol, with lower count per electrode, different offsets, and different feature bits in OTP data. Fixes: aeaa881f9b17 ("Input: ALPS - set DualPoint flag for 74 03 28 devices") Signed-off-by: Masaki Ota Acked-by: Pali Rohar Tested-by: Paul Donohue Tested-by: Nick Fletcher Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 66 +++++++++++++++++++++++++++++--------- drivers/input/mouse/alps.h | 11 +++++++ 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 72b28ebfe36003..262d9b620fcf6e 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -2462,14 +2462,34 @@ static int alps_update_device_area_ss4_v2(unsigned char otp[][4], int num_y_electrode; int x_pitch, y_pitch, x_phys, y_phys; - num_x_electrode = SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F); - num_y_electrode = SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + num_x_electrode = + SS4PLUS_NUMSENSOR_XOFFSET + (otp[0][2] & 0x0F); + num_y_electrode = + SS4PLUS_NUMSENSOR_YOFFSET + ((otp[0][2] >> 4) & 0x0F); - priv->x_max = (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE; - priv->y_max = (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + priv->x_max = + (num_x_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE; + priv->y_max = + (num_y_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE; - x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM; - y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM; + x_pitch = (otp[0][1] & 0x0F) + SS4PLUS_MIN_PITCH_MM; + y_pitch = ((otp[0][1] >> 4) & 0x0F) + SS4PLUS_MIN_PITCH_MM; + + } else { + num_x_electrode = + SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F); + num_y_electrode = + SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F); + + priv->x_max = + (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + priv->y_max = + (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + + x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM; + y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM; + } x_phys = x_pitch * (num_x_electrode - 1); /* In 0.1 mm units */ y_phys = y_pitch * (num_y_electrode - 1); /* In 0.1 mm units */ @@ 
-2485,7 +2505,10 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4], { unsigned char is_btnless; - is_btnless = (otp[1][1] >> 3) & 0x01; + if (IS_SS4PLUS_DEV(priv->dev_id)) + is_btnless = (otp[1][0] >> 1) & 0x01; + else + is_btnless = (otp[1][1] >> 3) & 0x01; if (is_btnless) priv->flags |= ALPS_BUTTONPAD; @@ -2493,6 +2516,21 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4], return 0; } +static int alps_update_dual_info_ss4_v2(unsigned char otp[][4], + struct alps_data *priv) +{ + bool is_dual = false; + + if (IS_SS4PLUS_DEV(priv->dev_id)) + is_dual = (otp[0][0] >> 4) & 0x01; + + if (is_dual) + priv->flags |= ALPS_DUALPOINT | + ALPS_DUALPOINT_WITH_PRESSURE; + + return 0; +} + static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, struct alps_data *priv) { @@ -2508,6 +2546,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, alps_update_btn_info_ss4_v2(otp, priv); + alps_update_dual_info_ss4_v2(otp, priv); + return 0; } @@ -2753,10 +2793,6 @@ static int alps_set_protocol(struct psmouse *psmouse, if (alps_set_defaults_ss4_v2(psmouse, priv)) return -EIO; - if (priv->fw_ver[1] == 0x1) - priv->flags |= ALPS_DUALPOINT | - ALPS_DUALPOINT_WITH_PRESSURE; - break; } @@ -2827,10 +2863,7 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) ec[2] >= 0x90 && ec[2] <= 0x9d) { protocol = &alps_v3_protocol_data; } else if (e7[0] == 0x73 && e7[1] == 0x03 && - e7[2] == 0x14 && ec[1] == 0x02) { - protocol = &alps_v8_protocol_data; - } else if (e7[0] == 0x73 && e7[1] == 0x03 && - e7[2] == 0x28 && ec[1] == 0x01) { + (e7[2] == 0x14 || e7[2] == 0x28)) { protocol = &alps_v8_protocol_data; } else { psmouse_dbg(psmouse, @@ -2840,7 +2873,8 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) } if (priv) { - /* Save the Firmware version */ + /* Save Device ID and Firmware version */ + memcpy(priv->dev_id, e7, 3); memcpy(priv->fw_ver, ec, 3); error = alps_set_protocol(psmouse, priv, protocol); if (error) diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 6d279aa27cb9a1..4334f2805d93c7 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -54,6 +54,16 @@ enum SS4_PACKET_ID { #define SS4_MASK_NORMAL_BUTTONS 0x07 +#define SS4PLUS_COUNT_PER_ELECTRODE 128 +#define SS4PLUS_NUMSENSOR_XOFFSET 16 +#define SS4PLUS_NUMSENSOR_YOFFSET 5 +#define SS4PLUS_MIN_PITCH_MM 37 + +#define IS_SS4PLUS_DEV(_b) (((_b[0]) == 0x73) && \ + ((_b[1]) == 0x03) && \ + ((_b[2]) == 0x28) \ + ) + #define SS4_IS_IDLE_V2(_b) (((_b[0]) == 0x18) && \ ((_b[1]) == 0x10) && \ ((_b[2]) == 0x00) && \ @@ -283,6 +293,7 @@ struct alps_data { int addr_command; u16 proto_version; u8 byte0, mask0; + u8 dev_id[3]; u8 fw_ver[3]; int flags; int x_max; From 47e6fb4212d09f325c0847d05985dd3d71553095 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Fri, 17 Mar 2017 14:19:40 -0700 Subject: [PATCH 1458/1911] Input: ALPS - fix trackstick button handling on V8 devices Alps stick devices always have physical buttons, so we should not check ALPS_BUTTONPAD flag to decide whether we should report them. 
Fixes: 4777ac220c43 ("Input: ALPS - add touchstick support for SS5 hardware") Signed-off-by: Masaki Ota Acked-by: Pali Rohar Tested-by: Paul Donohue Tested-by: Nick Fletcher Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 262d9b620fcf6e..f210e19ddba66b 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1282,10 +1282,8 @@ static int alps_decode_ss4_v2(struct alps_fields *f, /* handle buttons */ if (pkt_id == SS4_PACKET_ID_STICK) { f->ts_left = !!(SS4_BTN_V2(p) & 0x01); - if (!(priv->flags & ALPS_BUTTONPAD)) { - f->ts_right = !!(SS4_BTN_V2(p) & 0x02); - f->ts_middle = !!(SS4_BTN_V2(p) & 0x04); - } + f->ts_right = !!(SS4_BTN_V2(p) & 0x02); + f->ts_middle = !!(SS4_BTN_V2(p) & 0x04); } else { f->left = !!(SS4_BTN_V2(p) & 0x01); if (!(priv->flags & ALPS_BUTTONPAD)) { From 7de32556dfc62b9e1203730cc26b71292da8a244 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Mar 2017 00:57:39 +0100 Subject: [PATCH 1459/1911] cpufreq: intel_pstate: One set of global limits in active mode In the active mode intel_pstate currently uses two sets of global limits, each associated with one of the possible scaling_governor settings in that mode: "powersave" or "performance". The driver switches over from one of those sets to the other depending on the scaling_governor setting for the last CPU whose per-policy cpufreq interface in sysfs was last used to change parameters exposed in there. That obviously leads to no end of issues when the scaling_governor settings differ between CPUs. The most recent issue was introduced by commit a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) that eliminated the reinitialization of "performance" limits in intel_pstate_set_policy() preventing the max limit from being set to anything below 100, among other things. Namely, an undesirable side effect of commit a240c4aa5d0f is that now, after setting scaling_governor to "performance" in the active mode, the per-policy limits for the CPU in question go to the highest level and stay there even when it is switched back to "powersave" later. As it turns out, some distributions set scaling_governor to "performance" temporarily for all CPUs to speed-up system initialization, so that change causes them to misbehave later. To fix that, get rid of the performance/powersave global limits split and use just one set of global limits for everything. From the user's persepctive, after this modification, when scaling_governor is switched from "performance" to "powersave" or the other way around on one CPU, the limits settings (ie. the global max/min_perf_pct and per-policy scaling_max/min_freq for any CPUs) will not change. Still, switching from "performance" to "powersave" or the other way around changes the way in which P-states are selected and in particular "performance" causes the driver to always request the highest P-state it is allowed to ask for for the given CPU. Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 142 +++++++++++---------------------- 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08e134ffba68e2..7b07803e704256 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,9 +364,7 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits; -static struct perf_limits powersave_limits; -static struct perf_limits *limits; +static struct perf_limits global; static void intel_pstate_init_limits(struct perf_limits *limits) { @@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits) limits->max_sysfs_pct = 100; } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - intel_pstate_init_limits(limits); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - limits->min_sysfs_pct = 100; -} - static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. */ - if (!limits->turbo_disabled) + if (!global.turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; @@ -626,7 +616,7 @@ static inline void update_turbo_state(void) cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - limits->turbo_disabled = + global.turbo_disabled = (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } @@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; u64 value, cap; for_each_cpu(cpu, policy->cpus) { @@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); hw_min = HWP_LOWEST_PERF(cap); - if (limits->no_turbo) + if (global.no_turbo) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - min = fp_ext_toint(hw_max * perf_limits->min_perf); + max = fp_ext_toint(hw_max * perf_limits->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) - __releases(&intel_pstate_limits_lock) - __acquires(&intel_pstate_limits_lock) { - struct perf_limits *saved_limits = limits; int cpu; - mutex_unlock(&intel_pstate_limits_lock); - for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); - - mutex_lock(&intel_pstate_limits_lock); - - limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void) static ssize_t show_##file_name \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", limits->object); \ + return sprintf(buf, "%u\n", 
global.object); \ } static ssize_t intel_pstate_show_status(char *buf); @@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj, } update_turbo_state(); - if (limits->turbo_disabled) - ret = sprintf(buf, "%u\n", limits->turbo_disabled); + if (global.turbo_disabled) + ret = sprintf(buf, "%u\n", global.turbo_disabled); else - ret = sprintf(buf, "%u\n", limits->no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); update_turbo_state(); - if (limits->turbo_disabled) { + if (global.turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); mutex_unlock(&intel_pstate_driver_lock); return -EPERM; } - limits->no_turbo = clamp_t(int, input, 0, 1); - - intel_pstate_update_policies(); + global.no_turbo = clamp_t(int, input, 0, 1); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); - limits->max_perf_pct = max(limits->min_perf_pct, - limits->max_perf_pct); - limits->max_perf = percent_ext_fp(limits->max_perf_pct); - - intel_pstate_update_policies(); + global.max_sysfs_pct = clamp_t(int, input, 0 , 100); + global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct); + global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); + global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); + global.max_perf = percent_ext_fp(global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->min_perf_pct = min(limits->max_perf_pct, - limits->min_perf_pct); - limits->min_perf = percent_ext_fp(limits->min_perf_pct); - - intel_pstate_update_policies(); + global.min_sysfs_pct = clamp_t(int, input, 0 , 100); + global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); + global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); + global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); + global.min_perf = percent_ext_fp(global.min_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && 
!global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; - if (limits->no_turbo || limits->turbo_disabled) + if (global.no_turbo || global.turbo_disabled) max_perf = cpu->pstate.max_pstate; if (per_cpu_limits) @@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = limits->no_turbo || limits->turbo_disabled ? + target = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = NULL; + struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("set performance\n"); - if (!perf_limits) { - limits = &performance_limits; - perf_limits = limits; - } - } else { - pr_debug("set powersave\n"); - if (!perf_limits) { - limits = &powersave_limits; - perf_limits = limits; - } - - } - intel_pstate_update_perf_limits(policy, perf_limits); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits; - - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - perf_limits = &performance_limits; - else - perf_limits = &powersave_limits; update_turbo_state(); - policy->cpuinfo.max_freq = perf_limits->turbo_disabled || - perf_limits->no_turbo ? + policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - perf_limits->max_sysfs_pct / 100; + global.max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - perf_limits->min_sysfs_pct / 100; + global.min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } @@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? 
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; @@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, update_turbo_state(); - max_freq = limits->no_turbo || limits->turbo_disabled ? + max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->cpuinfo.max_freq = max_freq; if (policy->max > max_freq) @@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void) { int ret; - intel_pstate_init_limits(&powersave_limits); - intel_pstate_set_performance_limits(&performance_limits); - if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && - intel_pstate_driver == &intel_pstate) - limits = &performance_limits; - else - limits = &powersave_limits; + intel_pstate_init_limits(&global); ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { From 436ecf5519d892397af133a79ccd38a17c25fa51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 17 Mar 2017 17:21:28 +0100 Subject: [PATCH 1460/1911] USB: serial: qcserial: add Dell DW5811e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Dell branded Sierra Wireless EM7455. Cc: Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 696458db7e3c45..38b3f0d8cd580f 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -169,6 +169,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ From 74e3f6e63da6c8e8246fba1689e040bc926b4a1a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 14 Mar 2017 15:24:51 +0530 Subject: [PATCH 1461/1911] parisc: perf: Fix potential NULL pointer dereference Fix potential NULL pointer dereference and clean up coding style errors (code indent, trailing whitespaces). 
Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- arch/parisc/kernel/perf.c | 94 ++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index e282a5131d77e1..6017a5af2e6e2c 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -39,7 +39,7 @@ * the PDC INTRIGUE calls. This is done to eliminate bugs introduced * in various PDC revisions. The code is much more maintainable * and reliable this way vs having to debug on every version of PDC - * on every box. + * on every box. */ #include @@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr); static int perf_release(struct inode *inode, struct file *file); static int perf_open(struct inode *inode, struct file *file); static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos); +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static void perf_start_counters(void); static int perf_stop_counters(uint32_t *raddr); @@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void); /* * configure: * - * Configure the cpu with a given data image. First turn off the counters, + * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */ static int perf_config(uint32_t *image_ptr) @@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr) error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to write image\n"); @@ -242,7 +242,7 @@ printk("Preparing to write image\n"); error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to start counters\n"); @@ -254,7 +254,7 @@ printk("Preparing to start counters\n"); } /* - * Open the device and initialize all of its memory. The device is only + * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */ @@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t * called on the processor that the download should happen * on. 
*/ -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { size_t image_size; uint32_t image_type; uint32_t interface_type; uint32_t test; - if (perf_processor_interface == ONYX_INTF) + if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; - else if (perf_processor_interface == CUDA_INTF) + else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; - else + else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) @@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun /* First check the machine type is correct for the requested image */ - if (((perf_processor_interface == CUDA_INTF) && - (interface_type != CUDA_INTF)) || - ((perf_processor_interface == ONYX_INTF) && - (interface_type != ONYX_INTF))) + if (((perf_processor_interface == CUDA_INTF) && + (interface_type != CUDA_INTF)) || + ((perf_processor_interface == ONYX_INTF) && + (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ - if (((interface_type == CUDA_INTF) && + if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || - ((interface_type == ONYX_INTF) && - (test >= MAX_ONYX_IMAGES))) + ((interface_type == ONYX_INTF) && + (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ - if (interface_type == CUDA_INTF) + if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); @@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun static void perf_patch_images(void) { #if 0 /* FIXME!! */ -/* +/* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. 
*/ @@ -367,9 +367,9 @@ static void perf_patch_images(void) extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); - /* + /* * We can only use the lower 32-bits, the upper 32-bits should be 0 - * anyway given this is in the kernel + * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); @@ -377,21 +377,21 @@ static void perf_patch_images(void) if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ - onyx_images[TLBMISS][15] &= 0xffffff00; + onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[TLBHANDMISS][15] &= 0xffffff00; + onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[BIG_CPI][15] &= 0xffffff00; + onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; @@ -404,24 +404,24 @@ static void perf_patch_images(void) } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ - cuda_images[TLBMISS][16] = + cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBMISS][17] = + cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[TLBHANDMISS][16] = + cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBHANDMISS][17] = + cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[BIG_CPI][16] = + cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[BIG_CPI][17] = + cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { @@ -433,7 +433,7 @@ static void perf_patch_images(void) /* * ioctl routine - * All routines effect the processor that they are executed on. Thus you + * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. 
*/ @@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* copy out the Counters */ - if (copy_to_user((void __user *)arg, raddr, + if (copy_to_user((void __user *)arg, raddr, sizeof (raddr)) != 0) { error = -EFAULT; break; @@ -487,7 +487,7 @@ static const struct file_operations perf_fops = { .open = perf_open, .release = perf_release }; - + static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, @@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr) /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; - + /* Counter3 is bits 1497 to 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ @@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr) userbuf[22] = 0; userbuf[23] = 0; - /* + /* * Write back the zeroed bytes + the image given * the read was destructive. */ @@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr) } else { /* - * Read RDR-15 which contains the counters and sticky bits + * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } - /* + /* * Clear out the counters */ perf_rdr_clear(15); @@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr) raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } - + return 0; } @@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) i = tentry->num_words; while (i--) { buffer[i] = 0; - } + } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { @@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr) } runway = ioremap_nocache(cpu_device->hpa.start, 4096); + if (!runway) { + pr_err("perf_write_image: ioremap failed!\n"); + return -ENOMEM; + } /* Merge intrigue bits into Runway STATUS 0 */ tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; - __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), + __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), runway + RUNWAY_STATUS); - + /* Write RUNWAY DEBUG registers */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); } - return 0; + return 0; } /* @@ -843,7 +847,7 @@ printk("perf_rdr_write\n"); perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); - } + } } printk("perf_rdr_write done\n"); } From 73580dac7618e4bcd21679f553cf3c97323fec46 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 18 Mar 2017 17:13:27 +0100 Subject: [PATCH 1462/1911] parisc: Fix system shutdown halt On those parisc machines which don't provide a software power off function, the system currently kills the init process at the end of a shutdown and unexpectedly restarts insteads of halting. Fix it by adding a loop which will not return. 
Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 06f7ca7fe70b61..b76f503eee4a83 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -142,6 +142,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + + for (;;); } void (*pm_power_off)(void) = machine_power_off; From 9c28ca4ff8bad7486182291a55b4f67a70af718d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 8 Mar 2017 00:09:59 -0800 Subject: [PATCH 1463/1911] target: Drop pointless tfo->check_stop_free check All in-tree fabric drivers provide a tfo->check_stop_free(), so there is no need to do the extra check within existing transport_cmd_check_stop_to_fabric() code. Just to be sure, add a check in target_fabric_tf_ops_check() to notify any out-of-tree drivers that might be missing it. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 4 ++++ drivers/target/target_core_transport.c | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 54b36c9835be3a..38b5025e4c7a87 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->aborted_task()\n"); return -EINVAL; } + if (!tfo->check_stop_free) { + pr_err("Missing tfo->check_stop_free()\n"); + return -EINVAL; + } /* * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn() * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 434d9d69398917..b1a3cdb29468cf 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) * Fabric modules are expected to return '1' here if the se_cmd being * passed is released at this point, or zero if not being released. */ - return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd) - : 0; + return cmd->se_tfo->check_stop_free(cmd); } static void transport_lun_remove_cmd(struct se_cmd *cmd) From 3abaa2bfdb1e6bb33d38a2e82cf3bb82ec0197bf Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:39 -0600 Subject: [PATCH 1464/1911] tcmu: allow hw_max_sectors greater than 128 tcmu hard codes the hw_max_sectors to 128 which is a litle small. Userspace uses the max_sectors to report the optimal IO size and some initiators perform better with larger IOs (open-iscsi seems to do better with 256 to 512 depending on the test). 
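The new option is parsed through a small helper, tcmu_set_dev_attrib(), added in the hunk below. A userspace sketch of the same parse-and-validate logic (function and variable names here are illustrative, not the driver's):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Mirror of the helper's logic: convert the option value, reject
 * garbage and zero, and only then store the result.
 */
static int set_dev_attrib(const char *arg, unsigned int *attrib)
{
        char *end;
        unsigned long val;

        errno = 0;
        val = strtoul(arg, &end, 0);
        if (errno || end == arg || *end != '\0') {
                fprintf(stderr, "could not parse dev attrib '%s'\n", arg);
                return -EINVAL;
        }
        if (!val) {
                fprintf(stderr, "dev attrib must be nonzero\n");
                return -EINVAL;
        }
        *attrib = val;
        return 0;
}

int main(void)
{
        unsigned int hw_max_sectors = 128;      /* old hard-coded value */

        if (!set_dev_attrib("256", &hw_max_sectors))
                printf("hw_max_sectors=%u\n", hw_max_sectors);
        return 0;
}
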
(Fix do not display hw max sectors twice - MNC) Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 54 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c3adefe95e50f7..24e8580f07b8ec 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -960,7 +960,8 @@ static int tcmu_configure_device(struct se_device *dev) if (dev->dev_attrib.hw_block_size == 0) dev->dev_attrib.hw_block_size = 512; /* Other attributes can be configured in userspace */ - dev->dev_attrib.hw_max_sectors = 128; + if (!dev->dev_attrib.hw_max_sectors) + dev->dev_attrib.hw_max_sectors = 128; dev->dev_attrib.hw_queue_depth = 128; ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, @@ -1031,16 +1032,42 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, + Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, + Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, + {Opt_hw_max_sectors, "hw_max_sectors=%u"}, {Opt_err, NULL} }; +static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib) +{ + unsigned long tmp_ul; + char *arg_p; + int ret; + + arg_p = match_strdup(arg); + if (!arg_p) + return -ENOMEM; + + ret = kstrtoul(arg_p, 0, &tmp_ul); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for dev attrib\n"); + return ret; + } + if (!tmp_ul) { + pr_err("dev attrib must be nonzero\n"); + return -EINVAL; + } + *dev_attrib = tmp_ul; + return 0; +} + static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, const char *page, ssize_t count) { @@ -1048,7 +1075,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; - unsigned long tmp_ul; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -1082,22 +1108,12 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, pr_err("kstrtoul() failed for dev_size=\n"); break; case Opt_hw_block_size: - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtoul(arg_p, 0, &tmp_ul); - kfree(arg_p); - if (ret < 0) { - pr_err("kstrtoul() failed for hw_block_size=\n"); - break; - } - if (!tmp_ul) { - pr_err("hw_block_size must be nonzero\n"); - break; - } - dev->dev_attrib.hw_block_size = tmp_ul; + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_block_size)); + break; + case Opt_hw_max_sectors: + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_max_sectors)); break; default: break; From 2579325ca0acc598fdf41ba12b2871d3467f28df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:40 -0600 Subject: [PATCH 1465/1911] tcmu: return on first Opt parse failure We only were returing failure if the last opt to be parsed failed. This has a return failure when we first detect a failure. 
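The change amounts to an "if (ret) break;" at the bottom of the token loop, shown in the hunk below. A minimal userspace sketch of that control flow, with parse_one() standing in for the per-token switch statement (all names here are invented for the example):

#include <stdio.h>
#include <string.h>

/* Stand-in for the per-token switch; fails for any "bad"-prefixed option. */
static int parse_one(const char *opt)
{
        return strncmp(opt, "bad", 3) == 0 ? -1 : 0;
}

static int parse_opts(char *opts)
{
        char *tok;
        int ret = 0;

        for (tok = strtok(opts, ","); tok; tok = strtok(NULL, ",")) {
                ret = parse_one(tok);
                if (ret)        /* the fix: stop at the first failure */
                        break;
        }
        return ret;
}

int main(void)
{
        char opts[] = "dev_size=1048576,bad_opt=1,hw_block_size=512";

        printf("parse_opts -> %d\n", parse_opts(opts));
        return 0;
}
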
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 24e8580f07b8ec..4339ab2133b380 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1118,6 +1118,9 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, default: break; } + + if (ret) + break; } kfree(orig); From 530c6891b1220cba780b6c18f4691d85a3435080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:24 -0600 Subject: [PATCH 1466/1911] target: allow ALUA setup for some passthrough backends This patch allows passthrough backends to use the core/base LIO ALUA setup and state checks, but still handle the execution of commands. This will allow the target_core_user module to execute STPG and RTPG in userspace, and not have to duplicate the ALUA state checks, path information (needed so we can check if command is executable on specific paths) and setup (rtslib sets/updates the configfs ALUA interface like it does for iblock or file). For STPG, the target_core_user userspace daemon, tcmu-runner will still execute the STPG, and to update the core/base LIO state it will use the existing configfs interface. For RTPG, tcmu-runner will loop over configfs and/or cache the state. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 9 +++++---- drivers/target/target_core_pscsi.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- include/target/target_core_backend.h | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index f5e330099bfca7..a41bbb8087cf24 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return 0; /* @@ -1973,7 +1973,7 @@ ssize_t core_alua_store_tg_pt_gp_info( unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2230,7 +2230,7 @@ ssize_t core_alua_store_offline_bit( unsigned long tmp; int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2316,7 +2316,8 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 44d92f23a3f054..94cda7991e80ab 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1080,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate) static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = 
THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | + TRANSPORT_FLAG_PASSTHROUGH_ALUA, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0dbfa01657505..6fb191914f458f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -602,7 +602,8 @@ int core_tpg_add_lun( if (ret) goto out_kill_ref; - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b54b98dc2d4a77..1b0f447ce850f0 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -4,7 +4,12 @@ #include #include -#define TRANSPORT_FLAG_PASSTHROUGH 1 +#define TRANSPORT_FLAG_PASSTHROUGH 0x1 +/* + * ALUA commands, state checks and setup operations are handled by the + * backend module. + */ +#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 struct request_queue; struct scatterlist; From 0a4145729871ef29afe8b0c57560a1f5bd736416 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:25 -0600 Subject: [PATCH 1467/1911] target: fail ALUA transitions for pscsi We do not setup the LU group for pscsi devices, so if you write a state to alua_access_state that will cause a transition you will get a NULL pointer dereference. This patch will fail attempts to try and transition the path for backend devices that set the TRANSPORT_FLAG_PASSTHROUGH_ALUA flag. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index a41bbb8087cf24..5b5a1e250a6594 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1149,6 +1149,9 @@ int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *tg_pt_gp; int primary, valid_states, rc = 0; + if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) + return -ENODEV; + valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; if (core_alua_check_transition(new_state, valid_states, &primary) != 0) return -EINVAL; From 207ee84133c00a8a2a5bdec94df4a5b37d78881c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:26 -0600 Subject: [PATCH 1468/1911] target: Use system workqueue for ALUA transitions If tcmu-runner is processing a STPG and needs to change the kernel's ALUA state then we cannot use the same work queue for task management requests and ALUA transitions, because we could deadlock. The problem occurs when a STPG times out before tcmu-runner is able to call into target_tg_pt_gp_alua_access_state_store-> core_alua_do_port_transition -> core_alua_do_transition_tg_pt -> queue_work. In this case, the tmr is on the work queue waiting for the STPG to complete, but the STPG transition is now queued behind the waiting tmr. Note: This bug will also be fixed by this patch: http://www.spinics.net/lists/target-devel/msg14560.html which switches the tmr code to use the system workqueues. 
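To see why sharing one ordered queue can deadlock, here is a contrived, self-contained module sketch; every name in it (wqdemo, tmr_work, stpg_work) is invented for the example and none of it is LIO code. The active path mirrors the fix, sending the second work item to the system workqueue; queuing it on the private ordered queue instead would reproduce the hang:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/completion.h>

static struct workqueue_struct *one_wq;
static DECLARE_COMPLETION(stpg_done);

static void stpg_work_fn(struct work_struct *w)
{
        complete(&stpg_done);   /* stands in for the transition finishing */
}
static DECLARE_WORK(stpg_work, stpg_work_fn);

static void tmr_work_fn(struct work_struct *w)
{
        /*
         * If stpg_work were queued on one_wq as well, it could never run
         * while this function occupies the queue's only worker, so the
         * wait below would never return -- the deadlock described above.
         * Putting it on the system workqueue lets it run elsewhere.
         */
        schedule_work(&stpg_work);
        wait_for_completion(&stpg_done);
        pr_info("wqdemo: transition observed, tmr work can finish\n");
}
static DECLARE_WORK(tmr_work, tmr_work_fn);

static int __init wqdemo_init(void)
{
        one_wq = alloc_ordered_workqueue("wqdemo", 0);
        if (!one_wq)
                return -ENOMEM;
        queue_work(one_wq, &tmr_work);  /* tmr handling on the private queue */
        return 0;
}

static void __exit wqdemo_exit(void)
{
        flush_work(&stpg_work);
        destroy_workqueue(one_wq);
}

module_init(wqdemo_init);
module_exit(wqdemo_exit);
MODULE_LICENSE("GPL");
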
For both, I am not sure if we need a dedicated workqueue since it is not a performance path and I do not think we need WQ_MEM_RECLAIM to make forward progress to free up memory like the block layer does. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 5b5a1e250a6594..58bf5e6350ac5c 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1121,13 +1121,11 @@ static int core_alua_do_transition_tg_pt( unsigned long transition_tmo; transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, + transition_tmo); } else { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, 0); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } From d7175373f2745ed4abe5b388d5aabd06304f801e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:48 -0600 Subject: [PATCH 1469/1911] target: fix ALUA transition timeout handling The implicit transition time tells initiators the min time to wait before timing out a transition. We currently schedule the transition to occur in tg_pt_gp_implicit_trans_secs seconds so there is no room for delays. If core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata needs to write out info to a remote file, then the initiator can easily time out the operation. 
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 23 ++++++++--------------- include/target/target_core_base.h | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 58bf5e6350ac5c..594807cd92cbdb 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1013,7 +1013,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1076,13 +1076,12 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == + if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { /* Just in case */ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; return 0; @@ -1117,15 +1116,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } @@ -1696,8 +1689,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1805,7 +1798,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 37c274e61accee..4b784b6e21c0d9 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion 
*tg_pt_gp_transition_complete; }; From 1ca4d4fa3bfcbe8964f81e5818a9b90436466eb0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:49 -0600 Subject: [PATCH 1470/1911] target: allow userspace to set state to transitioning Userspace target_core_user handlers like tcmu-runner may want to set the ALUA state to transitioning while it does implicit transitions. This patch allows that state when set from configfs. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 37 ++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 594807cd92cbdb..252d4e4b7b33d5 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -43,7 +43,7 @@ #include "target_core_ua.h" static sense_reason_t core_alua_check_transition(int state, int valid, - int *primary); + int *primary, int explicit); static int core_alua_set_tg_pt_secondary_state( struct se_lun *lun, int explicit, int offline); @@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * the state is a primary or secondary target port asymmetric * access state. */ - rc = core_alua_check_transition(alua_access_state, - valid_states, &primary); + rc = core_alua_check_transition(alua_access_state, valid_states, + &primary, 1); if (rc) { /* * If the SET TARGET PORT GROUPS attempts to establish @@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd) * Check implicit and explicit ALUA state change request. */ static sense_reason_t -core_alua_check_transition(int state, int valid, int *primary) +core_alua_check_transition(int state, int valid, int *primary, int explicit) { /* * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are @@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary) *primary = 0; break; case ALUA_ACCESS_STATE_TRANSITION: - /* - * Transitioning is set internally, and - * cannot be selected manually. - */ - goto not_supported; + if (!(valid & ALUA_T_SUP) || explicit) + /* + * Transitioning is set internally and by tcmu daemon, + * and cannot be selected through a STPG. + */ + goto not_supported; + *primary = 0; + break; default: pr_err("Unknown ALUA access state: 0x%02x\n", state); return TCM_INVALID_PARAMETER_LIST; @@ -1070,7 +1073,7 @@ static int core_alua_do_transition_tg_pt( if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) return 0; - if (new_state == ALUA_ACCESS_STATE_TRANSITION) + if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) return -EAGAIN; /* @@ -1091,10 +1094,6 @@ static int core_alua_do_transition_tg_pt( * Save the old primary ALUA access state, and set the current state * to ALUA_ACCESS_STATE_TRANSITION. */ - tg_pt_gp->tg_pt_gp_alua_previous_state = - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_TRANSITION); tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? 
@@ -1103,6 +1102,13 @@ static int core_alua_do_transition_tg_pt( core_alua_queue_state_change_ua(tg_pt_gp); + if (new_state == ALUA_ACCESS_STATE_TRANSITION) + return 0; + + tg_pt_gp->tg_pt_gp_alua_previous_state = + atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); + tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; + /* * Check for the optional ALUA primary state transition delay */ @@ -1144,7 +1150,8 @@ int core_alua_do_port_transition( return -ENODEV; valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; - if (core_alua_check_transition(new_state, valid_states, &primary) != 0) + if (core_alua_check_transition(new_state, valid_states, &primary, + explicit) != 0) return -EINVAL; local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem; From 760bf578edf8122f2503a3a6a3f4b0de3b6ce0bb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:50 -0600 Subject: [PATCH 1471/1911] target: fix race during implicit transition work flushes This fixes the following races: 1. core_alua_do_transition_tg_pt could have read tg_pt_gp_alua_access_state and gone into this if chunk: if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { and then core_alua_do_transition_tg_pt_work could update the state. core_alua_do_transition_tg_pt would then only set tg_pt_gp_alua_pending_state and the tg_pt_gp_alua_access_state would not get updated with the second calls state. 2. core_alua_do_transition_tg_pt could be setting tg_pt_gp_transition_complete while the tg_pt_gp_transition_work is already completing. core_alua_do_transition_tg_pt then waits on the completion that will never be called. To handle these issues, we just call flush_work which will return when core_alua_do_transition_tg_pt_work has completed so there is no need to do the complete/wait. And, if core_alua_do_transition_tg_pt_work was running, instead of trying to sneak in the state change, we just schedule up another core_alua_do_transition_tg_pt_work call. Note that this does not handle a possible race where there are multiple threads call core_alua_do_transition_tg_pt at the same time. I think we need a mutex in target_tg_pt_gp_alua_access_state_store. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 252d4e4b7b33d5..fd7c16a7ca6e06 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1079,16 +1079,8 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == - ALUA_ACCESS_STATE_TRANSITION) { - /* Just in case */ - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - tg_pt_gp->tg_pt_gp_transition_complete = &wait; + if (!explicit) flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; - return 0; - } /* * Save the old primary ALUA access state, and set the current state From 972c7f167974fa41ea8a2eed4b857cc59f59c42c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:08 -0600 Subject: [PATCH 1472/1911] tcmu: add helper to check if dev was configured This adds a helper to check if the dev was configured. It will be used in the next patch to prevent updates to some config settings after the device has been setup. 
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4339ab2133b380..892b311e78747d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -998,6 +998,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p) kfree(udev); } +static bool tcmu_dev_configured(struct tcmu_dev *udev) +{ + return udev->uio_info.uio_dev ? true : false; +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -1019,8 +1024,7 @@ static void tcmu_free_device(struct se_device *dev) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - /* Device was configured */ - if (udev->uio_info.uio_dev) { + if (tcmu_dev_configured(udev)) { tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); From af980e46a26ac8805685bb70c8572dbc47abb126 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:09 -0600 Subject: [PATCH 1473/1911] tcmu: make cmd timeout configurable A single daemon could implement multiple types of devices using multuple types of real devices that may not support restarting from crashes and/or handling tcmu timeouts. This makes the cmd timeout configurable, so handlers that do not support it can turn if off for now. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 41 ++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 892b311e78747d..10cc15f0b1faac 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -112,6 +112,7 @@ struct tcmu_dev { spinlock_t commands_lock; struct timer_list timeout; + unsigned int cmd_time_out; char dev_config[TCMU_CONFIG_LEN]; }; @@ -172,7 +173,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; - tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); + if (udev->cmd_time_out) + tcmu_cmd->deadline = jiffies + + msecs_to_jiffies(udev->cmd_time_out); idr_preload(GFP_KERNEL); spin_lock_irq(&udev->commands_lock); @@ -451,7 +454,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) pr_debug("sleeping for ring space\n"); spin_unlock_irq(&udev->cmdr_lock); - ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); + if (udev->cmd_time_out) + ret = schedule_timeout( + msecs_to_jiffies(udev->cmd_time_out)); + else + ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); @@ -526,8 +533,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) /* TODO: only if FLUSH and FUA? 
*/ uio_event_notify(&udev->uio_info); - mod_timer(&udev->timeout, - round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT))); + if (udev->cmd_time_out) + mod_timer(&udev->timeout, round_jiffies_up(jiffies + + msecs_to_jiffies(udev->cmd_time_out))); return TCM_NO_SENSE; } @@ -742,6 +750,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) } udev->hba = hba; + udev->cmd_time_out = TCMU_TIME_OUT; init_waitqueue_head(&udev->wait_cmdr); spin_lock_init(&udev->cmdr_lock); @@ -1037,7 +1046,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_err, + Opt_cmd_time_out, Opt_err, }; static match_table_t tokens = { @@ -1045,6 +1054,7 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, + {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1111,6 +1121,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; + case Opt_cmd_time_out: + if (tcmu_dev_configured(udev)) { + pr_err("Can not update cmd_time_out after device has been configured.\n"); + ret = -EINVAL; + break; + } + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); + kfree(arg_p); + if (ret < 0) + pr_err("kstrtouint() failed for cmd_time_out=\n"); + udev->cmd_time_out *= MSEC_PER_SEC; + break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1138,7 +1165,9 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); + bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); + bl += sprintf(b + bl, "Cmd Time Out: %lu\n", + udev->cmd_time_out / MSEC_PER_SEC); return bl; } From 7d7a743543905a8297dce53b36e793e5307da5d7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 18 Mar 2017 15:04:13 -0700 Subject: [PATCH 1474/1911] tcmu: Convert cmd_time_out into backend device attribute Instead of putting cmd_time_out under ../target/core/user_0/foo/control, which has historically been used by parameters needed for initial backend device configuration, go ahead and move cmd_time_out into a backend device attribute. In order to do this, tcmu_module_init() has been updated to create a local struct configfs_attribute **tcmu_attrs, that is based upon the existing passthrough_attrib_attrs along with the new cmd_time_out attribute. Once **tcm_attrs has been setup, go ahead and point it at tcmu_ops->tb_dev_attrib_attrs so it's picked up by target-core. Also following MNC's previous change, ->cmd_time_out is stored in milliseconds but exposed via configfs in seconds. Also, note this patch restricts the modification of ->cmd_time_out to before + after the TCMU device has been configured, but not while it has active fabric exports. 
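The array construction is a plain count-allocate-copy-append on a NULL-terminated pointer array. A userspace sketch of that logic, with strings standing in for struct configfs_attribute pointers (the attribute names used here are placeholders):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Placeholder "attributes"; strings stand in for attribute pointers. */
static const char *base_attrs[] = { "attr_a", "attr_b", "attr_c", NULL };

/*
 * Count the NULL-terminated entries, allocate room for them plus one
 * new entry and the terminator, copy, then append -- the same shape as
 * the tcmu_attrs construction described above.
 */
static const char **append_attr(const char **base, const char *extra)
{
        size_t n = 0;
        const char **out;

        while (base[n])
                n++;

        out = calloc(n + 2, sizeof(*out));      /* +1 new entry, +1 NULL */
        if (!out)
                return NULL;

        memcpy(out, base, n * sizeof(*out));
        out[n] = extra;                         /* out[n + 1] stays NULL */
        return out;
}

int main(void)
{
        const char **attrs = append_attr(base_attrs, "cmd_time_out");
        size_t i;

        for (i = 0; attrs && attrs[i]; i++)
                puts(attrs[i]);
        free(attrs);
        return 0;
}
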
Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 94 ++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 10cc15f0b1faac..c6874c38a10bc4 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1046,7 +1047,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_cmd_time_out, Opt_err, + Opt_err, }; static match_table_t tokens = { @@ -1054,7 +1055,6 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, - {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1121,23 +1121,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; - case Opt_cmd_time_out: - if (tcmu_dev_configured(udev)) { - pr_err("Can not update cmd_time_out after device has been configured.\n"); - ret = -EINVAL; - break; - } - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); - kfree(arg_p); - if (ret < 0) - pr_err("kstrtouint() failed for cmd_time_out=\n"); - udev->cmd_time_out *= MSEC_PER_SEC; - break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1165,9 +1148,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); - bl += sprintf(b + bl, "Cmd Time Out: %lu\n", - udev->cmd_time_out / MSEC_PER_SEC); + bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); return bl; } @@ -1186,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd) return passthrough_parse_cdb(cmd, tcmu_queue_cmd); } -static const struct target_backend_ops tcmu_ops = { +static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + + return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC); +} + +static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page, + size_t count) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + u32 val; + int ret; + + if (da->da_dev->export_count) { + pr_err("Unable to set tcmu cmd_time_out while exports exist\n"); + return -EINVAL; + } + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (!val) { + pr_err("Illegal value for cmd_time_out\n"); + return -EINVAL; + } + + udev->cmd_time_out = val * MSEC_PER_SEC; + return count; +} +CONFIGFS_ATTR(tcmu_, cmd_time_out); + +static struct configfs_attribute **tcmu_attrs; + +static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, @@ -1200,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = 
tcmu_get_blocks, - .tb_dev_attrib_attrs = passthrough_attrib_attrs, + .tb_dev_attrib_attrs = NULL, }; static int __init tcmu_module_init(void) { - int ret; + int ret, i, len = 0; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1227,12 +1249,31 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + len += sizeof(struct configfs_attribute *); + } + len += sizeof(struct configfs_attribute *) * 2; + + tcmu_attrs = kzalloc(len, GFP_KERNEL); + if (!tcmu_attrs) { + ret = -ENOMEM; + goto out_unreg_genl; + } + + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + tcmu_attrs[i] = passthrough_attrib_attrs[i]; + } + tcmu_attrs[i] = &tcmu_attr_cmd_time_out; + tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs; + ret = transport_backend_register(&tcmu_ops); if (ret) - goto out_unreg_genl; + goto out_attrs; return 0; +out_attrs: + kfree(tcmu_attrs); out_unreg_genl: genl_unregister_family(&tcmu_genl_family); out_unreg_device: @@ -1246,6 +1287,7 @@ static int __init tcmu_module_init(void) static void __exit tcmu_module_exit(void) { target_backend_unregister(&tcmu_ops); + kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); root_device_unregister(tcmu_root_device); kmem_cache_destroy(tcmu_cmd_cache); From c4a9b538ab2a109c5f9798bea1f8f4bf93aadfb9 Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Wed, 15 Mar 2017 09:48:43 -0700 Subject: [PATCH 1475/1911] qla2xxx: Allow vref count to timeout on vport delete. Cc: Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_attr.c | 4 +--- drivers/scsi/qla2xxx/qla_def.h | 6 +++++- drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 14 ++++++++------ drivers/scsi/qla2xxx/qla_os.c | 1 + 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index f610103994afd4..435ff7fd6384a0 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) "Timer for the VP[%d] has stopped\n", vha->vp_idx); } - BUG_ON(atomic_read(&vha->vref_count)); - qla2x00_free_fcports(vha); mutex_lock(&ha->vport_lock); @@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); - if (vha->qpair->vp_idx == vha->vp_idx) { + if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) { if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0x7087, "Queue Pair delete failed.\n"); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 625d438e3cce01..8662ef4192dbee 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4076,6 +4076,7 @@ typedef struct scsi_qla_host { /* Count of active session/fcport */ int fcport_count; wait_queue_head_t fcport_waitQ; + wait_queue_head_t vref_waitq; } scsi_qla_host_t; struct qla27xx_image_status { @@ -4131,14 +4132,17 @@ struct qla2_sgx { mb(); \ if (__vha->flags.delete_progress) { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ __bail = 1; \ } else { \ __bail = 0; \ } \ } while (0) -#define QLA_VHA_MARK_NOT_BUSY(__vha) \ +#define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ +} while (0) \ #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do { \ atomic_inc(&__qpair->ref_count); \ 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 32fb9007f13770..9f3740c68cc811 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5148,6 +5148,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) } } atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } spin_unlock_irqrestore(&ha->vport_slock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index c6d6f0d912ff75..09a490c98763a9 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - spin_lock_irqsave(&ha->vport_slock, flags); - while (atomic_read(&vha->vref_count)) { - spin_unlock_irqrestore(&ha->vport_slock, flags); - - msleep(500); + wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), + 10*HZ); - spin_lock_irqsave(&ha->vport_slock, flags); + spin_lock_irqsave(&ha->vport_slock, flags); + if (atomic_read(&vha->vref_count)) { + ql_dbg(ql_dbg_vport, vha, 0xfffa, + "vha->vref_count=%u timeout\n", vha->vref_count.counter); + vha->vref_count = (atomic_t)ATOMIC_INIT(0); } list_del(&vha->list); qlt_update_vp_map(vha, RESET_VP_IDX); @@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } i++; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 1fed235a1b4a03..54d4e802bde072 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4268,6 +4268,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); + init_waitqueue_head(&vha->vref_waitq); vha->gnl.size = sizeof(struct get_name_list_extended) * (ha->max_loop_id + 1); From ae940f2c472a62904dc18234de5cf3ed28f195ee Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:44 -0700 Subject: [PATCH 1476/1911] qla2xxx: Fix memory leak for abts processing Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 45f5077684f0a5..ecf97c5993e8d0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6642,6 +6642,8 @@ qlt_handle_abts_recv_work(struct work_struct *work) spin_lock_irqsave(&ha->hardware_lock, flags); qlt_response_pkt_all_vps(vha, (response_t *)&op->atio); spin_unlock_irqrestore(&ha->hardware_lock, flags); + + kfree(op); } void From 8b666809e10cda9814af3e8be339d35b83909056 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:45 -0700 Subject: [PATCH 1477/1911] qla2xxx: Fix request queue corruption. When FW notify driver or driver detects low FW resource, driver tries to send out Busy SCSI Status to tell Initiator side to back off. During the send process, the lock was not held. 
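The fix threads an "already locked" flag down to the helper so the busy
status is always queued under the hardware lock, whether or not the caller
took it first. A minimal standalone sketch of that pattern (userspace only,
with a pthread mutex standing in for hardware_lock; the function and
variable names here are illustrative, not the driver's):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t hw_lock = PTHREAD_MUTEX_INITIALIZER;

/* Must be called with hw_lock held: it touches the shared request ring. */
static void send_busy_locked(int tag)
{
        printf("queue BUSY status for tag %d\n", tag);
}

/*
 * The caller may or may not already hold hw_lock; take it only when the
 * caller did not, mirroring the new ha_locked argument.
 */
static void check_qfull_threshold(int tag, bool hw_locked)
{
        if (!hw_locked)
                pthread_mutex_lock(&hw_lock);

        send_busy_locked(tag);

        if (!hw_locked)
                pthread_mutex_unlock(&hw_lock);
}

int main(void)
{
        check_qfull_threshold(1, false);        /* caller did not lock */

        pthread_mutex_lock(&hw_lock);           /* caller already locked */
        check_qfull_threshold(2, true);
        pthread_mutex_unlock(&hw_lock);
        return 0;
}
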
Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ecf97c5993e8d0..a463bcc57902f2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5079,16 +5079,22 @@ qlt_send_busy(struct scsi_qla_host *vha, static int qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha, - struct atio_from_isp *atio) + struct atio_from_isp *atio, bool ha_locked) { struct qla_hw_data *ha = vha->hw; uint16_t status; + unsigned long flags; if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha)) return 0; + if (!ha_locked) + spin_lock_irqsave(&ha->hardware_lock, flags); status = temp_sam_status; qlt_send_busy(vha, atio, status); + if (!ha_locked) + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 1; } @@ -5133,7 +5139,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) { - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked); if (rc != 0) { tgt->atio_irq_cmd_count--; return; @@ -5256,7 +5262,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) break; } - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, true); if (rc != 0) { tgt->irq_cmd_count--; return; From 8f6fc8d4e7ae2347d6261d11a7eb2b247d2954d8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:46 -0700 Subject: [PATCH 1478/1911] qla2xxx: Fix inadequate lock protection for ABTS. Normally, ABTS is sent to Target Core as Task MGMT command. In the case of error, qla2xxx needs to send response, hardware_lock is required to prevent request queue corruption. 
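The request queue is a single ring of IOCB slots shared by every submitter,
so two contexts reserving a slot without the lock can hand out the same
entry. The hedged sketch below (userspace, invented names, a plain array
index in place of the ISP ring pointers) shows the shape of the race the
lock prevents:

#include <pthread.h>
#include <stdio.h>

#define RING_SIZE 8

static pthread_mutex_t hardware_lock = PTHREAD_MUTEX_INITIALIZER;
static int ring[RING_SIZE];
static unsigned int ring_in;            /* producer index */

/* Reserve the next IOCB slot; must be serialized by hardware_lock. */
static int alloc_iocb_slot(void)
{
        int slot;

        pthread_mutex_lock(&hardware_lock);
        /* without the lock, two callers could read the same index here */
        slot = ring_in++ % RING_SIZE;
        ring[slot] = 1;
        pthread_mutex_unlock(&hardware_lock);
        return slot;
}

static void *submitter(void *unused)
{
        for (int i = 0; i < 4; i++)
                printf("got slot %d\n", alloc_iocb_slot());
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, submitter, NULL);
        pthread_create(&b, NULL, submitter, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}
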
Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a463bcc57902f2..a78c3e9bcb571e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha, fc_port_t *fcport, bool local); void qlt_unreg_sess(struct fc_port *sess); +static void qlt_24xx_handle_abts(struct scsi_qla_host *, + struct abts_recv_from_24xx *); + /* * Global Variables */ @@ -389,6 +392,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, (struct abts_recv_from_24xx *)atio; struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha, entry->vp_index); + unsigned long flags; + if (unlikely(!host)) { ql_dbg(ql_dbg_tgt, vha, 0xffff, "qla_target(%d): Response pkt (ABTS_RECV_24XX) " @@ -396,9 +401,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, vha->vp_idx, entry->vp_index); break; } - qlt_response_pkt(host, (response_t *)atio); + if (!ha_locked) + spin_lock_irqsave(&host->hw->hardware_lock, flags); + qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio); + if (!ha_locked) + spin_unlock_irqrestore(&host->hw->hardware_lock, flags); break; - } /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */ From f159b3c7cd45c550d0f73806451a10b6b6bc08ae Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:47 -0700 Subject: [PATCH 1479/1911] qla2xxx: Fix sess_lock & hardware_lock lock order problem. The main lock that needs to be held for CMD or TMR submission to upper layer is the sess_lock. The sess_lock is used to serialize cmd submission and session deletion. The addition of hardware_lock being held is not necessary. This patch removes hardware_lock dependency from CMD/TMR submission. Use hardware_lock only for error response in this case. 
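In other words, the ordering rule becomes: submission runs under sess_lock
alone, and hardware_lock is taken only after sess_lock has been dropped,
never nested inside it. A rough standalone sketch of that discipline
(userspace mutexes, invented helpers; the lockdep report that motivated the
change is reproduced below):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sess_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t hardware_lock = PTHREAD_MUTEX_INITIALIZER;

/* Submission to the upper layer: sess_lock only. */
static void submit_tmr(void)
{
        pthread_mutex_lock(&sess_lock);
        printf("hand TMR to target core\n");
        pthread_mutex_unlock(&sess_lock);
}

/* Error path: hardware_lock only, and only after sess_lock is gone. */
static void send_term_response(void)
{
        pthread_mutex_lock(&hardware_lock);
        printf("send termination response\n");
        pthread_mutex_unlock(&hardware_lock);
}

int main(void)
{
        submit_tmr();
        send_term_response();   /* never taken while sess_lock is held */
        return 0;
}
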
Path1 CPU0 CPU1 ---- ---- lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); Path2/deadlock *** DEADLOCK *** Call Trace: dump_stack+0x85/0xc2 print_circular_bug+0x1e3/0x250 __lock_acquire+0x1425/0x1620 lock_acquire+0xbf/0x210 _raw_spin_lock_irqsave+0x53/0x70 qlt_sess_work_fn+0x21d/0x480 [qla2xxx] process_one_work+0x1f4/0x6e0 Cc: Cc: Bart Van Assche Reported-by: Bart Van Assche Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 41 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a78c3e9bcb571e..989f931af15613 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5727,30 +5727,23 @@ static void qlt_abort_work(struct qla_tgt *tgt, } } - spin_lock_irqsave(&ha->hardware_lock, flags); - - if (tgt->tgt_stop) - goto out_term; - rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); + if (rc != 0) goto out_term; - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; out_term2: - spin_lock_irqsave(&ha->hardware_lock, flags); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); out_term: + spin_lock_irqsave(&ha->hardware_lock, flags); qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - - if (sess) - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } static void qlt_tmr_work(struct qla_tgt *tgt, @@ -5770,7 +5763,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (tgt->tgt_stop) - goto out_term; + goto out_term2; s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id; sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); @@ -5782,11 +5775,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (!sess) - goto out_term; + goto out_term2; } else { if (sess->deleted) { sess = NULL; - goto out_term; + goto out_term2; } if (!kref_get_unless_zero(&sess->sess_kref)) { @@ -5794,7 +5787,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, "%s: kref_get fail %8phC\n", __func__, sess->port_name); sess = NULL; - goto out_term; + goto out_term2; } } @@ -5804,17 +5797,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt, unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); - if (rc != 0) - goto out_term; - ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + if (rc != 0) + goto out_term; return; +out_term2: + if (sess) + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } static void qlt_sess_work_fn(struct work_struct *work) From 5b33469a055c77001fd2c62b0f985c991b0e5b65 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:48 -0700 Subject: [PATCH 1480/1911] qla2xxx: Allow relogin to proceed if remote login did not finish If the remote port have started the login process, 
then the PLOGI and PRLI should be back to back. Driver will allow the remote port to complete the process. For the case where the remote port decide to back off from sending PRLI, this local port sets an expiration timer for the PRLI. Once the expiration time passes, the relogin retry logic is allowed to go through and perform login with the remote port. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 ++ drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++-- drivers/scsi/qla2xxx/qla_isr.c | 25 +++++++++++++++++++------ drivers/scsi/qla2xxx/qla_target.c | 1 + 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8662ef4192dbee..56cd45fc600ae6 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2300,6 +2300,8 @@ typedef struct fc_port { struct ct_sns_desc ct_desc; enum discovery_state disc_state; enum login_state fw_login_state; + unsigned long plogi_nack_done_deadline; + u32 login_gen, last_login_gen; u32 rscn_gen, last_rscn_gen; u32 chip_reset; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 9f3740c68cc811..a7865a5d556d9d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -876,10 +876,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_retry--; if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return 0; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return 0; + } + /* for pure Target Mode. 
Login will not be initiated */ if (vha->host->active_mode == MODE_TARGET) return 0; @@ -1041,10 +1045,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, fcport->flags); if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return; + } + if (fcport->flags & FCF_ASYNC_SENT) { fcport->login_retry++; set_bit(RELOGIN_NEEDED, &vha->dpc_flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3c66ea29de2704..b2c6da752edd2d 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1620,9 +1620,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, QLA_LOGIO_LOGIN_RETRIED : 0; if (logio->entry_status) { ql_log(ql_log_warn, fcport->vha, 0x5034, - "Async-%s error entry - hdl=%x" + "Async-%s error entry - %8phC hdl=%x" "portid=%02x%02x%02x entry-status=%x.\n", - type, sp->handle, fcport->d_id.b.domain, + type, fcport->port_name, sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, logio->entry_status); ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d, @@ -1633,8 +1633,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) { ql_dbg(ql_dbg_async, fcport->vha, 0x5036, - "Async-%s complete - hdl=%x portid=%02x%02x%02x " - "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x " + "iop0=%x.\n", type, fcport->port_name, sp->handle, + fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le32_to_cpu(logio->io_parameter[0])); @@ -1674,6 +1675,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, case LSC_SCODE_NPORT_USED: data[0] = MBS_LOOP_ID_USED; break; + case LSC_SCODE_CMD_FAILED: + if (iop[1] == 0x0606) { + /* + * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI, + * Target side acked. + */ + data[0] = MBS_COMMAND_COMPLETE; + goto logio_done; + } + data[0] = MBS_COMMAND_ERROR; + break; case LSC_SCODE_NOXCB: vha->hw->exch_starvation++; if (vha->hw->exch_starvation > 5) { @@ -1695,8 +1707,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, } ql_dbg(ql_dbg_async, fcport->vha, 0x5037, - "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x " - "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x " + "iop0=%x iop1=%x.\n", type, fcport->port_name, + sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le16_to_cpu(logio->comp_status), le32_to_cpu(logio->io_parameter[0]), diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 989f931af15613..925d9b858b241e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -562,6 +562,7 @@ void qla2x00_async_nack_sp_done(void *s, int res) sp->fcport->login_gen++; sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP; sp->fcport->logout_on_delete = 1; + sp->fcport->plogi_nack_done_deadline = jiffies + HZ; break; case SRB_NACK_PRLI: From be25152c0d9e236076323abbe9def9714234b761 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:49 -0700 Subject: [PATCH 1481/1911] qla2xxx: Improve T10-DIF/PI handling in driver. Add routines to support T10 DIF tag. 
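For context, the 8 bytes of T10 protection information carried with each
logical block are a 16-bit guard (CRC), a 16-bit application tag and a
32-bit reference tag, all big-endian; the new DIF error handling decodes
the actual/expected pair reported by the firmware in that layout and
classifies the first mismatch. A hedged standalone sketch of that decode
(illustrative only, not driver code; the DIF_ERR_* values mirror the ones
added to qla_target.h):

#include <stdint.h>
#include <stdio.h>

enum dif_err { DIF_ERR_NONE, DIF_ERR_GRD, DIF_ERR_REF, DIF_ERR_APP };

struct pi_tuple {                       /* decoded T10 PI fields */
        uint16_t guard;
        uint16_t app_tag;
        uint32_t ref_tag;
};

/* Fields are big-endian on the wire: guard at +0, app at +2, ref at +4. */
static struct pi_tuple decode_pi(const uint8_t *p)
{
        struct pi_tuple t;

        t.guard   = (uint16_t)(p[0] << 8 | p[1]);
        t.app_tag = (uint16_t)(p[2] << 8 | p[3]);
        t.ref_tag = (uint32_t)p[4] << 24 | (uint32_t)p[5] << 16 |
                    (uint32_t)p[6] << 8 | p[7];
        return t;
}

/* Check app tag, then ref tag, then guard, as the error handler does. */
static enum dif_err classify(struct pi_tuple a, struct pi_tuple e)
{
        if (a.app_tag != e.app_tag)
                return DIF_ERR_APP;
        if (a.ref_tag != e.ref_tag)
                return DIF_ERR_REF;
        if (a.guard != e.guard)
                return DIF_ERR_GRD;
        return DIF_ERR_NONE;
}

int main(void)
{
        uint8_t actual[8]   = { 0x12, 0x34, 0xff, 0xff, 0, 0, 0, 0x10 };
        uint8_t expected[8] = { 0x12, 0x35, 0xff, 0xff, 0, 0, 0, 0x10 };

        /* Only the guard differs here, so this prints 1 (DIF_ERR_GRD). */
        printf("dif_err_code = %d\n",
               classify(decode_pi(actual), decode_pi(expected)));
        return 0;
}
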
Signed-off-by: Quinn Tran Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_dbg.h | 1 + drivers/scsi/qla2xxx/qla_def.h | 10 + drivers/scsi/qla2xxx/qla_gbl.h | 6 +- drivers/scsi/qla2xxx/qla_iocb.c | 13 +- drivers/scsi/qla2xxx/qla_target.c | 540 +++++++++++++++++------------ drivers/scsi/qla2xxx/qla_target.h | 38 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 49 ++- 7 files changed, 406 insertions(+), 251 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index e1fc4e66966aea..c6bffe929fe7dc 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...); #define ql_dbg_tgt 0x00004000 /* Target mode */ #define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */ #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */ +#define ql_dbg_tgt_dif 0x00000800 /* Target mode dif */ extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *, uint32_t, void **); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 56cd45fc600ae6..9d1d3dcf1c878a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3127,6 +3127,16 @@ struct bidi_statistics { unsigned long long transfer_bytes; }; +struct qla_tc_param { + struct scsi_qla_host *vha; + uint32_t blk_sz; + uint32_t bufflen; + struct scatterlist *sg; + struct scatterlist *prot_sg; + struct crc_context *ctx; + uint8_t *ctx_dsd_alloced; +}; + /* Multi queue support */ #define MBC_INITIALIZE_MULTIQ 0x1f #define QLA_QUE_PAGE 0X1000 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index b3d6441d1d90eb..ca6f122e58656d 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -256,11 +256,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *); extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *); extern int qla2x00_issue_marker(scsi_qla_host_t *, int); extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *); extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *, diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 535079280288fb..ea027f6a7fd4e9 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx, int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, struct scatterlist *sg_prot; uint32_t *cur_dsd = dsd; uint16_t used_dsds = tot_dsds; - uint32_t prot_int; /* protection interval */ 
uint32_t partial; struct qla2_sgx sgx; @@ -966,7 +965,7 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } @@ -1005,7 +1004,7 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, int qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, - uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ @@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 925d9b858b241e..532004981dbd0a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -143,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +static const char *prot_op_str(u32 prot_op) +{ + switch (prot_op) { + case TARGET_PROT_NORMAL: return "NORMAL"; + case TARGET_PROT_DIN_INSERT: return "DIN_INSERT"; + case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT"; + case TARGET_PROT_DIN_STRIP: return "DIN_STRIP"; + case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP"; + case TARGET_PROT_DIN_PASS: return "DIN_PASS"; + case TARGET_PROT_DOUT_PASS: return "DOUT_PASS"; + default: return "UNKNOWN"; + } +} + /* This API intentionally takes dest as a parameter, rather than returning * int value to avoid caller forgetting to issue wmb() after the store */ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) @@ -2022,6 +2036,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd) } EXPORT_SYMBOL(qlt_free_mcmd); +/* + * ha->hardware_lock supposed to be held on entry. 
Might drop it, then + * reacquire + */ +void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, + uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq) +{ + struct atio_from_isp *atio = &cmd->atio; + struct ctio7_to_24xx *ctio; + uint16_t temp; + + ql_dbg(ql_dbg_tgt_dif, vha, 0x3066, + "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, " + "sense_key=%02x, asc=%02x, ascq=%02x", + vha, atio, scsi_status, sense_key, asc, ascq); + + ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL); + if (!ctio) { + ql_dbg(ql_dbg_async, vha, 0x3067, + "qla2x00t(%ld): %s failed: unable to allocate request packet", + vha->host_no, __func__); + goto out; + } + + ctio->entry_type = CTIO_TYPE7; + ctio->entry_count = 1; + ctio->handle = QLA_TGT_SKIP_HANDLE; + ctio->nport_handle = cmd->sess->loop_id; + ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); + ctio->vp_index = vha->vp_idx; + ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; + ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; + ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0]; + ctio->exchange_addr = atio->u.isp24.exchange_addr; + ctio->u.status1.flags = (atio->u.isp24.attr << 9) | + cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS); + temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id); + ctio->u.status1.ox_id = cpu_to_le16(temp); + ctio->u.status1.scsi_status = + cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status); + ctio->u.status1.response_len = cpu_to_le16(18); + ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio)); + + if (ctio->u.status1.residual != 0) + ctio->u.status1.scsi_status |= + cpu_to_le16(SS_RESIDUAL_UNDER); + + /* Response code and sense key */ + put_unaligned_le32(((0x70 << 24) | (sense_key << 8)), + (&ctio->u.status1.sense_data)[0]); + /* Additional sense length */ + put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]); + /* ASC and ASCQ */ + put_unaligned_le32(((asc << 24) | (ascq << 16)), + (&ctio->u.status1.sense_data)[3]); + + /* Memory Barrier */ + wmb(); + + qla2x00_start_iocbs(vha, vha->req); +out: + return; +} + /* callback from target fabric module code */ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) { @@ -2270,7 +2348,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm, */ return -EAGAIN; } else - ha->tgt.cmds[h-1] = prm->cmd; + ha->tgt.cmds[h - 1] = prm->cmd; pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; pkt->nport_handle = prm->cmd->loop_id; @@ -2400,6 +2478,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd) return cmd->bufflen > 0; } +static void qlt_print_dif_err(struct qla_tgt_prm *prm) +{ + struct qla_tgt_cmd *cmd; + struct scsi_qla_host *vha; + + /* asc 0x10=dif error */ + if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) { + cmd = prm->cmd; + vha = cmd->vha; + /* ASCQ */ + switch (prm->sense_buffer[13]) { + case 1: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 2: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 3: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + default: + 
ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Dif ERR: lba[%llx|%lld] len[%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + } + ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16); + } +} + /* * Called without ha->hardware_lock held */ @@ -2521,18 +2643,9 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, for (i = 0; i < prm->sense_buffer_len/4; i++) ((uint32_t *)ctio->u.status1.sense_data)[i] = cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]); -#if 0 - if (unlikely((prm->sense_buffer_len % 4) != 0)) { - static int q; - if (q < 10) { - ql_dbg(ql_dbg_tgt, vha, 0xe04f, - "qla_target(%d): %d bytes of sense " - "lost", prm->tgt->ha->vp_idx, - prm->sense_buffer_len % 4); - q++; - } - } -#endif + + qlt_print_dif_err(prm); + } else { ctio->u.status1.flags &= ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0); @@ -2546,19 +2659,9 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, /* Sense with len > 24, is it possible ??? */ } - - -/* diff */ static inline int qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) { - /* - * Uncomment when corresponding SCSI changes are done. - * - if (!sp->cmd->prot_chk) - return 0; - * - */ switch (se_cmd->prot_op) { case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: @@ -2579,16 +2682,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) return 0; } +static inline int +qla_tgt_ref_mask_check(struct se_cmd *se_cmd) +{ + switch (se_cmd->prot_op) { + case TARGET_PROT_DIN_INSERT: + case TARGET_PROT_DOUT_INSERT: + case TARGET_PROT_DIN_STRIP: + case TARGET_PROT_DOUT_STRIP: + case TARGET_PROT_DIN_PASS: + case TARGET_PROT_DOUT_PASS: + return 1; + default: + return 0; + } + return 0; +} + /* - * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command - * + * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command */ -static inline void -qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) +static void +qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx, + uint16_t *pfw_prot_opts) { + struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t lba = 0xffffffff & se_cmd->t_task_lba; + scsi_qla_host_t *vha = cmd->tgt->vha; + struct qla_hw_data *ha = vha->hw; + uint32_t t32 = 0; - /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2 + /* + * wait till Mode Sense/Select cmd, modepage Ah, subpage 2 * have been immplemented by TCM, before AppTag is avail. * Look for modesense_handlers[] */ @@ -2596,65 +2721,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) ctx->app_tag_mask[0] = 0x0; ctx->app_tag_mask[1] = 0x0; + if (IS_PI_UNINIT_CAPABLE(ha)) { + if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) || + (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)) + *pfw_prot_opts |= PO_DIS_VALD_APP_ESC; + else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT) + *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC; + } + + t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts); + switch (se_cmd->prot_type) { case TARGET_DIF_TYPE0_PROT: /* - * No check for ql2xenablehba_err_chk, as it would be an - * I/O error if hba tag generation is not done. + * No check for ql2xenablehba_err_chk, as it + * would be an I/O error if hba tag generation + * is not done. 
*/ ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - /* enable ALL bytes of the ref tag */ ctx->ref_tag_mask[0] = 0xff; ctx->ref_tag_mask[1] = 0xff; ctx->ref_tag_mask[2] = 0xff; ctx->ref_tag_mask[3] = 0xff; break; - /* - * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and - * 16 bit app tag. - */ case TARGET_DIF_TYPE1_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - /* - * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to - * match LBA in CDB + N - */ + /* + * For TYPE 1 protection: 16 bit GUARD tag, 32 bit + * REF tag, and 16 bit app tag. + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE2_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - - /* For Type 3 protection: 16 bit GUARD only */ + /* + * For TYPE 2 protection: 16 bit GUARD + 32 bit REF + * tag has to match LBA in CDB + N + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE3_PROT: - ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = - ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; - break; + /* For TYPE 3 protection: 16 bit GUARD only */ + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = + ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; + break; } } - static inline int qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) { @@ -2673,6 +2806,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t h; struct atio_from_isp *atio = &prm->cmd->atio; + struct qla_tc_param tc; uint16_t t16; ha = vha->hw; @@ -2698,16 +2832,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: transfer_length = data_bytes; - data_bytes += dif_bytes; + if (cmd->prot_sg_cnt) + data_bytes += dif_bytes; break; - case TARGET_PROT_DIN_STRIP: case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_PASS: case TARGET_PROT_DOUT_PASS: transfer_length = data_bytes + dif_bytes; break; - default: BUG(); break; @@ -2743,7 +2876,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) break; } - /* ---- PKT ---- */ /* Update entry type to indicate Command Type CRC_2 IOCB */ pkt->entry_type = CTIO_CRC2; @@ -2761,9 +2893,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) } else ha->tgt.cmds[h-1] = prm->cmd; - pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; - pkt->nport_handle = prm->cmd->loop_id; + pkt->nport_handle = 
cpu_to_le16(prm->cmd->loop_id); pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; @@ -2784,12 +2915,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) else if (cmd->dma_data_direction == DMA_FROM_DEVICE) pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT); - pkt->dseg_count = prm->tot_dsds; /* Fibre channel byte count */ pkt->transfer_length = cpu_to_le32(transfer_length); - /* ----- CRC context -------- */ /* Allocate CRC context from global pool */ @@ -2809,13 +2938,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) /* Set handle */ crc_ctx_pkt->handle = pkt->handle; - qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt); + qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts); pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma)); pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma)); pkt->crc_context_len = CRC_CONTEXT_LEN_FW; - if (!bundling) { cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address; } else { @@ -2836,16 +2964,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes); crc_ctx_pkt->guard_seed = cpu_to_le16(0); + memset((uint8_t *)&tc, 0 , sizeof(tc)); + tc.vha = vha; + tc.blk_sz = cmd->blk_sz; + tc.bufflen = cmd->bufflen; + tc.sg = cmd->sg; + tc.prot_sg = cmd->prot_sg; + tc.ctx = crc_ctx_pkt; + tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced; /* Walks data segments */ pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR); if (!bundling && prm->prot_seg_cnt) { if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd, - prm->tot_dsds, cmd)) + prm->tot_dsds, &tc)) goto crc_queuing_error; } else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd, - (prm->tot_dsds - prm->prot_seg_cnt), cmd)) + (prm->tot_dsds - prm->prot_seg_cnt), &tc)) goto crc_queuing_error; if (bundling && prm->prot_seg_cnt) { @@ -2854,18 +2990,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address; if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd, - prm->prot_seg_cnt, cmd)) + prm->prot_seg_cnt, &tc)) goto crc_queuing_error; } return QLA_SUCCESS; crc_queuing_error: /* Cleanup will be performed by the caller */ + vha->hw->tgt.cmds[h - 1] = NULL; return QLA_FUNCTION_FAILED; } - /* * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and * * QLA_TGT_XMIT_STATUS for >= 24xx silicon @@ -3113,139 +3249,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer); /* - * Checks the guard or meta-data for the type of error - * detected by the HBA. + * it is assumed either hardware_lock or qpair lock is held. 
*/ -static inline int +static void qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, - struct ctio_crc_from_fw *sts) + struct ctio_crc_from_fw *sts) { uint8_t *ap = &sts->actual_dif[0]; uint8_t *ep = &sts->expected_dif[0]; - uint32_t e_ref_tag, a_ref_tag; - uint16_t e_app_tag, a_app_tag; - uint16_t e_guard, a_guard; uint64_t lba = cmd->se_cmd.t_task_lba; + uint8_t scsi_status, sense_key, asc, ascq; + unsigned long flags; - a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); - a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); - a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); - - e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); - e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); - e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); - - ql_dbg(ql_dbg_tgt, vha, 0xe075, - "iocb(s) %p Returned STATUS.\n", sts); - - ql_dbg(ql_dbg_tgt, vha, 0xf075, - "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard); - - /* - * Ignore sector if: - * For type 3: ref & app tag is all 'f's - * For type 0,1,2: app tag is all 'f's - */ - if ((a_app_tag == 0xffff) && - ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) || - (a_ref_tag == 0xffffffff))) { - uint32_t blocks_done; - - /* 2TB boundary case covered automatically with this */ - blocks_done = e_ref_tag - (uint32_t)lba + 1; - cmd->se_cmd.bad_sector = e_ref_tag; - cmd->se_cmd.pi_err = 0; - ql_dbg(ql_dbg_tgt, vha, 0xf074, - "need to return scsi good\n"); - - /* Update protection tag */ - if (cmd->prot_sg_cnt) { - uint32_t i, k = 0, num_ent; - struct scatterlist *sg, *sgl; - + cmd->trc_flags |= TRC_DIF_ERR; - sgl = cmd->prot_sg; + cmd->a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); + cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); + cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); - /* Patch the corresponding protection tags */ - for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) { - num_ent = sg_dma_len(sg) / 8; - if (k + num_ent < blocks_done) { - k += num_ent; - continue; - } - k = blocks_done; - break; - } + cmd->e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); + cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); + cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); - if (k != blocks_done) { - ql_log(ql_log_warn, vha, 0xf076, - "unexpected tag values tag:lba=%u:%llu)\n", - e_ref_tag, (unsigned long long)lba); - goto out; - } + ql_dbg(ql_dbg_tgt_dif, vha, 0xf075, + "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state); -#if 0 - struct sd_dif_tuple *spt; - /* TODO: - * This section came from initiator. Is it valid here? - * should ulp be override with actual val??? 
- */ - spt = page_address(sg_page(sg)) + sg->offset; - spt += j; + scsi_status = sense_key = asc = ascq = 0; - spt->app_tag = 0xffff; - if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3) - spt->ref_tag = 0xffffffff; -#endif - } - - return 0; - } - - /* check guard */ - if (e_guard != a_guard) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe076, - "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + /* check appl tag */ + if (cmd->e_app_tag != cmd->a_app_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + + cmd->dif_err_code = DIF_ERR_APP; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x2; } /* check ref tag */ - if (e_ref_tag != a_ref_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = e_ref_tag; - - ql_log(ql_log_warn, vha, 0xe077, - "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); + if (cmd->e_ref_tag != cmd->a_ref_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard[%x|%x] cmd=%p ox_id[%04x] ", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + + cmd->dif_err_code = DIF_ERR_REF; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x3; goto out; } - /* check appl tag */ - if (e_app_tag != a_app_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe078, - "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + /* check guard */ + if (cmd->e_guard != cmd->a_guard) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + cmd->dif_err_code = DIF_ERR_GRD; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x1; } out: - return 1; -} + switch (cmd->state) { + case QLA_TGT_STATE_NEED_DATA: + /* handle_data will load DIF error code */ + cmd->state = QLA_TGT_STATE_DATA_IN; + vha->hw->tgt.tgt_ops->handle_data(cmd); + break; + default: + spin_lock_irqsave(&cmd->cmd_lock, flags); + if 
(cmd->aborted) { + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq); + /* assume scsi status gets out on the wire. + * Will not wait for completion. + */ + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } +} /* If hardware_lock held on entry, might drop it, then reaquire */ /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ @@ -3552,6 +3662,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio, { int term = 0; + if (cmd->se_cmd.prot_op) + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x] op %#x/%s", + cmd->lba, cmd->lba, + cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr, + cmd->se_cmd.prot_op, + prot_op_str(cmd->se_cmd.prot_op)); + if (ctio != NULL) { struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio; term = !(c->flags & @@ -3769,32 +3889,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073, - "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n", + "qla_target(%d): CTIO with DIF_ERROR status %x " + "received (state %x, ulp_cmd %p) actual_dif[0x%llx] " + "expect_dif[0x%llx]\n", vha->vp_idx, status, cmd->state, se_cmd, *((u64 *)&crc->actual_dif[0]), *((u64 *)&crc->expected_dif[0])); - if (qlt_handle_dif_error(vha, cmd, ctio)) { - if (cmd->state == QLA_TGT_STATE_NEED_DATA) { - /* scsi Write/xfer rdy complete */ - goto skip_term; - } else { - /* scsi read/xmit respond complete - * call handle dif to send scsi status - * rather than terminate exchange. - */ - cmd->state = QLA_TGT_STATE_PROCESSED; - ha->tgt.tgt_ops->handle_dif_err(cmd); - return; - } - } else { - /* Need to generate a SCSI good completion. - * because FW did not send scsi status. 
- */ - status = 0; - goto skip_term; - } - break; + qlt_handle_dif_error(vha, cmd, ctio); + return; } default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b, @@ -3817,7 +3920,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, return; } } -skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { cmd->trc_flags |= TRC_CTIO_DONE; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index a7f90dcaae37d3..c35f889b94a6ca 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio) atio->u.isp24.fcp_cmnd.add_cdb_len = 0; } +static inline int get_datalen_for_atio(struct atio_from_isp *atio) +{ + int len = atio->u.isp24.fcp_cmnd.add_cdb_len; + + return (be32_to_cpu(get_unaligned((uint32_t *) + &atio->u.isp24.fcp_cmnd.add_cdb[len * 4]))); +} + #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */ /* @@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl { int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *, unsigned char *, uint32_t, int, int, int); void (*handle_data)(struct qla_tgt_cmd *); - void (*handle_dif_err)(struct qla_tgt_cmd *); int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t, uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); @@ -684,6 +691,8 @@ struct qla_tgt_func_tmpl { void (*clear_nacl_from_fcport_map)(struct fc_port *); void (*put_sess)(struct fc_port *); void (*shutdown_sess)(struct fc_port *); + int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); + int (*chk_dif_tags)(uint32_t tag); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); @@ -720,8 +729,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_ABORT_ALL 0xFFFE #define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD #define QLA_TGT_NEXUS_LOSS 0xFFFC -#define QLA_TGT_ABTS 0xFFFB -#define QLA_TGT_2G_ABORT_TASK 0xFFFA +#define QLA_TGT_ABTS 0xFFFB +#define QLA_TGT_2G_ABORT_TASK 0xFFFA /* Notify Acknowledge flags */ #define NOTIFY_ACK_RES_COUNT BIT_8 @@ -845,6 +854,7 @@ enum trace_flags { TRC_CMD_FREE = BIT_17, TRC_DATA_IN = BIT_18, TRC_ABORT = BIT_19, + TRC_DIF_ERR = BIT_20, }; struct qla_tgt_cmd { @@ -862,7 +872,6 @@ struct qla_tgt_cmd { unsigned int sg_mapped:1; unsigned int free_sg:1; unsigned int write_data_transferred:1; - unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; unsigned int term_exchg:1; unsigned int cmd_sent_to_fw:1; @@ -885,11 +894,25 @@ struct qla_tgt_cmd { struct list_head cmd_list; struct atio_from_isp atio; - /* t10dif */ + + uint8_t ctx_dsd_alloced; + + /* T10-DIF */ +#define DIF_ERR_NONE 0 +#define DIF_ERR_GRD 1 +#define DIF_ERR_REF 2 +#define DIF_ERR_APP 3 + int8_t dif_err_code; struct scatterlist *prot_sg; uint32_t prot_sg_cnt; - uint32_t blk_sz; + uint32_t blk_sz, num_blks; + uint8_t scsi_status, sense_key, asc, ascq; + struct crc_context *ctx; + uint8_t *cdb; + uint64_t lba; + uint16_t a_guard, e_guard, a_app_tag, e_app_tag; + uint32_t a_ref_tag, e_ref_tag; uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; @@ -1053,4 +1076,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *); extern void qlt_logo_completion_handler(fc_port_t *, int); extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); +void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t, + uint8_t, uint8_t, uint8_t); + #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 8e8ab0fa9672a6..7443e4efa3aed4 100644 
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) return; } + switch (cmd->dif_err_code) { + case DIF_ERR_GRD: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + break; + case DIF_ERR_REF: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + break; + case DIF_ERR_APP: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + break; + case DIF_ERR_NONE: + default: + break; + } + if (cmd->se_cmd.pi_err) transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); @@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); } -static void tcm_qla2xxx_handle_dif_work(struct work_struct *work) +static int tcm_qla2xxx_chk_dif_tags(uint32_t tag) { - struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); - - /* take an extra kref to prevent cmd free too early. - * need to wait for SCSI status/check condition to - * finish responding generate by transport_generic_request_failure. - */ - kref_get(&cmd->se_cmd.cmd_kref); - transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); + return 0; } -/* - * Called from qla_target.c:qlt_do_ctio_completion() - */ -static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) +static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd, + uint16_t *pfw_prot_opts) { - INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work); - queue_work(tcm_qla2xxx_free_wq, &cmd->work); + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD)) + *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG)) + *pfw_prot_opts |= PO_DIS_APP_TAG_VALD; + + return 0; } /* @@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id, static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .handle_cmd = tcm_qla2xxx_handle_cmd, .handle_data = tcm_qla2xxx_handle_data, - .handle_dif_err = tcm_qla2xxx_handle_dif_err, .handle_tmr = tcm_qla2xxx_handle_tmr, .free_cmd = tcm_qla2xxx_free_cmd, .free_mcmd = tcm_qla2xxx_free_mcmd, @@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, + .get_dif_tags = tcm_qla2xxx_dif_tags, + .chk_dif_tags = tcm_qla2xxx_chk_dif_tags, }; static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport) From 54b9993c8cf2d77c0f23be828a22e0817f742442 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 15 Mar 2017 09:48:50 -0700 Subject: [PATCH 1482/1911] qla2xxx: Export DIF stats via debugfs Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 ++++++++++++ drivers/scsi/qla2xxx/qla_dfs.c | 15 +++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9d1d3dcf1c878a..8228dfac6a3150 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3108,6 +3108,16 @@ struct qla_chip_state_84xx { uint32_t gold_fw_version; }; +struct qla_dif_statistics { + uint64_t dif_input_bytes; + uint64_t dif_output_bytes; + uint64_t dif_input_requests; + uint64_t dif_output_requests; + uint32_t dif_guard_err; + uint32_t dif_ref_tag_err; + uint32_t 
dif_app_tag_err; +}; + struct qla_statistics { uint32_t total_isp_aborts; uint64_t input_bytes; @@ -3120,6 +3130,8 @@ struct qla_statistics { uint32_t stat_max_pend_cmds; uint32_t stat_max_qfull_cmds_alloc; uint32_t stat_max_qfull_cmds_dropped; + + struct qla_dif_statistics qla_dif_stats; }; struct bidi_statistics { diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index b48cce696bac77..3b35905619b046 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -114,6 +114,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused) seq_printf(s, "num Q full sent = %lld\n", vha->tgt_counters.num_q_full_sent); + /* DIF stats */ + seq_printf(s, "DIF Inp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_bytes); + seq_printf(s, "DIF Outp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_bytes); + seq_printf(s, "DIF Inp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_requests); + seq_printf(s, "DIF Outp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_requests); + seq_printf(s, "DIF Guard err = %d\n", + vha->qla_stats.qla_dif_stats.dif_guard_err); + seq_printf(s, "DIF Ref tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_ref_tag_err); + seq_printf(s, "DIF App tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_app_tag_err); return 0; } From f1443eebca7792b3b8b41b27652d67ddc5d31fa2 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:51 -0700 Subject: [PATCH 1483/1911] qla2xxx: Add async new target notification Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 6 +++--- drivers/scsi/qla2xxx/qla_target.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 532004981dbd0a..563116188c439b 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6005,13 +6005,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX; tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX; - if (base_vha->fc_vport) - return 0; - mutex_lock(&qla_tgt_mutex); list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist); mutex_unlock(&qla_tgt_mutex); + if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target) + ha->tgt.tgt_ops->add_target(base_vha); + return 0; } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index c35f889b94a6ca..d64420251194eb 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -693,6 +693,7 @@ struct qla_tgt_func_tmpl { void (*shutdown_sess)(struct fc_port *); int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); int (*chk_dif_tags)(uint32_t tag); + void (*add_target)(struct scsi_qla_host *); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); From 15f30a5752287f20c7de428423c34bc51cfbe465 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:52 -0700 Subject: [PATCH 1484/1911] qla2xxx: Use IOCB interface to submit non-critical MBX. The Mailbox interface is currently over subscribed. We like to reserve the Mailbox interface for the chip managment and link initialization. Any non essential Mailbox command will be routed through the IOCB interface. The IOCB interface is able to absorb more commands. 
Following commands are being routed through IOCB interface - Get ID List (007Ch) - Get Port DB (0064h) - Get Link Priv Stats (006Dh) Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 +- drivers/scsi/qla2xxx/qla_gbl.h | 10 +- drivers/scsi/qla2xxx/qla_init.c | 46 +---- drivers/scsi/qla2xxx/qla_isr.c | 2 +- drivers/scsi/qla2xxx/qla_mbx.c | 270 ++++++++++++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.c | 4 +- 6 files changed, 279 insertions(+), 65 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8228dfac6a3150..ae38b7a789b113 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -395,11 +395,15 @@ struct srb_iocb { struct completion comp; } abt; struct ct_arg ctarg; +#define MAX_IOCB_MB_REG 28 +#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t)) struct { - __le16 in_mb[28]; /* fr fw */ - __le16 out_mb[28]; /* to fw */ + __le16 in_mb[MAX_IOCB_MB_REG]; /* from FW */ + __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */ void *out, *in; dma_addr_t out_dma, in_dma; + struct completion comp; + int rc; } mbx; struct { struct imm_ntfy_from_isp *ntfy; @@ -437,7 +441,7 @@ typedef struct srb { uint32_t handle; uint16_t flags; uint16_t type; - char *name; + const char *name; int iocbs; struct qla_qpair *qpair; u32 gen1; /* scratch */ @@ -3364,6 +3368,8 @@ struct qla_hw_data { uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; /* 35 bits */ + + uint32_t fw_started:1; } flags; /* This spinlock is used to protect "io transactions", you must diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index ca6f122e58656d..323b912b47f741 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); +int qla24xx_async_abort_cmd(srb_t *); /* * Global Functions in qla_mid.c source file. @@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *, extern int qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *, - dma_addr_t, uint); + dma_addr_t, uint16_t); extern int qla24xx_abort_command(srb_t *); extern int qla24xx_async_abort_command(srb_t *); @@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t); extern int qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint); +int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *); +int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8); +int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t, + uint16_t *); +int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *, + struct port_database_24xx *); + /* * Global Function Prototypes in qla_isr.c source file. 
*/ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a7865a5d556d9d..b1bfa63f7d4ead 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) struct srb *sp = s; struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; - uint64_t zero = 0; struct port_database_24xx *pd; fc_port_t *fcport = sp->fcport; u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb; @@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; - /* Check for logged in state. */ - if (pd->current_login_state != PDS_PRLI_COMPLETE && - pd->last_login_state != PDS_PRLI_COMPLETE) { - ql_dbg(ql_dbg_mbx, vha, 0xffff, - "Unable to verify login-state (%x/%x) for " - "loop_id %x.\n", pd->current_login_state, - pd->last_login_state, fcport->loop_id); - rval = QLA_FUNCTION_FAILED; - goto gpd_error_out; - } - - if (fcport->loop_id == FC_NO_LOOP_ID || - (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && - memcmp(fcport->port_name, pd->port_name, 8))) { - /* We lost the device mid way. */ - rval = QLA_NOT_LOGGED_IN; - goto gpd_error_out; - } - - /* Names are little-endian. */ - memcpy(fcport->node_name, pd->node_name, WWN_SIZE); - - /* Get port_id of device. */ - fcport->d_id.b.domain = pd->port_id[0]; - fcport->d_id.b.area = pd->port_id[1]; - fcport->d_id.b.al_pa = pd->port_id[2]; - fcport->d_id.b.rsvd_1 = 0; - - /* If not target must be initiator or unknown type. */ - if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) - fcport->port_type = FCT_INITIATOR; - else - fcport->port_type = FCT_TARGET; - - /* Passback COS information. */ - fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? - FC_COS_CLASS2 : FC_COS_CLASS3; - - if (pd->prli_svc_param_word_3[0] & BIT_7) { - fcport->flags |= FCF_CONF_COMP_SUPPORTED; - fcport->conf_compl_supported = 1; - } + rval = __qla24xx_parse_gpdb(vha, fcport, pd); gpd_error_out: memset(&ea, 0, sizeof(ea)); @@ -1266,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res) complete(&abt->u.abt.comp); } -static int +int qla24xx_async_abort_cmd(srb_t *cmd_sp) { scsi_qla_host_t *vha = cmd_sp->vha; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index b2c6da752edd2d..3953c8d6af69b3 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2692,7 +2692,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, return; abt = &sp->u.iocb_cmd; - abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle); + abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle); sp->done(sp, 0); } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 35079f41741799..e40ed570d3c120 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -10,6 +10,28 @@ #include #include +static struct mb_cmd_name { + uint16_t cmd; + const char *str; +} mb_str[] = { + {MBC_GET_PORT_DATABASE, "GPDB"}, + {MBC_GET_ID_LIST, "GIDList"}, + {MBC_GET_LINK_PRIV_STATS, "Stats"}, +}; + +static const char *mb_to_str(uint16_t cmd) +{ + int i; + struct mb_cmd_name *e; + + for (i = 0; i < ARRAY_SIZE(mb_str); i++) { + e = mb_str + i; + if (cmd == e->cmd) + return e->str; + } + return "unknown"; +} + static struct rom_cmd { uint16_t cmd; } rom_cmds[] = { @@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id, int qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, - dma_addr_t stats_dma, uint 
options) + dma_addr_t stats_dma, uint16_t options) { int rval; mbx_cmd_t mc; @@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088, "Entered %s.\n", __func__); - mcp->mb[0] = MBC_GET_LINK_PRIV_STATS; - mcp->mb[2] = MSW(stats_dma); - mcp->mb[3] = LSW(stats_dma); - mcp->mb[6] = MSW(MSD(stats_dma)); - mcp->mb[7] = LSW(MSD(stats_dma)); - mcp->mb[8] = sizeof(struct link_statistics) / 4; - mcp->mb[9] = vha->vp_idx; - mcp->mb[10] = options; - mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0; - mcp->in_mb = MBX_2|MBX_1|MBX_0; - mcp->tov = MBX_TOV_SECONDS; - mcp->flags = IOCTL_CMD; - rval = qla2x00_mailbox_command(vha, mcp); + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_LINK_PRIV_STATS; + mc.mb[2] = MSW(stats_dma); + mc.mb[3] = LSW(stats_dma); + mc.mb[6] = MSW(MSD(stats_dma)); + mc.mb[7] = LSW(MSD(stats_dma)); + mc.mb[8] = sizeof(struct link_statistics) / 4; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16(options); + + rval = qla24xx_send_mb_cmd(vha, &mc); if (rval == QLA_SUCCESS) { if (mcp->mb[0] != MBS_COMMAND_COMPLETE) { @@ -5827,3 +5847,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha, return rval; } + +static void qla2x00_async_mb_sp_done(void *s, int res) +{ + struct srb *sp = s; + + sp->u.iocb_cmd.u.mbx.rc = res; + + complete(&sp->u.iocb_cmd.u.mbx.comp); + /* don't free sp here. Let the caller do the free */ +} + +/* + * This mailbox uses the iocb interface to send MB command. + * This allows non-critial (non chip setup) command to go + * out in parrallel. + */ +int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) +{ + int rval = QLA_FUNCTION_FAILED; + srb_t *sp; + struct srb_iocb *c; + + if (!vha->hw->flags.fw_started) + goto done; + + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_MB_IOCB; + sp->name = mb_to_str(mcp->mb[0]); + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG); + + c = &sp->u.iocb_cmd; + c->timeout = qla2x00_async_iocb_timeout; + init_completion(&c->u.mbx.comp); + + sp->done = qla2x00_async_mb_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %s Failed submission. %x.\n", + __func__, sp->name, rval); + goto done_free_sp; + } + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n", + sp->name, sp->handle); + + wait_for_completion(&c->u.mbx.comp); + memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG); + + rval = c->u.mbx.rc; + switch (rval) { + case QLA_FUNCTION_TIMEOUT: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n", + __func__, sp->name, rval); + break; + case QLA_SUCCESS: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n", + __func__, sp->name); + sp->free(sp); + break; + default: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. 
%x.\n", + __func__, sp->name, rval); + sp->free(sp); + break; + } + + return rval; + +done_free_sp: + sp->free(sp); +done: + return rval; +} + +/* + * qla24xx_gpdb_wait + * NOTE: Do not call this routine from DPC thread + */ +int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +{ + int rval = QLA_FUNCTION_FAILED; + dma_addr_t pd_dma; + struct port_database_24xx *pd; + struct qla_hw_data *ha = vha->hw; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); + if (pd == NULL) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate port database structure.\n"); + goto done_free_sp; + } + memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE)); + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_PORT_DATABASE; + mc.mb[1] = cpu_to_le16(fcport->loop_id); + mc.mb[2] = MSW(pd_dma); + mc.mb[3] = LSW(pd_dma); + mc.mb[6] = MSW(MSD(pd_dma)); + mc.mb[7] = LSW(MSD(pd_dma)); + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16((uint16_t)opt); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %8phC fail\n", __func__, fcport->port_name); + goto done_free_sp; + } + + rval = __qla24xx_parse_gpdb(vha, fcport, pd); + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n", + __func__, fcport->port_name); + +done_free_sp: + if (pd) + dma_pool_free(ha->s_dma_pool, pd, pd_dma); +done: + return rval; +} + +int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, + struct port_database_24xx *pd) +{ + int rval = QLA_SUCCESS; + uint64_t zero = 0; + + /* Check for logged in state. */ + if (pd->current_login_state != PDS_PRLI_COMPLETE && + pd->last_login_state != PDS_PRLI_COMPLETE) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "Unable to verify login-state (%x/%x) for " + "loop_id %x.\n", pd->current_login_state, + pd->last_login_state, fcport->loop_id); + rval = QLA_FUNCTION_FAILED; + goto gpd_error_out; + } + + if (fcport->loop_id == FC_NO_LOOP_ID || + (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && + memcmp(fcport->port_name, pd->port_name, 8))) { + /* We lost the device mid way. */ + rval = QLA_NOT_LOGGED_IN; + goto gpd_error_out; + } + + /* Names are little-endian. */ + memcpy(fcport->node_name, pd->node_name, WWN_SIZE); + memcpy(fcport->port_name, pd->port_name, WWN_SIZE); + + /* Get port_id of device. */ + fcport->d_id.b.domain = pd->port_id[0]; + fcport->d_id.b.area = pd->port_id[1]; + fcport->d_id.b.al_pa = pd->port_id[2]; + fcport->d_id.b.rsvd_1 = 0; + + /* If not target must be initiator or unknown type. */ + if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) + fcport->port_type = FCT_INITIATOR; + else + fcport->port_type = FCT_TARGET; + + /* Passback COS information. */ + fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? + FC_COS_CLASS2 : FC_COS_CLASS3; + + if (pd->prli_svc_param_word_3[0] & BIT_7) { + fcport->flags |= FCF_CONF_COMP_SUPPORTED; + fcport->conf_compl_supported = 1; + } + +gpd_error_out: + return rval; +} + +/* + * qla24xx_gidlist__wait + * NOTE: don't call this routine from DPC thread. 
+ */ +int qla24xx_gidlist_wait(struct scsi_qla_host *vha, + void *id_list, dma_addr_t id_list_dma, uint16_t *entries) +{ + int rval = QLA_FUNCTION_FAILED; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_ID_LIST; + mc.mb[2] = MSW(id_list_dma); + mc.mb[3] = LSW(id_list_dma); + mc.mb[6] = MSW(MSD(id_list_dma)); + mc.mb[7] = LSW(MSD(id_list_dma)); + mc.mb[8] = 0; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: fail\n", __func__); + } else { + *entries = mc.mb[1]; + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: done\n", __func__); + } +done: + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 563116188c439b..38bdcd325fa438 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1238,7 +1238,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, } /* Get list of logged in devices */ - rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries); + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045, "qla_target(%d): get_id_list() failed: %x\n", @@ -5648,7 +5648,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, fcport->loop_id = loop_id; - rc = qla2x00_get_port_database(vha, fcport, 0); + rc = qla24xx_gpdb_wait(vha, fcport, 0); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070, "qla_target(%d): Failed to retrieve fcport " From c423437e3ff41b8ca551ab6621baf11538dbfe9d Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:53 -0700 Subject: [PATCH 1485/1911] qla2xxx: Add DebugFS node to display Port Database Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_dfs.c | 92 ++++++++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index ae38b7a789b113..089480280558b7 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3300,6 +3300,8 @@ struct qlt_hw_data { uint8_t tgt_node_name[WWN_SIZE]; struct dentry *dfs_tgt_sess; + struct dentry *dfs_tgt_port_database; + struct list_head q_full_list; uint32_t num_pend_cmds; uint32_t num_qfull_cmds_alloc; diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 3b35905619b046..989e17b0758cd5 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) struct qla_hw_data *ha = vha->hw; unsigned long flags; struct fc_port *sess = NULL; - struct qla_tgt *tgt= vha->vha_tgt.qla_tgt; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - seq_printf(s, "%s\n",vha->host_str); + seq_printf(s, "%s\n", vha->host_str); if (tgt) { - seq_printf(s, "Port ID Port Name Handle\n"); + seq_puts(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); list_for_each_entry(sess, &vha->vp_fcports, list) @@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file) return single_open(file, qla2x00_dfs_tgt_sess_show, vha); } - static const struct file_operations dfs_tgt_sess_ops = { .open = qla2x00_dfs_tgt_sess_open, .read = seq_read, @@ -52,6 +51,78 @@ static const struct 
file_operations dfs_tgt_sess_ops = { .release = single_release, }; +static int +qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused) +{ + scsi_qla_host_t *vha = s->private; + struct qla_hw_data *ha = vha->hw; + struct gid_list_info *gid_list; + dma_addr_t gid_list_dma; + fc_port_t fc_port; + char *id_iter; + int rc, i; + uint16_t entries, loop_id; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + + seq_printf(s, "%s\n", vha->host_str); + if (tgt) { + gid_list = dma_alloc_coherent(&ha->pdev->dev, + qla2x00_gid_list_size(ha), + &gid_list_dma, GFP_KERNEL); + if (!gid_list) { + ql_dbg(ql_dbg_user, vha, 0x705c, + "DMA allocation failed for %u\n", + qla2x00_gid_list_size(ha)); + return 0; + } + + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, + &entries); + if (rc != QLA_SUCCESS) + goto out_free_id_list; + + id_iter = (char *)gid_list; + + seq_puts(s, "Port Name Port ID Loop ID\n"); + + for (i = 0; i < entries; i++) { + struct gid_list_info *gid = + (struct gid_list_info *)id_iter; + loop_id = le16_to_cpu(gid->loop_id); + memset(&fc_port, 0, sizeof(fc_port_t)); + + fc_port.loop_id = loop_id; + + rc = qla24xx_gpdb_wait(vha, &fc_port, 0); + seq_printf(s, "%8phC %02x%02x%02x %d\n", + fc_port.port_name, fc_port.d_id.b.domain, + fc_port.d_id.b.area, fc_port.d_id.b.al_pa, + fc_port.loop_id); + id_iter += ha->gid_list_info_size; + } +out_free_id_list: + dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), + gid_list, gid_list_dma); + } + + return 0; +} + +static int +qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file) +{ + scsi_qla_host_t *vha = inode->i_private; + + return single_open(file, qla2x00_dfs_tgt_port_database_show, vha); +} + +static const struct file_operations dfs_tgt_port_database_ops = { + .open = qla2x00_dfs_tgt_port_database_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused) { @@ -296,6 +367,14 @@ qla2x00_dfs_setup(scsi_qla_host_t *vha) goto out; } + ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database", + S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_port_database_ops); + if (!ha->tgt.dfs_tgt_port_database) { + ql_log(ql_log_warn, vha, 0xffff, + "Unable to create debugFS tgt_port_database node.\n"); + goto out; + } + ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha, &dfs_fce_ops); if (!ha->dfs_fce) { @@ -326,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha) ha->tgt.dfs_tgt_sess = NULL; } + if (ha->tgt.dfs_tgt_port_database) { + debugfs_remove(ha->tgt.dfs_tgt_port_database); + ha->tgt.dfs_tgt_port_database = NULL; + } + if (ha->dfs_fw_resource_cnt) { debugfs_remove(ha->dfs_fw_resource_cnt); ha->dfs_fw_resource_cnt = NULL; From 482c9dc79204bb83c3433a59680c787a0b98c000 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:54 -0700 Subject: [PATCH 1486/1911] qla2xxx: Change scsi host lookup method. For target mode, when new scsi command arrive, driver first performs a look up of the SCSI Host. The current look up method is based on the ALPA portion of the NPort ID. For Cisco switch, the ALPA can not be used as the index. Instead, the new search method is based on the full value of the Nport_ID via btree lib. 
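A minimal sketch of the lookup described above, assuming the btree_head32 map from lib/btree (linux/btree.h) has already been populated via qlt_update_vp_map(); the helper name below is illustrative only, the real change lands in qlt_find_host_by_d_id() in the qla_target.c hunk further down. The point is that the full 24-bit N_Port ID (domain, area, al_pa), not just the ALPA byte, is packed into a u32 key that selects the scsi_qla_host.

	/* Illustrative helper, not the driver's exact function. */
	static struct scsi_qla_host *host_by_nport_id(struct qla_hw_data *ha,
						      const uint8_t *d_id)
	{
		u32 key = ((u32)d_id[0] << 16) |	/* domain */
			  ((u32)d_id[1] << 8)  |	/* area   */
			   (u32)d_id[2];		/* al_pa  */

		/* btree_lookup32() returns the pointer stored at 'key', or NULL. */
		return btree_lookup32(&ha->tgt.host_map, key);
	}
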
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/Kconfig | 1 + drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_gbl.h | 2 + drivers/scsi/qla2xxx/qla_init.c | 14 ++--- drivers/scsi/qla2xxx/qla_mbx.c | 28 +++------- drivers/scsi/qla2xxx/qla_os.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 92 ++++++++++++++++++++++++++----- 7 files changed, 100 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 67c0d5aa32125c..de952935b5d2ca 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -3,6 +3,7 @@ config SCSI_QLA_FC depends on PCI && SCSI depends on SCSI_FC_ATTRS select FW_LOADER + select BTREE ---help--- This qla2xxx driver supports all QLogic Fibre Channel PCI and PCIe host adapters. diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 089480280558b7..9251918773b157 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -3311,6 +3312,7 @@ struct qlt_hw_data { spinlock_t sess_lock; int rspq_vector_cpuid; spinlock_t atio_lock ____cacheline_aligned; + struct btree_head32 host_map; }; #define MAX_QFULL_CMDS_ALLOC 8192 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 323b912b47f741..5b2451745e9f47 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -854,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); void qla24xx_delete_sess_fn(struct work_struct *); void qlt_unknown_atio_work_fn(struct work_struct *); +void qlt_update_host_map(struct scsi_qla_host *, port_id_t); +void qlt_remove_target_resources(struct qla_hw_data *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b1bfa63f7d4ead..b0f6ad3020d3a0 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3340,8 +3340,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) uint8_t domain; char connect_type[22]; struct qla_hw_data *ha = vha->hw; - unsigned long flags; scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); + port_id_t id; /* Get host addresses. */ rval = qla2x00_get_adapter_id(vha, @@ -3419,13 +3419,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) /* Save Host port and loop ID. 
*/ /* byte order - Big Endian */ - vha->d_id.b.domain = domain; - vha->d_id.b.area = area; - vha->d_id.b.al_pa = al_pa; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + id.b.domain = domain; + id.b.area = area; + id.b.al_pa = al_pa; + id.b.rsvd_1 = 0; + qlt_update_host_map(vha, id); if (!vha->flags.init_done) ql_log(ql_log_info, vha, 0x2010, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index e40ed570d3c120..53d9579acc746c 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3623,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, scsi_qla_host_t *vp = NULL; unsigned long flags; int found; + port_id_t id; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6, "Entered %s.\n", __func__); @@ -3630,6 +3631,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->entry_status != 0) return; + id.b.domain = rptid_entry->port_id[2]; + id.b.area = rptid_entry->port_id[1]; + id.b.al_pa = rptid_entry->port_id[0]; + id.b.rsvd_1 = 0; + if (rptid_entry->format == 0) { /* loop */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, @@ -3641,13 +3647,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } else if (rptid_entry->format == 1) { /* fabric */ @@ -3673,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, WWN_SIZE); } - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } fc_host_port_name(vha->host) = @@ -3714,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (!found) return; - vp->d_id.b.domain = rptid_entry->port_id[2]; - vp->d_id.b.area = rptid_entry->port_id[1]; - vp->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vp, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vp, id); /* * Cannot configure here as we are still sitting on the diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 54d4e802bde072..344faf59783d7f 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3469,6 +3469,7 @@ qla2x00_remove_one(struct pci_dev *pdev) qla2x00_free_sysfs_attr(base_vha, true); fc_remove_host(base_vha->host); + qlt_remove_target_resources(ha); scsi_remove_host(base_vha->host); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 38bdcd325fa438..7278e046bf87f4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -187,21 +187,23 @@ static inline struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha, uint8_t *d_id) { - struct qla_hw_data *ha = vha->hw; - uint8_t vp_idx; - - if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0])) - return NULL; + struct scsi_qla_host *host; + uint32_t key = 0; - if (vha->d_id.b.al_pa == 
d_id[2]) + if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) && + (vha->d_id.b.al_pa == d_id[2])) return vha; - BUG_ON(ha->tgt.tgt_vp_map == NULL); - vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx; - if (likely(test_bit(vp_idx, ha->vp_idx_map))) - return ha->tgt.tgt_vp_map[vp_idx].vha; + key = (uint32_t)d_id[0] << 16; + key |= (uint32_t)d_id[1] << 8; + key |= (uint32_t)d_id[2]; - return NULL; + host = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!host) + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Unable to find host %06x\n", key); + + return host; } static inline @@ -6040,6 +6042,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha) return 0; } +void qlt_remove_target_resources(struct qla_hw_data *ha) +{ + struct scsi_qla_host *node; + u32 key = 0; + + btree_for_each_safe32(&ha->tgt.host_map, key, node) + btree_remove32(&ha->tgt.host_map, key); + + btree_destroy32(&ha->tgt.host_map); +} + static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn, unsigned char *b) { @@ -6693,6 +6706,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha, void qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) { + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; @@ -6712,6 +6727,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) qlt_unknown_atio_work_fn); qlt_clear_mode(base_vha); + + rc = btree_init32(&ha->tgt.host_map); + if (rc) + ql_log(ql_log_info, base_vha, 0xffff, + "Unable to initialize ha->host_map btree\n"); + + qlt_update_vp_map(base_vha, SET_VP_IDX); } irqreturn_t @@ -6820,25 +6842,69 @@ qlt_mem_free(struct qla_hw_data *ha) void qlt_update_vp_map(struct scsi_qla_host *vha, int cmd) { + void *slot; + u32 key; + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; + key = vha->d_id.b24; + switch (cmd) { case SET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha; break; case SET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx; + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!slot) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Save vha in host_map %p %06x\n", vha, key); + rc = btree_insert32(&vha->hw->tgt.host_map, + key, vha, GFP_ATOMIC); + if (rc) + ql_log(ql_log_info, vha, 0xffff, + "Unable to insert s_id into host_map: %06x\n", + key); + return; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "replace existing vha in host_map %p %06x\n", vha, key); + btree_update32(&vha->hw->tgt.host_map, key, vha); break; case RESET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL; break; case RESET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "clear vha in host_map %p %06x\n", vha, key); + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (slot) + btree_remove32(&vha->hw->tgt.host_map, key); + vha->d_id.b24 = 0; break; } } +void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id) +{ + unsigned long flags; + struct qla_hw_data *ha = vha->hw; + + if (!vha->d_id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else if (vha->d_id.b24 != id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, RESET_AL_PA); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } +} + static int __init qlt_parse_ini_mode(void) { if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0) From ec7193e26055112bc824929fd943035f9a30b06f Mon Sep 
17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:55 -0700 Subject: [PATCH 1487/1911] qla2xxx: Fix delayed response to command for loop mode/direct connect. Current driver wait for FW to be in the ready state before processing in-coming commands. For Arbitrated Loop or Point-to- Point (not switch), FW Ready state can take a while. FW will transition to ready state after all Nports have been logged in. In the mean time, certain initiators have completed the login and starts IO. Driver needs to start processing all queues if FW is already started. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 10 ++++++-- drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++ drivers/scsi/qla2xxx/qla_isr.c | 14 +++++++++++- drivers/scsi/qla2xxx/qla_mbx.c | 6 ++--- drivers/scsi/qla2xxx/qla_os.c | 21 ++++++++++++++++- drivers/scsi/qla2xxx/qla_target.c | 38 ++++++++++++++++++++----------- 6 files changed, 81 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9251918773b157..ae119018dfaae9 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3322,6 +3322,10 @@ struct qlt_hw_data { #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */ +#define QLA_EARLY_LINKUP(_ha) \ + ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \ + _ha->flags.fw_started && !_ha->flags.fw_init_done) + /* * Qlogic host adapter specific data structure. */ @@ -3371,9 +3375,11 @@ struct qla_hw_data { uint32_t fawwpn_enabled:1; uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; - /* 35 bits */ + uint32_t lip_ae:1; + uint32_t n2n_ae:1; uint32_t fw_started:1; + uint32_t fw_init_done:1; } flags; /* This spinlock is used to protect "io transactions", you must @@ -3466,7 +3472,6 @@ struct qla_hw_data { #define P2P_LOOP 3 uint8_t interrupts_on; uint32_t isp_abort_cnt; - #define PCI_DEVICE_ID_QLOGIC_ISP2532 0x2532 #define PCI_DEVICE_ID_QLOGIC_ISP8432 0x8432 #define PCI_DEVICE_ID_QLOGIC_ISP8001 0x8001 @@ -3947,6 +3952,7 @@ typedef struct scsi_qla_host { struct list_head vp_fcports; /* list of fcports */ struct list_head work_list; spinlock_t work_lock; + struct work_struct iocb_work; /* Commonly used flags and state information. 
*/ struct Scsi_Host *host; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b0f6ad3020d3a0..f9d2fe7b1adedf 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3178,6 +3178,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) } else { ql_dbg(ql_dbg_init, vha, 0x00d3, "Init Firmware -- success.\n"); + ha->flags.fw_started = 1; } return (rval); @@ -4000,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) atomic_set(&vha->loop_state, LOOP_READY); ql_dbg(ql_dbg_disc, vha, 0x2069, "LOOP READY.\n"); + ha->flags.fw_init_done = 1; /* * Process any ATIO queue entries that came in @@ -5491,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (!(IS_P3P_TYPE(ha))) ha->isp_ops->reset_chip(vha); + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; ha->chip_reset++; atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); @@ -6767,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) return; if (!ha->fw_major_version) return; + if (!ha->flags.fw_started) + return; ret = qla2x00_stop_firmware(vha); for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT && @@ -6780,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) "Attempting retry of stop-firmware command.\n"); ret = qla2x00_stop_firmware(vha); } + + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; } int diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3953c8d6af69b3..3203367a4f4236 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -708,6 +708,8 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx); ha->isp_ops->fw_dump(vha, 1); + ha->flags.fw_init_done = 0; + ha->flags.fw_started = 0; if (IS_FWI2_CAPABLE(ha)) { if (mb[1] == 0 && mb[2] == 0) { @@ -761,6 +763,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) break; case MBA_LIP_OCCURRED: /* Loop Initialization Procedure */ + ha->flags.lip_ae = 1; + ha->flags.n2n_ae = 0; + ql_dbg(ql_dbg_async, vha, 0x5009, "LIP occurred (%x).\n", mb[1]); @@ -797,6 +802,10 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) break; case MBA_LOOP_DOWN: /* Loop Down Event */ + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha)) ? RD_REG_WORD(®24->mailbox4) : 0; mbx = (IS_P3P_TYPE(ha)) ? 
RD_REG_WORD(®82->mailbox_out[4]) @@ -866,6 +875,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) /* case MBA_DCBX_COMPLETE: */ case MBA_POINT_TO_POINT: /* Point-to-Point */ + ha->flags.lip_ae = 0; + ha->flags.n2n_ae = 1; + if (IS_QLA2100(ha)) break; @@ -2706,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, struct sts_entry_24xx *pkt; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) { diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 53d9579acc746c..a113ab3592a7f8 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3638,11 +3638,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->format == 0) { /* loop */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, + ql_dbg(ql_dbg_async, vha, 0x10b7, "Format 0 : Number of VPs setup %d, number of " "VPs acquired %d.\n", rptid_entry->vp_setup, rptid_entry->vp_acquired); - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8, + ql_dbg(ql_dbg_async, vha, 0x10b8, "Primary port id %02x%02x%02x.\n", rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); @@ -3651,7 +3651,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, } else if (rptid_entry->format == 1) { /* fabric */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9, + ql_dbg(ql_dbg_async, vha, 0x10b9, "Format 1: VP[%d] enabled - status %d - with " "port id %02x%02x%02x.\n", rptid_entry->vp_idx, rptid_entry->vp_status, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 344faf59783d7f..41d5b09f7326fb 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla2x00_iocb_work_fn(struct work_struct *work) +{ + struct scsi_qla_host *vha = container_of(work, + struct scsi_qla_host, iocb_work); + int cnt = 0; + + while (!list_empty(&vha->work_list)) { + qla2x00_do_work(vha); + cnt++; + if (cnt > 10) + break; + } +} + /* * PCI driver interface */ @@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ qla2xxx_wake_dpc(base_vha); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error); if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) { @@ -4321,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e) spin_lock_irqsave(&vha->work_lock, flags); list_add_tail(&e->list, &vha->work_list); spin_unlock_irqrestore(&vha->work_lock, flags); - qla2xxx_wake_dpc(vha); + + if (QLA_EARLY_LINKUP(vha->hw)) + schedule_work(&vha->iocb_work); + else + qla2xxx_wake_dpc(vha); return QLA_SUCCESS; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 7278e046bf87f4..0e03ca2ab3e523 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -638,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, break; case SRB_NACK_PRLI: fcport->fw_login_state = DSC_LS_PRLI_PEND; + fcport->deleted = 0; c = "PRLI"; break; case SRB_NACK_LOGO: @@ -1576,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, request_t *pkt; struct nack_to_isp *nack; + if (!ha->flags.fw_started) + return; + ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK 
(ha=%p)\n", ha); /* Send marker if required */ @@ -3053,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, else vha->tgt_counters.core_qla_que_buf++; - if (!vha->flags.online || cmd->reset_count != ha->chip_reset) { + if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) { /* * Either the port is not online or this request was from * previous life, just abort the processing. @@ -3194,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); - if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) || + if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* * Either the port is not online or this request was from @@ -3372,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c, "Sending TERM ELS CTIO (ha=%p)\n", ha); - pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL); + pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL); if (pkt == NULL) { ql_dbg(ql_dbg_tgt, vha, 0xe080, "qla_target(%d): %s failed: unable to allocate " @@ -4697,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } if (sess != NULL) { - if (sess->fw_login_state == DSC_LS_PLOGI_PEND) { + if (sess->fw_login_state != DSC_LS_PLOGI_PEND && + sess->fw_login_state != DSC_LS_PLOGI_COMP) { /* * Impatient initiator sent PRLI before last * PLOGI could finish. Will force him to re-try, @@ -4736,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, /* Make session global (not used in fabric mode) */ if (ha->current_topology != ISP_CFG_F) { - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); + qla24xx_post_nack_work(vha, sess, iocb, + SRB_NACK_PRLI); + res = 0; + } else { + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } } else { if (sess) { ql_dbg(ql_dbg_disc, vha, 0xffff, - "%s %d %8phC post nack\n", - __func__, __LINE__, sess->port_name); - + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); qla24xx_post_nack_work(vha, sess, iocb, SRB_NACK_PRLI); res = 0; @@ -4752,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } break; - case ELS_TPRLO: if (le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_GLOBAL_TPRLO) { @@ -5222,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, unsigned long flags; if (unlikely(tgt == NULL)) { - ql_dbg(ql_dbg_io, vha, 0x3064, + ql_dbg(ql_dbg_tgt, vha, 0x3064, "ATIO pkt, but no tgt (ha %p)", ha); return; } @@ -6359,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) struct atio_from_isp *pkt; int cnt, i; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) || From 6c611d18f386d37cce3afbd921568e2a895bd86e Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:56 -0700 Subject: [PATCH 1488/1911] qla2xxx: Update driver version to 9.00.00.00-k Signed-off-by: Himanshu Madhani signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 
3cb1964b7786e4..45bc84e8e3bf50 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,9 +7,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.07.00.38-k" +#define QLA2XXX_VERSION "9.00.00.00-k" -#define QLA_DRIVER_MAJOR_VER 8 -#define QLA_DRIVER_MINOR_VER 7 +#define QLA_DRIVER_MAJOR_VER 9 +#define QLA_DRIVER_MINOR_VER 0 #define QLA_DRIVER_PATCH_VER 0 #define QLA_DRIVER_BETA_VER 0 From 6985bd5e21901c2d2bfc924b114887e20c002901 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Mar 2017 22:05:20 +0100 Subject: [PATCH 1489/1911] iio: imu: st_lsm6dsx: fix FIFO_CTRL2 overwrite during watermark configuration Fixes: 290a6ce11d93 (iio: imu: add support to lsm6dsx driver) Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 78532ce0744979..81b572d7699a89 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -193,8 +193,8 @@ int st_lsm6dsx_update_watermark(struct st_lsm6dsx_sensor *sensor, u16 watermark) if (err < 0) goto out; - fifo_watermark = ((data & ~ST_LSM6DSX_FIFO_TH_MASK) << 8) | - (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK); + fifo_watermark = ((data << 8) & ~ST_LSM6DSX_FIFO_TH_MASK) | + (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK); wdata = cpu_to_le16(fifo_watermark); err = hw->tf->write(hw->dev, ST_LSM6DSX_REG_FIFO_THL_ADDR, From 625fe857e4fac6518716f3c0ff5e5deb8ec6d238 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:01 -0700 Subject: [PATCH 1490/1911] scsi: scsi_dh_alua: Check scsi_device_get() return value Do not queue ALUA work nor call scsi_device_put() if the scsi_device_get() call fails. This patch fixes the following crash: general protection fault: 0000 [#1] SMP RIP: 0010:scsi_device_put+0xb/0x30 Call Trace: scsi_disk_put+0x2d/0x40 sd_release+0x3d/0xb0 __blkdev_put+0x29e/0x360 blkdev_put+0x49/0x170 dm_put_table_device+0x58/0xc0 [dm_mod] dm_put_device+0x70/0xc0 [dm_mod] free_priority_group+0x92/0xc0 [dm_multipath] free_multipath+0x70/0xc0 [dm_multipath] multipath_dtr+0x19/0x20 [dm_multipath] dm_table_destroy+0x67/0x120 [dm_mod] dev_suspend+0xde/0x240 [dm_mod] ctl_ioctl+0x1f5/0x520 [dm_mod] dm_ctl_ioctl+0xe/0x20 [dm_mod] do_vfs_ioctl+0x8f/0x700 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x18/0xad Fixes: commit 03197b61c5ec ("scsi_dh_alua: Use workqueue for RTPG") Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 48e200102221c5..e0b15f3dd303a5 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -870,7 +870,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, unsigned long flags; struct workqueue_struct *alua_wq = kaluad_wq; - if (!pg) + if (!pg || scsi_device_get(sdev)) return; spin_lock_irqsave(&pg->lock, flags); @@ -884,14 +884,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg, pg->flags |= ALUA_PG_RUN_RTPG; kref_get(&pg->kref); pg->rtpg_sdev = sdev; - scsi_device_get(sdev); start_queue = 1; } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { pg->flags |= ALUA_PG_RUN_RTPG; /* Do not queue if the worker is already running */ if (!(pg->flags & ALUA_PG_RUNNING)) { kref_get(&pg->kref); - sdev = NULL; start_queue = 1; } } @@ -900,13 +898,15 @@ static void alua_rtpg_queue(struct alua_port_group *pg, alua_wq = kaluad_sync_wq; spin_unlock_irqrestore(&pg->lock, flags); - if (start_queue && - !queue_delayed_work(alua_wq, &pg->rtpg_work, - msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { - if (sdev) - scsi_device_put(sdev); - kref_put(&pg->kref, release_port_group); + if (start_queue) { + if (queue_delayed_work(alua_wq, &pg->rtpg_work, + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) + sdev = NULL; + else + kref_put(&pg->kref, release_port_group); } + if (sdev) + scsi_device_put(sdev); } /* From 7cb689fe42927281b8d98606ae5450173fcc66a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:02 -0700 Subject: [PATCH 1491/1911] scsi: scsi_dh_alua: Ensure that alua_activate() calls the completion function Callers of scsi_dh_activate(), e.g. dm-mpath, assume that this function either returns an error code or calls the completion function. Make alua_activate() call the completion function even if scsi_device_get() fails. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index e0b15f3dd303a5..b6849d3ecefecb 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -113,7 +113,7 @@ struct alua_queue_data { #define ALUA_POLICY_SWITCH_ALL 1 static void alua_rtpg_work(struct work_struct *work); -static void alua_rtpg_queue(struct alua_port_group *pg, +static bool alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force); static void alua_check(struct scsi_device *sdev, bool force); @@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work) kref_put(&pg->kref, release_port_group); } -static void alua_rtpg_queue(struct alua_port_group *pg, +/** + * alua_rtpg_queue() - cause RTPG to be submitted asynchronously + * + * Returns true if and only if alua_rtpg_work() will be called asynchronously. + * That function is responsible for calling @qdata->fn(). 
+ */ +static bool alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force) { @@ -871,7 +877,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, struct workqueue_struct *alua_wq = kaluad_wq; if (!pg || scsi_device_get(sdev)) - return; + return false; spin_lock_irqsave(&pg->lock, flags); if (qdata) { @@ -907,6 +913,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, } if (sdev) scsi_device_put(sdev); + + return true; } /* @@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev, mutex_unlock(&h->init_mutex); goto out; } - fn = NULL; rcu_read_unlock(); mutex_unlock(&h->init_mutex); - alua_rtpg_queue(pg, sdev, qdata, true); + if (alua_rtpg_queue(pg, sdev, qdata, true)) + fn = NULL; + else + err = SCSI_DH_DEV_OFFLINED; kref_put(&pg->kref, release_port_group); out: if (fn) From 0aeccdfe220c360ab888816da06b8eb67d910ff6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:03 -0700 Subject: [PATCH 1492/1911] scsi: scsi_dh_alua: Warn if the first argument of alua_rtpg_queue() is NULL Callers must provide a valid port group to alua_rtpg_queue(). Issue a kernel warning if that is not the case. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index b6849d3ecefecb..c01b47e5b55a89 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -876,7 +876,7 @@ static bool alua_rtpg_queue(struct alua_port_group *pg, unsigned long flags; struct workqueue_struct *alua_wq = kaluad_wq; - if (!pg || scsi_device_get(sdev)) + if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) return false; spin_lock_irqsave(&pg->lock, flags); From 452b94b8c8c7eb7dd0d0fa9a9776e0d02cd73b97 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:00:47 -0700 Subject: [PATCH 1493/1911] mm/swap: don't BUG_ON() due to uninitialized swap slot cache This BUG_ON() triggered for me once at shutdown, and I don't see a reason for the check. The code correctly checks whether the swap slot cache is usable or not, so an uninitialized swap slot cache is not actually problematic afaik. I've temporarily just switched the BUG_ON() to a WARN_ON_ONCE(), since I'm not sure why that seemingly pointless check was there. I suspect the real fix is to just remove it entirely, but for now we'll warn about it but not bring the machine down. 
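For context on the trade-off made above: BUG_ON() oopses and effectively brings the machine down when its condition is true, while WARN_ON_ONCE() prints a single backtrace and lets execution continue; WARN_ON_ONCE() also evaluates to the condition, so a caller that does want to bail out can still test it. A small illustration, separate from the hunk below:

	/* Old behaviour: oops and halt if the cache was never initialized. */
	BUG_ON(!swap_slot_cache_initialized);

	/* New behaviour: print one warning with a backtrace and keep going.
	 * WARN_ON_ONCE() returns the condition, so it could also gate an
	 * early return if that ever turns out to be needed. */
	WARN_ON_ONCE(!swap_slot_cache_initialized);
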
Cc: "Huang, Ying" Cc: Tim Chen Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 9b5bc86f96ad73..7ebb23836f689e 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,7 +267,7 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - BUG_ON(!swap_slot_cache_initialized); + WARN_ON_ONCE(!swap_slot_cache_initialized); cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { From 97da3854c526d3a6ee05c849c96e48d21527606c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:09:39 -0700 Subject: [PATCH 1494/1911] Linux 4.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b841fb36beb2b5..b2faa93193729f 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION* From 93a15b58cfb8a24e666ffca432f19fe65c1cd7d1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Sun, 19 Mar 2017 20:38:40 -0600 Subject: [PATCH 1495/1911] drm/i915/kvmgt: Hold struct kvm reference The kvmgt code keeps a pointer to the struct kvm associated with the device, but doesn't actually hold a reference to it. If we do unclean shutdown testing (ie. killing the user process), then we can see the kvm association to the device unset, which causes kvmgt to trigger a device release via a work queue. Naturally we cannot guarantee that the cached struct kvm pointer is still valid at this point without holding a reference. The observed failure in this case is a stuck cpu trying to acquire the spinlock from the invalid reference, but other failure modes are clearly possible. Hold a reference to avoid this. Signed-off-by: Alex Williamson Cc: stable@vger.kernel.org #v4.10 Cc: Jike Song Cc: Paolo Bonzini Cc: Zhenyu Wang Cc: Zhi Wang Reviewed-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ea3eb270de8f3..d641214578a7dc 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1326,6 +1326,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev) vgpu->handle = (unsigned long)info; info->vgpu = vgpu; info->kvm = kvm; + kvm_get_kvm(info->kvm); kvmgt_protect_table_init(info); gvt_cache_init(vgpu); @@ -1347,6 +1348,7 @@ static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) } kvm_page_track_unregister_notifier(info->kvm, &info->track_node); + kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); gvt_cache_destroy(info->vgpu); vfree(info); From 0cdefd5b5485ee6eb3512a75739d09a4090176ed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 18 Mar 2017 21:53:20 -0700 Subject: [PATCH 1496/1911] ARM: dts: sun7i: lamobo-r1: Fix CPU port RGMII settings The CPU port of the BCM53125 is configured with RGMII (no delays) but this should actually be RGMII with transmit delay (rgmii-txid) because STMMAC takes care of inserting the transmitter delay. This fixes occasional packet loss encountered. 
Fixes: d7b9eaff5f0c ("ARM: dts: sun7i: Add BCM53125 switch nodes to the lamobo-r1 board") Reported-by: Hartmut Knaack Signed-off-by: Florian Fainelli Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts index 72ec0d5ae052cd..bbf1c8cbaac6aa 100644 --- a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts +++ b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts @@ -167,7 +167,7 @@ reg = <8>; label = "cpu"; ethernet = <&gmac>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; fixed-link { speed = <1000>; full-duplex; From 9693219aa61dc7a75ac015e5c011e889cb821eec Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 18 Mar 2017 05:23:15 +0800 Subject: [PATCH 1497/1911] ARM: sun8i: a23/a33: drop bl_en_pin GPIO pinmux in reference design DTSI The bl_en_pin GPIO pinmux is configured as "gpio_in", which makes it conflicts with the real GPIO usage (out), and makes the backlight not usable. Drop the GPIO pinmux for it, thus this GPIO can be correctly used. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi index 7097c18ff487d4..d6bd15898db6d6 100644 --- a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi +++ b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi @@ -50,8 +50,6 @@ backlight: backlight { compatible = "pwm-backlight"; - pinctrl-names = "default"; - pinctrl-0 = <&bl_en_pin>; pwms = <&pwm 0 50000 PWM_POLARITY_INVERTED>; brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>; default-brightness-level = <8>; @@ -93,11 +91,6 @@ }; &pio { - bl_en_pin: bl_en_pin@0 { - pins = "PH6"; - function = "gpio_in"; - }; - mmc0_cd_pin: mmc0_cd_pin@0 { pins = "PB4"; function = "gpio_in"; From 975629c3f76ce3d86e1f943db9847e0312a98daf Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Mon, 20 Mar 2017 23:49:19 +0800 Subject: [PATCH 1498/1911] drm/i915/gvt: add write handler for mmio mbctl Guest will write mmio mbctl which need a special handler in gvt to clear the bit 4 to inidcate the write operation success. V2: use bit definition macro to make code readable. 
Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index eaff45d417e8a0..6da9ae1618e35e 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -970,6 +970,14 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, return 0; } +static int mbctl_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + *(u32 *)p_data &= (~GEN6_MBCTL_ENABLE_BOOT_FETCH); + write_vreg(vgpu, offset, p_data, bytes); + return 0; +} + static int vga_control_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2238,7 +2246,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x7180, D_ALL); MMIO_D(0x7408, D_ALL); MMIO_D(0x7c00, D_ALL); - MMIO_D(GEN6_MBCTL, D_ALL); + MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write); MMIO_D(0x911c, D_ALL); MMIO_D(0x9120, D_ALL); MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL); From fe686babf4cf62b9b214b99847c735baa35a9fa2 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Wed, 15 Mar 2017 12:23:58 +0100 Subject: [PATCH 1499/1911] clk: sunxi-ng: Fix div/mult settings for osc12M on A64 The mult/div for osc12M was previously backwards (giving a 48M rate for osc12M). Fix it. Signed-off-by: Philipp Tomsich Tested-by: Christoph Muellner Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index e3c084cc6da55e..f54114c607df76 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -566,7 +566,7 @@ static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu", 0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT); /* Fixed Factor clocks */ -static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 1, 2, 0); +static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 2, 1, 0); /* We hardcode the divider to 4 for now */ static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio", From f363a06642f28caaa78cb6446bbad90c73fe183c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Mar 2017 10:08:19 +0100 Subject: [PATCH 1500/1911] ALSA: ctxfi: Fix the incorrect check of dma_set_mask() call In the commit [15c75b09f8d1: ALSA: ctxfi: Fallback DMA mask to 32bit], I forgot to put "!" at dam_set_mask() call check in cthw20k1.c (while cthw20k2.c is OK). This patch fixes that obvious bug. (As a side note: although the original commit was completely wrong, it's still working for most of machines, as it sets to 32bit DMA mask in the end. So the bug severity is low.) 
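The fix above hinges on the return convention of dma_set_mask(): it returns 0 on success and a negative errno if the mask cannot be set, so the success branch has to be selected with '!'. A minimal sketch of the intended fallback, mirroring the corrected cthw20k1.c hunk below (cthw20k2.c already had it right):

	/* dma_set_mask() returns 0 on success, so '!' picks the success path. */
	if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits)))
		dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits));
	else
		dma_set_mask(&pci->dev, DMA_BIT_MASK(32));	/* fall back to 32-bit DMA */
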
Fixes: 15c75b09f8d1 ("ALSA: ctxfi: Fallback DMA mask to 32bit") Cc: Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/cthw20k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index ab4cdab5cfa57a..79edd88d5cd083 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); } else { dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); From 07f5ab6002a4f0b633f3495157166f9f6180871b Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Feb 2017 08:57:26 +0530 Subject: [PATCH 1501/1911] cxl: Route eeh events to all slices for pci_channel_io_perm_failure state Fix a boundary condition where in some cases an eeh event with state == pci_channel_io_perm_failure won't be passed on to a driver attached to the virtual PCI device associated with a slice. This happens when the slice just before it (n-1) doesn't have any vPHB bus associated with it, which results in an early return from the cxl_pci_error_detected() callback. With state == pci_channel_io_perm_failure, the adapter will be removed irrespective of the return value of cxl_vphb_error_detected(). So we now always return PCI_ERS_RESULT_DISCONNECT for this case, i.e. even if the AFU isn't using a vPHB (currently it returns PCI_ERS_RESULT_NONE). Fixes: e4f5fc001a6 ("cxl: Do not create vPHB if there are no AFU configuration records") Signed-off-by: Vaibhav Jain Reviewed-by: Matthew R. Ochs Reviewed-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 91f645992c9416..b27ea98b781f77 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1792,15 +1792,14 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, /* If we're permanently dead, give up. */ if (state == pci_channel_io_perm_failure) { - /* Tell the AFU drivers; but we don't care what they - * say, we're going away. - */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - /* Only participate in EEH if we are on a virtual PHB */ - if (afu->phb == NULL) - return PCI_ERS_RESULT_NONE; - cxl_vphb_error_detected(afu, state); + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + if (afu->phb != NULL) + cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } From b467e08a15563dede0d37d3233baa24fb97a7310 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 18 Mar 2017 04:19:43 +0800 Subject: [PATCH 1502/1911] clk: sunxi-ng: fix recalc_rate formula of NKMP clocks In commit e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets"), the final formula of NKMP clocks' recalc_rate is refactored; however, the refactored formula broke the calculation due to a C operator precedence problem -- the >> operator has lower precedence than * and /, which makes the formula parse as "(parent_rate * n * k) >> (p / m)", whereas it should be "(parent_rate * n * k >> p) / m". Add the pair of parentheses to fix this issue. This pair of parentheses used to exist in the old formula.
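A standalone illustration of the precedence pitfall, with made-up sample values (not kernel code):

    /* parent_rate = 24, n = 4, k = 2, p = 1, m = 2 */
    unsigned long wrong = 24 * 4 * 2 >> 1 / 2;   /* (24*4*2) >> (1/2) = 192 >> 0 = 192 */
    unsigned long fixed = (24 * 4 * 2 >> 1) / 2; /* (192 >> 1) / 2 = 48 */

The integer division p / m truncates to 0 here, so the shift disappears and, in this example, the computed rate comes out four times too high.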
Fixes: e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nkmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c index a2b40a0001577d..488055ed944f2b 100644 --- a/drivers/clk/sunxi-ng/ccu_nkmp.c +++ b/drivers/clk/sunxi-ng/ccu_nkmp.c @@ -107,7 +107,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw, p = reg >> nkmp->p.shift; p &= (1 << nkmp->p.width) - 1; - return parent_rate * n * k >> p / m; + return (parent_rate * n * k >> p) / m; } static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate, From 6d98ce0be541d4a3cfbb52cd75072c0339ebb500 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 17 Mar 2017 15:13:20 +1000 Subject: [PATCH 1503/1911] powerpc/64s: Fix idle wakeup potential to clobber registers We concluded there may be a window where the idle wakeup code could get to pnv_wakeup_tb_loss() (which clobbers non-volatile GPRs), but the hardware may set SRR1[46:47] to 01b (no state loss) which would result in the wakeup code failing to restore non-volatile GPRs. I was not able to trigger this condition with trivial tests on real hardware or simulator, but the ISA (at least 2.07) seems to allow for it, and Gautham says that it can happen if there is an exception pending when the sleep/winkle instruction is executed. Fixes: 1706567117ba ("powerpc/kvm: make hypervisor state restore a function") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Nicholas Piggin Acked-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 99572873667707..6fd08219248db7 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -449,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. + * Before entering any idle state, the NVGPRs are saved in the stack. + * If there was a state loss, or PACA_NAPSTATELOST was set, then the + * NVGPRs are restored. If we are here, it is likely that state is lost, + * but not guaranteed -- neither ISA207 nor ISA300 tests to reach + * here are the same as the test to restore NVGPRS: + * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, + * and SRR1 test for restoring NVGPRs. + * + * We are about to clobber NVGPRs now, so set NAPSTATELOST to + * guarantee they will always be restored. This might be tightened + * with careful reading of specs (particularly for ISA300) but this + * is already a slow wakeup path and it's simpler to be safe. + */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) + + /* * * Save SRR1 and LR in NVGPRs as they might be clobbered in * opal_call() (called in CHECK_HMI_INTERRUPT). 
SRR1 is required From 066def56dc712329561abadcea15be9cad7393b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Jan 2017 13:51:43 +0100 Subject: [PATCH 1504/1911] m68k/bitops: Correct signature of test_bit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mm/filemap.c: In function ‘clear_bit_unlock_is_negative_byte’: mm/filemap.c:933: warning: passing argument 2 of ‘test_bit’ discards qualifiers from pointer target type Make the bitmask pointed to by the "vaddr" parameter volatile to fix this, like is done on other architectures. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index b4a9b0d5928dfb..dda58cfe8c22a3 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -148,7 +148,7 @@ static inline void bfchg_mem_change_bit(int nr, volatile unsigned long *vaddr) #define __change_bit(nr, vaddr) change_bit(nr, vaddr) -static inline int test_bit(int nr, const unsigned long *vaddr) +static inline int test_bit(int nr, const volatile unsigned long *vaddr) { return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } From 3820ed470e71d7aa3d355495f7fd1cd8457e7c96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Mar 2017 10:40:02 +0100 Subject: [PATCH 1505/1911] m68k/defconfig: Update defconfigs for v4.11-rc1 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 14 +++++++++++++- arch/m68k/configs/apollo_defconfig | 14 +++++++++++++- arch/m68k/configs/atari_defconfig | 14 +++++++++++++- arch/m68k/configs/bvme6000_defconfig | 14 +++++++++++++- arch/m68k/configs/hp300_defconfig | 14 +++++++++++++- arch/m68k/configs/mac_defconfig | 14 +++++++++++++- arch/m68k/configs/multi_defconfig | 14 +++++++++++++- arch/m68k/configs/mvme147_defconfig | 14 +++++++++++++- arch/m68k/configs/mvme16x_defconfig | 14 +++++++++++++- arch/m68k/configs/q40_defconfig | 14 +++++++++++++- arch/m68k/configs/sun3_defconfig | 14 +++++++++++++- arch/m68k/configs/sun3x_defconfig | 14 +++++++++++++- 12 files changed, 156 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 048bf076f7df66..531cb9eb3319f4 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -60,6 +61,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -71,6 +73,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -101,6 +104,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -371,6 +377,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m 
CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -383,6 +390,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_A2065=y CONFIG_ARIADNE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -404,7 +412,6 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -564,6 +571,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -594,6 +603,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -605,6 +615,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -629,4 +640,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index d4de24963f5f74..ca91d39555da2d 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -362,6 +369,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -378,7 +386,6 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -523,6 +530,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m 
+CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -553,6 +562,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -564,6 +574,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -588,4 +599,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index fc0fd3f871f334..23a3d8a691e223 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -362,6 +368,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -372,6 +379,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_ATARILANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -389,7 +397,6 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SOLARFLARE is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -544,6 +551,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -574,6 +583,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -585,6 +595,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -609,4 +620,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig 
b/arch/m68k/configs/bvme6000_defconfig index 52e984a0aa696a..95deb95140fe92 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index aaeed4422cc975..afae6958db2d77 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m 
CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -363,6 +370,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_HPLANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -379,7 +387,6 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -525,6 +532,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -555,6 +564,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -566,6 +576,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -590,4 +601,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 3bbc9b2f0dac0f..b010734729a79e 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -57,6 +58,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -68,6 +70,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -98,6 +101,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -369,6 +375,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -379,6 +386,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set 
CONFIG_MACMACE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -398,7 +406,6 @@ CONFIG_MAC8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -547,6 +554,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -577,6 +586,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -588,6 +598,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -612,4 +623,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 8f2c0decb2f8ed..0e414549b235b0 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -21,6 +21,7 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -67,6 +68,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -78,6 +80,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -108,6 +111,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -308,6 +312,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -402,6 +408,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -419,6 +426,7 @@ CONFIG_HPLANCE=y CONFIG_MVME147_NET=y CONFIG_SUN3LANCE=y CONFIG_MACMACE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -444,7 +452,6 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -627,6 +634,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -657,6 +666,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m 
CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -668,6 +678,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -692,4 +703,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index c743dd22e96f93..b2e687a0ec3d47 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68030=y @@ -55,6 +56,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -66,6 +68,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -96,6 +99,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -293,6 +297,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -351,6 +357,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_MVME147_NET=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 2ccaca858f0533..cbd8ee24d1bc4e 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ 
b/arch/m68k/configs/mvme16x_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 5599f3fd5fcd44..1e82cc9443399a 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m 
CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -358,6 +364,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -369,6 +376,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -388,7 +396,6 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -538,6 +545,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -568,6 +577,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -579,6 +589,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -603,4 +614,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 313bf0a562ad33..f9e77f57a97250 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3=y @@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -64,6 +66,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -359,6 +366,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_SUN3LANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is 
not set # CONFIG_NET_VENDOR_EZCHIP is not set @@ -375,7 +383,6 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_SUN is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -546,6 +555,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -557,6 +567,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -581,4 +592,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 38b61365f76927..3c394fcfb36836 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3X=y @@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -64,6 +66,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -359,6 +366,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_SUN3LANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -375,7 +383,6 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -547,6 +556,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m 
CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -558,6 +568,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -582,4 +593,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m From e3b1ebd673876f2cdb0b84205e52a33b94a9860f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Mar 2017 11:04:22 +0100 Subject: [PATCH 1506/1911] m68k: Wire up statx Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 1 + arch/m68k/kernel/syscalltable.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index a857d82ec5094a..aab1edd0d4bade 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 379 +#define NR_syscalls 380 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 9fe674bf911fd2..25589f5b866963 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -384,5 +384,6 @@ #define __NR_copy_file_range 376 #define __NR_preadv2 377 #define __NR_pwritev2 378 +#define __NR_statx 379 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index d6fd6d9ced2474..8c9fcfafe0dd90 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -399,3 +399,4 @@ ENTRY(sys_call_table) .long sys_copy_file_range .long sys_preadv2 .long sys_pwritev2 + .long sys_statx From 04d5466a976b096364a39a63ac264c1b3a5f8fa1 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:29:13 +0100 Subject: [PATCH 1507/1911] ALSA: hda - add support for docking station for HP 820 G2 This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 820 G2. 
Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4e112221d82546..8d6b3703d0a291 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4847,6 +4847,7 @@ enum { ALC286_FIXUP_HP_GPIO_LED, ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY, ALC280_FIXUP_HP_DOCK_PINS, + ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -5388,6 +5389,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC280_FIXUP_HP_GPIO4 }, + [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED + }, [ALC280_FIXUP_HP_9480M] = { .type = HDA_FIXUP_FUNC, .v.func = alc280_fixup_hp_9480m, @@ -5647,7 +5658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), - SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), + SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -5816,6 +5827,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, + {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"}, {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, From cc3a47a248d7791ef0d2c81a35c46769e55e4c6c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:30:09 +0100 Subject: [PATCH 1508/1911] ALSA: hda - add support for docking station for HP 840 G3 This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 840 G3. 
Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index c15c51bea26d0a..69266b8ea2ad7b 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -261,6 +261,7 @@ enum { CXT_FIXUP_HP_530, CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, + CXT_FIXUP_HP_DOCK, CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, }; @@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_eapd, }, + [CXT_FIXUP_HP_DOCK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { } + } + }, [CXT_FIXUP_HP_SPECTRE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), + SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), @@ -871,6 +881,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" }, { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, + { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, {} }; From 83749abaafed348cdac6a63b5f76aa9b1b42409b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 20 Mar 2017 10:20:53 +0800 Subject: [PATCH 1509/1911] ASoC: rt5665: fix wrong shift rt5665_if2_1_adc_in_enum The shift is RT5665_IF2_1_ADC_IN_SFT not RT5665_IF3_ADC_IN_SFT. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 61137160c11614..476135ec57268c 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -2252,7 +2252,7 @@ static const char * const rt5665_if2_1_adc_in_src[] = { static const SOC_ENUM_SINGLE_DECL( rt5665_if2_1_adc_in_enum, RT5665_DIG_INF2_DATA, - RT5665_IF3_ADC_IN_SFT, rt5665_if2_1_adc_in_src); + RT5665_IF2_1_ADC_IN_SFT, rt5665_if2_1_adc_in_src); static const struct snd_kcontrol_new rt5665_if2_1_adc_in_mux = SOC_DAPM_ENUM("IF2_1 ADC IN Source", rt5665_if2_1_adc_in_enum); From fdfe4a393e9cd8c92f4489ca207d410f44d05043 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 23:45:21 +0900 Subject: [PATCH 1510/1911] generic syscalls: Wire up statx syscall The new syscall statx is implemented as generic code, so enable it for architectures like openrisc which use the generic syscall table. 
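For context, once an architecture's table has the entry, the new call can be exercised from userspace with a raw syscall(2). A minimal sketch -- it assumes headers recent enough to provide __NR_statx, struct statx and the STATX_* masks:

    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>          /* AT_FDCWD */
    #include <sys/syscall.h>    /* __NR_statx */
    #include <linux/stat.h>     /* struct statx, STATX_BASIC_STATS */

    int main(void)
    {
            struct statx stx;

            if (syscall(__NR_statx, AT_FDCWD, "/etc/hostname", 0,
                        STATX_BASIC_STATS, &stx) != 0) {
                    perror("statx");  /* ENOSYS means the arch lacks the wiring */
                    return 1;
            }
            printf("size=%llu mode=%o\n",
                   (unsigned long long)stx.stx_size, stx.stx_mode);
            return 0;
    }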
Fixes: a528d35e8bfcc ("statx: Add a system call to make enhanced file info available") Cc: Thomas Gleixner Cc: Al Viro Cc: David Howells Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Stafford Horne Signed-off-by: Will Deacon --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 9b1462e38b821a..a076cf1a3a23be 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -730,9 +730,11 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) #define __NR_pkey_free 290 __SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 291 +__SYSCALL(__NR_statx, sys_statx) #undef __NR_syscalls -#define __NR_syscalls 291 +#define __NR_syscalls 292 /* * All syscalls below here should go away really, From 9702c67c6066f583b629cf037d2056245bb7a8e6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Mar 2017 23:07:28 +0800 Subject: [PATCH 1511/1911] scsi: libsas: fix ata xfer length The total ata xfer length may not be calculated properly, in that we do not use the proper method to get an sg element dma length. According to the code comment, sg_dma_len() should be used after dma_map_sg() is called. This issue was found by turning on the SMMUv3 in front of the hisi_sas controller in hip07. Multiple sg elements were being combined into a single element, but the original first element length was being used as the total xfer length. Cc: Fixes: ff2aeb1eb64c8a4770a6 ("libata: convert to chained sg") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 763f012fdeca00..87f5e694dbedd8 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->num_scatter = qc->n_elem; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) - xfer += sg->length; + xfer += sg_dma_len(sg); task->total_xfer_len = xfer; task->num_scatter = si; From 720037f939fa50fc3531035ae61b4cf4b0ff35e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Mar 2017 11:59:56 -0800 Subject: [PATCH 1512/1911] f2fs: don't overwrite node block by SSR This patch fixes a case where SSR could overwrite a previous warm node block that is part of a node chain built since the last checkpoint.
Fixes: 5b6c6be2d878 ("f2fs: use SSR for warm node as well") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4bd7a8b19332d1..29ef7088c5582a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; + + /* don't overwrite by SSR to keep node chain */ + if (se->type == CURSEG_WARM_NODE) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { #ifdef CONFIG_F2FS_CHECK_FS From 9f7e4a2c49fd166f17cf4125766a68dce8716764 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Mar 2017 09:39:57 -0800 Subject: [PATCH 1513/1911] f2fs: declare static functions This is to avoid a build warning reported by the kbuild test robot. Signed-off-by: Fengguang Wu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94967171dee87a..a0a060c2979b62 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1823,7 +1823,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -2383,7 +2384,7 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes, list_add_tail(&nes->set_list, head); } -void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, struct page *page) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2638,7 +2639,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) return 0; } -int init_free_nid_cache(struct f2fs_sb_info *sbi) +static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); From 23380b8568b85cd4b7a056891f4dbf131f7b871d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 14:11:06 -0800 Subject: [PATCH 1514/1911] f2fs: use __set{__clear}_bit_le This patch uses __set{__clear}_bit_le for higher speed.
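The speedup comes from dropping atomicity: set_bit_le()/clear_bit_le() are the atomic bitops, while the double-underscore variants are plain read-modify-write and are only safe when the caller already serializes updates, as f2fs does around these bitmaps. A sketch of the distinction -- the lock and array names are illustrative, not f2fs's actual fields:

    #include <linux/bitops.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);
    static unsigned long demo_bitmap[BITS_TO_LONGS(64)];

    static void mark_free_locked(unsigned int nr)
    {
            spin_lock(&demo_lock);
            __set_bit_le(nr, demo_bitmap);  /* non-atomic RMW: cheaper */
            spin_unlock(&demo_lock);
    }

    static void mark_free_lockless(unsigned int nr)
    {
            set_bit_le(nr, demo_bitmap);    /* atomic op: needed without a lock */
    }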
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/node.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4650c9b85de776..8d5c62b07b283f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a0a060c2979b62..8c81ff614d1a37 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -339,7 +339,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); + __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) @@ -1834,9 +1834,9 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, return; if (set) - set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else - clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,7 +1848,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; - set_bit_le(nat_ofs, nm_i->nat_block_bitmap); + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -2403,16 +2403,16 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, valid++; } if (valid == 0) { - set_bit_le(nat_index, nm_i->empty_nat_bits); - clear_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); return; } - clear_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->empty_nat_bits); if (valid == NAT_ENTRY_PER_BLOCK) - set_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->full_nat_bits); else - clear_bit_le(nat_index, nm_i->full_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); } static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, From 586d1492f301982e349797cfb05d9f343002ffa2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Mar 2017 17:09:07 +0800 Subject: [PATCH 1515/1911] f2fs: skip scanning free nid bitmap of full NAT blocks This patch adds accounting of free nids for each NAT block, and while scanning the free nid bitmap, checks the count and skips lookups in full NAT blocks.
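In miniature, the scan-side shortcut looks like the following; the field names follow the diff below, and the loop is simplified from the real scan_free_nid_bits():

    /* sketch: skip NAT blocks that are known to have no free nids */
    for (i = 0; i < nm_i->nat_blocks; i++) {
            if (!test_bit_le(i, nm_i->nat_block_bitmap))
                    continue;               /* block not cached yet */
            if (!nm_i->free_nid_count[i])
                    continue;               /* full block: nothing to look up */
            /* ... otherwise scan free_nid_bitmap[i] for candidate nids ... */
    }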
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 33 +++++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a77df377e2e819..ee2d0a485fc347 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; si->base_mem += NM_I(sbi)->nat_blocks / 8; + si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); get_cache: si->cache_mem = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e849f83d611407..0a6e115562f62e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -561,6 +561,8 @@ struct f2fs_nm_info { struct mutex build_lock; /* lock for build free nids */ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; unsigned char *nat_block_bitmap; + unsigned short *free_nid_count; /* free nid count of NAT block */ + spinlock_t free_nid_lock; /* protect updating of nid count */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8c81ff614d1a37..87a2b1f740cc26 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1824,7 +1824,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set) + bool set, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1837,6 +1837,13 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + + spin_lock(&nm_i->free_nid_lock); + if (set) + nm_i->free_nid_count[nat_ofs]++; + else if (!build) + nm_i->free_nid_count[nat_ofs]--; + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,6 +1855,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; + if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -1862,7 +1872,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed); + update_free_nid_bitmap(sbi, start_nid, freed, true); } } @@ -1878,6 +1888,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) continue; + if (!nm_i->free_nid_count[i]) + continue; for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { nid_t nid; @@ -2082,7 +2094,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false); + update_free_nid_bitmap(sbi, *nid, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2138,7 +2150,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&nm_i->nid_list_lock); @@ -2468,11 +2480,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); 
spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false); + update_free_nid_bitmap(sbi, nid, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2652,6 +2664,14 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; + + nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * + sizeof(unsigned short), GFP_KERNEL); + if (!nm_i->free_nid_count) + return -ENOMEM; + + spin_lock_init(&nm_i->free_nid_lock); + return 0; } @@ -2731,6 +2751,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kvfree(nm_i->nat_block_bitmap); kvfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); kfree(nm_i->nat_bits); From 7041d5d286fb54635f540c1bb3b43980ed65513a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Mar 2017 20:07:49 +0800 Subject: [PATCH 1516/1911] f2fs: combine nat_bits and free_nid_bitmap cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both nat_bits cache and free_nid_bitmap cache provide same functionality as a intermediate cache between free nid cache and disk, but with different granularity of indicating free nid range, and different persistence policy. nat_bits cache provides better persistence ability, and free_nid_bitmap provides better granularity. In this patch we combine advantage of both caches, so finally policy of the intermediate cache would be: - init: load free nid status from nat_bits into free_nid_bitmap - lookup: scan free_nid_bitmap before load NAT blocks - update: update free_nid_bitmap in real-time - persistence: udpate and persist nat_bits in checkpoint This patch also resolves performance regression reported by lkp-robot. commit: 4ac912427c4214d8031d9ad6fbc3bc75e71512df ("f2fs: introduce free nid bitmap") d00030cf9cd0bb96fdccc41e33d3c91dcbb672ba ("f2fs: use __set{__clear}_bit_le") 1382c0f3f9d3f936c8bc42ed1591cf7a593ef9f7 ("f2fs: combine nat_bits and free_nid_bitmap cache") 4ac912427c4214d8 d00030cf9cd0bb96fdccc41e33 1382c0f3f9d3f936c8bc42ed15 ---------------- -------------------------- -------------------------- %stddev %change %stddev %change %stddev \ | \ | \ 77863 ± 0% +2.1% 79485 ± 1% +50.8% 117404 ± 0% aim7.jobs-per-min 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time.max 896604 ± 0% -0.8% 889221 ± 3% -20.2% 715260 ± 1% aim7.time.involuntary_context_switches 2394 ± 1% +4.6% 2503 ± 1% +3.7% 2481 ± 2% aim7.time.maximum_resident_set_size 6240 ± 0% -1.5% 6145 ± 1% -14.1% 5360 ± 1% aim7.time.system_time 1111357 ± 3% +1.9% 1132509 ± 2% -6.2% 1041932 ± 2% aim7.time.voluntary_context_switches ... 
Signed-off-by: Chao Yu Tested-by: Xiaolong Ye Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 125 +++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 78 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 87a2b1f740cc26..481aa8dc79f46f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); - if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); - /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) e = __lookup_nat_cache(nm_i, ni->ino); @@ -1824,7 +1821,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build) + bool set, bool build, bool locked) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1838,12 +1835,14 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - spin_lock(&nm_i->free_nid_lock); + if (!locked) + spin_lock(&nm_i->free_nid_lock); if (set) nm_i->free_nid_count[nat_ofs]++; else if (!build) nm_i->free_nid_count[nat_ofs]--; - spin_unlock(&nm_i->free_nid_lock); + if (!locked) + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1872,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed, true); + update_free_nid_bitmap(sbi, start_nid, freed, true, false); } } @@ -1920,58 +1919,6 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) up_read(&nm_i->nat_tree_lock); } -static int scan_nat_bits(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct page *page; - unsigned int i = 0; - nid_t nid; - - if (!enabled_nat_bits(sbi, NULL)) - return -EAGAIN; - - down_read(&nm_i->nat_tree_lock); -check_empty: - i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - i = 0; - goto check_partial; - } - - for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK; - nid++) { - if (unlikely(nid >= nm_i->max_nid)) - break; - add_free_nid(sbi, nid, true); - } - - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) - goto out; - i++; - goto check_empty; - -check_partial: - i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - disable_nat_bits(sbi, true); - up_read(&nm_i->nat_tree_lock); - return -EINVAL; - } - - nid = i * NAT_ENTRY_PER_BLOCK; - page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); - - if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) { - i++; - goto check_partial; - } -out: - up_read(&nm_i->nat_tree_lock); - return 0; -} - static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1993,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) if (nm_i->nid_cnt[FREE_NID_LIST]) return; - - /* try to find free nids with nat_bits */ - if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) - return; - } - - /* find next valid candidate */ - if (enabled_nat_bits(sbi, NULL)) 
{ - int idx = find_next_zero_bit_le(nm_i->full_nat_bits, - nm_i->nat_blocks, 0); - - if (idx >= nm_i->nat_blocks) - set_sbi_flag(sbi, SBI_NEED_FSCK); - else - nid = idx * NAT_ENTRY_PER_BLOCK; } /* readahead nat pages to be scanned */ @@ -2094,7 +2026,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false, false); + update_free_nid_bitmap(sbi, *nid, false, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2150,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&nm_i->nid_list_lock); @@ -2480,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false, false); + update_free_nid_bitmap(sbi, nid, false, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2590,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; } +inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i = 0; + nid_t nid, last_nid; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + + nid = i * NAT_ENTRY_PER_BLOCK; + last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + + spin_lock(&nm_i->free_nid_lock); + for (; nid < last_nid; nid++) + update_free_nid_bitmap(sbi, nid, true, true, true); + spin_unlock(&nm_i->free_nid_lock); + } + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + } +} + static int init_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); @@ -2691,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; + /* load free nid status from nat_bits table */ + load_free_nid_bitmap(sbi); + build_free_nids(sbi, true, true); return 0; } From 6be3b6cce1e225f189b68b4e84fc711d19b4277b Mon Sep 17 00:00:00 2001 From: Ryan Hsu Date: Mon, 13 Mar 2017 15:49:03 -0700 Subject: [PATCH 1517/1911] ath10k: fix incorrect wlan_mac_base in qca6174_regs In the 'commit ebee76f7fa46 ("ath10k: allow setting coverage class")', it inherits the design and the address offset from ath9k, but the address is not applicable to QCA6174, which leads to a random crash while doing the resume() operation, since the set_coverage_class.ops will be called from ieee80211_reconfig() when resume() (if the wow is not configured). Fix the incorrect address offset here to avoid the random crash. Verified on QCA6174/hw3.0 with firmware WLAN.RM.4.4-00022-QCARMSWPZ-2. kvalo: this also seems to fix a regression with firmware restart. 
Fixes: ebee76f7fa46 ("ath10k: allow setting coverage class") Cc: # v4.10 Signed-off-by: Ryan Hsu Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index 33fb26833cd0a6..d9f37ee4bfdd3e 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -51,7 +51,7 @@ const struct ath10k_hw_regs qca6174_regs = { .rtc_soc_base_address = 0x00000800, .rtc_wmac_base_address = 0x00001000, .soc_core_base_address = 0x0003a000, - .wlan_mac_base_address = 0x00020000, + .wlan_mac_base_address = 0x00010000, .ce_wrapper_base_address = 0x00034000, .ce0_base_address = 0x00034400, .ce1_base_address = 0x00034800, From 6c6c5e0311c83ffe75e14260fb83e05e21e1d488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 13 Jan 2017 18:59:04 +0100 Subject: [PATCH 1518/1911] KVM: VMX: downgrade warning on unexpected exit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We never needed the call trace and we better rate-limit if it can be triggered by a guest. Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e82ee1e69966..e7ec88961b1a45 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8501,7 +8501,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", + exit_reason); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } From 3863dff0c3dd72984395c93b12383b393c5c3989 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 24 Jan 2017 14:06:48 +0100 Subject: [PATCH 1519/1911] kvm: fix usage of uninit spinlock in avic_vm_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If avic is not enabled, avic_vm_init() does nothing and returns early. However, avic_vm_destroy() still tries to destroy what hasn't been created. The only bad consequence of this now is that avic_vm_destroy() uses svm_vm_data_hash_lock that hasn't been initialized (and is not meant to be used at all if avic is not enabled). Return early from avic_vm_destroy() if avic is not enabled. It has nothing to destroy. 
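For illustration only, a simplified sketch (hypothetical names, not the KVM code) of the invariant this fix restores: teardown must bail out under the same condition as setup, otherwise it touches state, here a spinlock, that was never initialized.

    #include <linux/spinlock.h>
    #include <linux/types.h>

    static bool feature_enabled;            /* stand-in for the avic parameter */
    static spinlock_t feature_lock;

    static void feature_init(void)
    {
            if (!feature_enabled)
                    return;                 /* nothing gets set up */
            spin_lock_init(&feature_lock);
    }

    static void feature_destroy(void)
    {
            if (!feature_enabled)
                    return;                 /* mirror the init-time early return */

            /* safe: feature_lock was initialized in feature_init() */
            spin_lock(&feature_lock);
            /* ... tear down per-feature state ... */
            spin_unlock(&feature_lock);
    }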
Signed-off-by: Dmitry Vyukov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: David Hildenbrand Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1efe2c62b3f8d..5fba70646c3279 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) From 49e190ec332e96ba28f24b86f7a92f614707819b Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 28 Jan 2017 05:01:51 +0800 Subject: [PATCH 1520/1911] PTP: fix ptr_ret.cocci warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/ptp/ptp_kvm.c:229:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: Marcelo Tosatti Signed-off-by: Fengguang Wu Signed-off-by: Radim Krčmář --- drivers/ptp/ptp_kvm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index 09b4df74291e26..bb865695d7a62d 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c @@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void) kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); - if (IS_ERR(kvm_ptp_clock.ptp_clock)) - return PTR_ERR(kvm_ptp_clock.ptp_clock); - - return 0; + return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock); } module_init(ptp_kvm_init); From 6d1b3ad2cd87150fc89bad2331beab173a8ad24d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 12 Mar 2017 00:53:52 -0800 Subject: [PATCH 1521/1911] KVM: nVMX: don't reset kvm mmu twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm mmu is reset once successfully loading CR3 as part of emulating vmentry in nested_vmx_load_cr3(). We should not reset kvm mmu twice. 
Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7ec88961b1a45..c66436530a93af 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10287,8 +10287,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return 1; - kvm_mmu_reset_context(vcpu); - if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; From 3aa53859d23e1b9cf1a60f82a9008e35ef10bd6a Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 13 Mar 2017 09:08:20 -0400 Subject: [PATCH 1522/1911] KVM: Documentation: document MCE ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luiz Capitulino Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c248f772ae616..fd106899afd1b2 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3377,6 +3377,69 @@ struct kvm_ppc_resize_hpt { __u32 pad; }; +4.104 KVM_X86_GET_MCE_CAP_SUPPORTED + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: system ioctl +Parameters: u64 mce_cap (out) +Returns: 0 on success, -1 on error + +Returns supported MCE capabilities. The u64 mce_cap parameter +has the same format as the MSR_IA32_MCG_CAP register. Supported +capabilities will have the corresponding bits set. + +4.105 KVM_X86_SETUP_MCE + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: vcpu ioctl +Parameters: u64 mcg_cap (in) +Returns: 0 on success, + -EFAULT if u64 mcg_cap cannot be read, + -EINVAL if the requested number of banks is invalid, + -EINVAL if requested MCE capability is not supported. + +Initializes MCE support for use. The u64 mcg_cap parameter +has the same format as the MSR_IA32_MCG_CAP register and +specifies which capabilities should be enabled. The maximum +supported number of error-reporting banks can be retrieved when +checking for KVM_CAP_MCE. The supported capabilities can be +retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED. + +4.106 KVM_X86_SET_MCE + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_x86_mce (in) +Returns: 0 on success, + -EFAULT if struct kvm_x86_mce cannot be read, + -EINVAL if the bank number is invalid, + -EINVAL if VAL bit is not set in status field. + +Inject a machine check error (MCE) into the guest. The input +parameter is: + +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +If the MCE being reported is an uncorrected error, KVM will +inject it as an MCE exception into the guest. If the guest +MCG_STATUS register reports that an MCE is in progress, KVM +causes an KVM_EXIT_SHUTDOWN vmexit. + +Otherwise, if the MCE is a corrected error, KVM will just +store it in the corresponding bank (provided this bank is +not holding a previously reported uncorrected error). + 5. 
The kvm_run structure ------------------------ From c3104aae5d8cc443556f8613466e16737326e215 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 17:36:25 +0100 Subject: [PATCH 1523/1911] remoteproc: qcom: fix QCOM_SMD dependencies qcom_smd_register_edge() is provided by either QCOM_SMD or RPMSG_QCOM_SMD, and if both of them are disabled, it does nothing. The check for the PIL drivers however only checks for QCOM_SMD, so it breaks with QCOM_SMD=n && RPMSG_QCOM_SMD=m: drivers/remoteproc/built-in.o: In function `smd_subdev_remove': qcom_wcnss_iris.c:(.text+0x231c): undefined reference to `qcom_smd_unregister_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_wcnss_iris.c:(.text+0x2344): undefined reference to `qcom_smd_register_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_q6v5_pil.c:(.text+0x3538): undefined reference to `qcom_smd_register_edge' qcom_q6v5_pil.c:(.text+0x3538): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_smd_register_edge' This clarifies the Kconfig dependency. Fixes: 4b48921a8f74 ("remoteproc: qcom: Use common SMD edge handler") Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Andersson --- drivers/remoteproc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 65f86bc24c07c7..1dc43fc5f65f38 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -76,7 +76,7 @@ config QCOM_ADSP_PIL depends on OF && ARCH_QCOM depends on REMOTEPROC depends on QCOM_SMEM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_RPROC_COMMON @@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on REMOTEPROC - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) depends on QCOM_SMEM depends on REMOTEPROC select QCOM_MDT_LOADER From 4296f23ed49a15d36949458adcc66ff993dee2a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 19 Mar 2017 14:30:02 +0100 Subject: [PATCH 1524/1911] cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start() sugov_start() only initializes struct sugov_cpu per-CPU structures for shared policies, but it should do that for single-CPU policies too. That in particular makes the IO-wait boost mechanism work in the cases when cpufreq policies correspond to individual CPUs. Fixes: 21ca6d2c52f8 (cpufreq: schedutil: Add iowait boosting) Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Cc: 4.9+ # 4.9+ --- kernel/sched/cpufreq_schedutil.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cd7cd489f73981..54c577578da689 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } From 814a585038e36cd158bee4ef964e579136cf24c6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 20 Mar 2017 18:46:15 -0700 Subject: [PATCH 1525/1911] ARCv2: make unimplemented vectors as no-ops rather than halt core Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry-arcv2.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 2585632eaa6891..cc558a25b8fa69 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -100,15 +100,21 @@ END(handle_interrupt) ;################### Non TLB Exception Handling ############################# ENTRY(EV_SWI) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_SWI) ENTRY(EV_DivZero) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_DivZero) ENTRY(EV_DCError) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_DCError) ; --------------------------------------------- From 4a53148868493bd5e0b18a9814aaa20bf74e3b26 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Tue, 21 Mar 2017 09:32:19 +0800 Subject: [PATCH 1526/1911] drm/i915/gvt: fix wrong offset when loading RCS mocs Fix the wrong offset of the RCS specific mocs Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch") Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 95ee091ce085de..0beb83563b0870 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -207,7 +207,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { gen9_render_mocs_L3[i] = I915_READ(l3_offset); - I915_WRITE(l3_offset, vgpu_vreg(vgpu, offset)); + I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset)); POSTING_READ(l3_offset); l3_offset.reg += 4; } From 14f5ba26aa7060b4cad9c4c288b9a785bd0cd1a8 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Tue, 21 Mar 2017 10:00:59 +0800 Subject: [PATCH 1527/1911] drm/i915/gvt: Fix guest fail to read EDID leading to black guest console issue. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears missing slaves on the i2c should cause 0xff to be returned rather than 0. So, when the Windows driver tried to address a slave at 0x40 and got 0’s back rather than 0xff’s it must have confused it. Signed-off-by: Paul Durrant Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index f1648fe5e5eaac..42cd09ec63fa7c 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -495,7 +495,8 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, unsigned char val = edid_get_byte(vgpu); aux_data_for_write = (val << 16); - } + } else + aux_data_for_write = (0xff << 16); } /* write the return value in AUX_CH_DATA reg which includes: * ACK of I2C_WRITE From 359b69310014511901bd61cc5f7680cb5de1faef Mon Sep 17 00:00:00 2001 From: Xiaoguang Chen Date: Tue, 21 Mar 2017 10:54:21 +0800 Subject: [PATCH 1528/1911] drm/i915/gvt: set shadow entry to scratch page while p2m failed Sometimes guest driver will only update partial of the GGTT entry then access it. In this situation a failure will happen while translating the gpa to hpa. Now in this situation we let the corresponding shadow entry pointing to a scratch page. Signed-off-by: Zhi Wang Signed-off-by: Xiaoguang Chen Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index da73127158241b..b832bea64e0367 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1837,11 +1837,15 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ret = gtt_entry_p2m(vgpu, &e, &m); if (ret) { gvt_vgpu_err("fail to translate guest gtt entry\n"); - return ret; + /* guest driver may read/write the entry when partial + * update the entry in this situation p2m will fail + * settting the shadow entry to point to a scratch page + */ + ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); } } else { m = e; - m.val64 = 0; + ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); } ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); From ac7ce78ba036c0f9952d9706b1941c7d80c78680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Feb 2017 10:46:20 +0300 Subject: [PATCH 1529/1911] drm/exynos/decon5433: & vs | typo "&" was obviously intended instead of "|". The original condition is always true. 
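To spell out the bug class with a self-contained sketch (the flag value is made up for illustration): ORing with a non-zero constant can never yield zero, so the original test succeeds for every possible out_type, whereas the AND form is true only when the bit is actually set.

    #include <linux/types.h>

    #define EXAMPLE_FLAG    (1 << 4)        /* hypothetical flag value */

    /* Buggy form: 0 | 0x10 == 0x10, so this returns true for any input. */
    static bool flag_set_buggy(unsigned long out_type)
    {
            return out_type | EXAMPLE_FLAG;
    }

    /* Fixed form: non-zero only when the flag bit is present. */
    static bool flag_set_fixed(unsigned long out_type)
    {
            return out_type & EXAMPLE_FLAG;
    }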
Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger") Signed-off-by: Dan Carpenter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 0fd6f7a18364a6..cca32a4fdab39e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -678,7 +678,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) ctx->out_type |= IFTYPE_I80; } - if (ctx->out_type | I80_HW_TRG) { + if (ctx->out_type & I80_HW_TRG) { ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, "samsung,disp-sysreg"); if (IS_ERR(ctx->sysreg)) { From 6bdc92ee4980ca10171a8de338fad612f00bb48f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2017 20:04:16 +0200 Subject: [PATCH 1530/1911] drm/exynos: Remove support for Exynos4415 (SoC not supported anymore) Support for Exynos4415 is going away because there are no internal nor external users. Since commit 46dcf0ff0de3 ("ARM: dts: exynos: Remove exynos4415.dtsi"), the platform cannot be instantiated so remove also the drivers. Signed-off-by: Krzysztof Kozlowski Acked-by: Kukjin Kim Acked-by: Rob Herring Signed-off-by: Inki Dae --- .../bindings/display/exynos/exynos_dsim.txt | 1 - .../bindings/display/exynos/samsung-fimd.txt | 1 - drivers/gpu/drm/exynos/exynos_drm_dsi.c | 15 +-------------- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 18 ++---------------- 4 files changed, 3 insertions(+), 32 deletions(-) diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt index a78265993665a6..ca5204b3bc218b 100644 --- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt +++ b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt @@ -4,7 +4,6 @@ Required properties: - compatible: value should be one of the following "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */ - "samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */ "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */ "samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */ "samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */ diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt index 18645e0228b054..5837402c3adeae 100644 --- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt +++ b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt @@ -11,7 +11,6 @@ Required properties: "samsung,s5pv210-fimd"; /* for S5PV210 SoC */ "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */ - "samsung,exynos4415-fimd"; /* for Exynos4415 SoC */ "samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */ "samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 812e2ec0761d0b..ef6f9c6de09855 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -86,7 +86,7 @@ #define DSIM_SYNC_INFORM (1 << 27) #define DSIM_EOT_DISABLE (1 << 28) #define DSIM_MFLUSH_VS (1 << 29) -/* This flag is valid only for exynos3250/3472/4415/5260/5430 */ +/* This flag is valid only 
for exynos3250/3472/5260/5430 */ #define DSIM_CLKLANE_STOP (1 << 30) /* DSIM_ESCMODE */ @@ -473,17 +473,6 @@ static const struct exynos_dsi_driver_data exynos4_dsi_driver_data = { .reg_values = reg_values, }; -static const struct exynos_dsi_driver_data exynos4415_dsi_driver_data = { - .reg_ofs = exynos_reg_ofs, - .plltmr_reg = 0x58, - .has_clklane_stop = 1, - .num_clks = 2, - .max_freq = 1000, - .wait_for_reset = 1, - .num_bits_resol = 11, - .reg_values = reg_values, -}; - static const struct exynos_dsi_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, @@ -521,8 +510,6 @@ static const struct of_device_id exynos_dsi_of_match[] = { .data = &exynos3_dsi_driver_data }, { .compatible = "samsung,exynos4210-mipi-dsi", .data = &exynos4_dsi_driver_data }, - { .compatible = "samsung,exynos4415-mipi-dsi", - .data = &exynos4415_dsi_driver_data }, { .compatible = "samsung,exynos5410-mipi-dsi", .data = &exynos5_dsi_driver_data }, { .compatible = "samsung,exynos5422-mipi-dsi", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a9fa444c6053c0..661b9fe049e217 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -71,10 +71,10 @@ #define TRIGCON 0x1A4 #define TRGMODE_ENABLE (1 << 0) #define SWTRGCMD_ENABLE (1 << 1) -/* Exynos3250, 3472, 4415, 5260 5410, 5420 and 5422 only supported. */ +/* Exynos3250, 3472, 5260 5410, 5420 and 5422 only supported. */ #define HWTRGEN_ENABLE (1 << 3) #define HWTRGMASK_ENABLE (1 << 4) -/* Exynos3250, 3472, 4415, 5260, 5420 and 5422 only supported. */ +/* Exynos3250, 3472, 5260, 5420 and 5422 only supported. */ #define HWTRIGEN_PER_ENABLE (1 << 31) /* display mode change control register except exynos4 */ @@ -138,18 +138,6 @@ static struct fimd_driver_data exynos4_fimd_driver_data = { .has_vtsel = 1, }; -static struct fimd_driver_data exynos4415_fimd_driver_data = { - .timing_base = 0x20000, - .lcdblk_offset = 0x210, - .lcdblk_vt_shift = 10, - .lcdblk_bypass_shift = 1, - .trg_type = I80_HW_TRG, - .has_shadowcon = 1, - .has_vidoutcon = 1, - .has_vtsel = 1, - .has_trigger_per_te = 1, -}; - static struct fimd_driver_data exynos5_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x214, @@ -210,8 +198,6 @@ static const struct of_device_id fimd_driver_dt_match[] = { .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", .data = &exynos4_fimd_driver_data }, - { .compatible = "samsung,exynos4415-fimd", - .data = &exynos4415_fimd_driver_data }, { .compatible = "samsung,exynos5250-fimd", .data = &exynos5_fimd_driver_data }, { .compatible = "samsung,exynos5420-fimd", From a392276d1dec63e5aabe6f527c37de29a729559a Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:56 +0100 Subject: [PATCH 1531/1911] drm/exynos: move crtc event handling to drivers callbacks CRTC event is currently send with next vblank, or instantly in case crtc is being disabled. This approach usually works, but in corner cases it can result in premature event generation. Only device driver is able to verify if the event can be sent. This patch is a first step in that direction - it moves event handling to the drivers. 
Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 1 + drivers/gpu/drm/exynos/exynos7_drm_decon.c | 1 + drivers/gpu/drm/exynos/exynos_drm_crtc.c | 29 ++++++++++--------- drivers/gpu/drm/exynos/exynos_drm_crtc.h | 2 ++ drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 ++ drivers/gpu/drm/exynos/exynos_drm_vidi.c | 1 + drivers/gpu/drm/exynos/exynos_mixer.c | 1 + 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index cca32a4fdab39e..2130ccf1e0364e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -378,6 +378,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); + exynos_crtc_handle_event(crtc); } static void decon_swreset(struct decon_context *ctx) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f9ab19e205e243..48811806fa2727 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -526,6 +526,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); + exynos_crtc_handle_event(crtc); } static void decon_init(struct decon_context *ctx) diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 5367b6664fe37d..c65f4509932c56 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -85,16 +85,28 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - struct drm_pending_vblank_event *event; - unsigned long flags; if (exynos_crtc->ops->atomic_flush) exynos_crtc->ops->atomic_flush(exynos_crtc); +} + +static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { + .enable = exynos_drm_crtc_enable, + .disable = exynos_drm_crtc_disable, + .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, + .atomic_check = exynos_crtc_atomic_check, + .atomic_begin = exynos_crtc_atomic_begin, + .atomic_flush = exynos_crtc_atomic_flush, +}; + +void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc) +{ + struct drm_crtc *crtc = &exynos_crtc->base; + struct drm_pending_vblank_event *event = crtc->state->event; + unsigned long flags; - event = crtc->state->event; if (event) { crtc->state->event = NULL; - spin_lock_irqsave(&crtc->dev->event_lock, flags); if (drm_crtc_vblank_get(crtc) == 0) drm_crtc_arm_vblank_event(crtc, event); @@ -105,15 +117,6 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, } -static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { - .enable = exynos_drm_crtc_enable, - .disable = exynos_drm_crtc_disable, - .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, - .atomic_check = exynos_crtc_atomic_check, - .atomic_begin = exynos_crtc_atomic_begin, - .atomic_flush = exynos_crtc_atomic_flush, -}; - static void exynos_drm_crtc_destroy(struct drm_crtc *crtc) { struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h index 6a581a8af4650f..abd5d6ceac0c2f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h @@ -40,4 +40,6 @@ int 
exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev, */ void exynos_drm_crtc_te_handler(struct drm_crtc *crtc); +void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc); + #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 661b9fe049e217..a3162a4566ce76 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -709,6 +709,8 @@ static void fimd_atomic_flush(struct exynos_drm_crtc *crtc) for (i = 0; i < WINDOWS_NR; i++) fimd_shadow_protect_win(ctx, i, false); + + exynos_crtc_handle_event(crtc); } static void fimd_update_plane(struct exynos_drm_crtc *crtc, diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 57fe514d5c5bf9..5d9a62a87eec75 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -170,6 +170,7 @@ static const struct exynos_drm_crtc_ops vidi_crtc_ops = { .enable_vblank = vidi_enable_vblank, .disable_vblank = vidi_disable_vblank, .update_plane = vidi_update_plane, + .atomic_flush = exynos_crtc_handle_event, }; static void vidi_fake_vblank_timer(unsigned long arg) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 72143ac1052526..25edb635a19762 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1012,6 +1012,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) return; mixer_vsync_set_update(mixer_ctx, true); + exynos_crtc_handle_event(crtc); } static void mixer_enable(struct exynos_drm_crtc *crtc) From 73488331eb9460d14974a1e2c734f77ce8869183 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:57 +0100 Subject: [PATCH 1532/1911] drm/exynos/decon5433: fix vblank event handling Current implementation of event handling assumes that vblank interrupt is always called at the right time. It is not true, it can be delayed due to various reasons. As a result different races can happen. The patch fixes the issue by using hardware frame counter present in DECON to serialize vblank and commit completion events. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 78 ++++++++++++++++++- include/video/exynos5433_decon.h | 8 ++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 2130ccf1e0364e..cfe6f8af849f3e 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -68,6 +68,8 @@ struct decon_context { unsigned long flags; unsigned long out_type; int first_win; + spinlock_t vblank_lock; + u32 frame_id; }; static const uint32_t decon_formats[] = { @@ -122,6 +124,48 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) writel(0, ctx->addr + DECON_VIDINTCON0); } +/* return number of starts/ends of frame transmissions since reset */ +static u32 decon_get_frame_count(struct decon_context *ctx, bool end) +{ + u32 frm, pfrm, status, cnt = 2; + + /* To get consistent result repeat read until frame id is stable. + * Usually the loop will be executed once, in rare cases when the loop + * is executed at frame change time 2nd pass will be needed. 
+ */ + frm = readl(ctx->addr + DECON_CRFMID); + do { + status = readl(ctx->addr + DECON_VIDCON1); + pfrm = frm; + frm = readl(ctx->addr + DECON_CRFMID); + } while (frm != pfrm && --cnt); + + /* CRFMID is incremented on BPORCH in case of I80 and on VSYNC in case + * of RGB, it should be taken into account. + */ + if (!frm) + return 0; + + switch (status & (VIDCON1_VSTATUS_MASK | VIDCON1_I80_ACTIVE)) { + case VIDCON1_VSTATUS_VS: + if (!(ctx->out_type & IFTYPE_I80)) + --frm; + break; + case VIDCON1_VSTATUS_BP: + --frm; + break; + case VIDCON1_I80_ACTIVE: + case VIDCON1_VSTATUS_AC: + if (end) + --frm; + break; + default: + break; + } + + return frm; +} + static void decon_setup_trigger(struct decon_context *ctx) { if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG))) @@ -365,11 +409,14 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, static void decon_atomic_flush(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; + unsigned long flags; int i; if (test_bit(BIT_SUSPENDED, &ctx->flags)) return; + spin_lock_irqsave(&ctx->vblank_lock, flags); + for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); @@ -378,12 +425,18 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); + + ctx->frame_id = decon_get_frame_count(ctx, true); + exynos_crtc_handle_event(crtc); + + spin_unlock_irqrestore(&ctx->vblank_lock, flags); } static void decon_swreset(struct decon_context *ctx) { unsigned int tries; + unsigned long flags; writel(0, ctx->addr + DECON_VIDCON0); for (tries = 2000; tries; --tries) { @@ -401,6 +454,10 @@ static void decon_swreset(struct decon_context *ctx) WARN(tries == 0, "failed to software reset DECON\n"); + spin_lock_irqsave(&ctx->vblank_lock, flags); + ctx->frame_id = 0; + spin_unlock_irqrestore(&ctx->vblank_lock, flags); + if (!(ctx->out_type & IFTYPE_HDMI)) return; @@ -579,6 +636,24 @@ static const struct component_ops decon_component_ops = { .unbind = decon_unbind, }; +static void decon_handle_vblank(struct decon_context *ctx) +{ + u32 frm; + + spin_lock(&ctx->vblank_lock); + + frm = decon_get_frame_count(ctx, true); + + if (frm != ctx->frame_id) { + /* handle only if incremented, take care of wrap-around */ + if ((s32)(frm - ctx->frame_id) > 0) + drm_crtc_handle_vblank(&ctx->crtc->base); + ctx->frame_id = frm; + } + + spin_unlock(&ctx->vblank_lock); +} + static irqreturn_t decon_irq_handler(int irq, void *dev_id) { struct decon_context *ctx = dev_id; @@ -599,7 +674,7 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F)) return IRQ_HANDLED; } - drm_crtc_handle_vblank(&ctx->crtc->base); + decon_handle_vblank(ctx); } out: @@ -672,6 +747,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) __set_bit(BIT_SUSPENDED, &ctx->flags); ctx->dev = dev; ctx->out_type = (unsigned long)of_device_get_match_data(dev); + spin_lock_init(&ctx->vblank_lock); if (ctx->out_type & IFTYPE_HDMI) { ctx->first_win = 1; diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index ef8e2a8ad0afc8..352fc0d9528fe5 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -46,6 +46,7 @@ #define DECON_FRAMEFIFO_STATUS 0x0524 #define DECON_CMU 0x1404 #define DECON_UPDATE 0x1410 +#define DECON_CRFMID 0x1414 #define DECON_UPDATE_SCHEME 0x1438 #define DECON_VIDCON1 0x2000 #define DECON_VIDCON2 0x2004 @@ -142,6 +143,13 @@ #define STANDALONE_UPDATE_F (1 << 0) 
/* DECON_VIDCON1 */ +#define VIDCON1_LINECNT_MASK (0x0fff << 16) +#define VIDCON1_I80_ACTIVE (1 << 15) +#define VIDCON1_VSTATUS_MASK (0x3 << 13) +#define VIDCON1_VSTATUS_VS (0 << 13) +#define VIDCON1_VSTATUS_BP (1 << 13) +#define VIDCON1_VSTATUS_AC (2 << 13) +#define VIDCON1_VSTATUS_FP (3 << 13) #define VIDCON1_VCLK_MASK (0x3 << 9) #define VIDCON1_VCLK_RUN_VDEN_DISABLE (0x3 << 9) #define VIDCON1_VCLK_HOLD (0x0 << 9) From f3cce673e1c11112c536fbc8e6912c5414d7141f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:58 +0100 Subject: [PATCH 1533/1911] drm/exynos/decon5433: signal frame done interrupt at front porch DECON in case of video mode generates interrupt by default at start of vertical back porch. As this interrupt is used to generate VBLANK events more optimal point is start of vertical front porch. Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- include/video/exynos5433_decon.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index cfe6f8af849f3e..a43d0fa0b051c8 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -105,7 +105,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) val |= VIDINTCON0_FRAMEDONE; else - val |= VIDINTCON0_INTFRMEN; + val |= VIDINTCON0_INTFRMEN | VIDINTCON0_FRAMESEL_FP; writel(val, ctx->addr + DECON_VIDINTCON0); } diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index 352fc0d9528fe5..6b083d327e982c 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -127,6 +127,10 @@ /* VIDINTCON0 */ #define VIDINTCON0_FRAMEDONE (1 << 17) +#define VIDINTCON0_FRAMESEL_BP (0 << 15) +#define VIDINTCON0_FRAMESEL_VS (1 << 15) +#define VIDINTCON0_FRAMESEL_AC (2 << 15) +#define VIDINTCON0_FRAMESEL_FP (3 << 15) #define VIDINTCON0_INTFRMEN (1 << 12) #define VIDINTCON0_INTEN (1 << 0) From 82a01783252be726d76cdbbababc16540f582cec Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:59 +0100 Subject: [PATCH 1534/1911] drm/exynos/fimd: signal frame done interrupt at front porch VBLANK interrupt should be signalled as soon as scanout ends, front porch is the best moment. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a3162a4566ce76..3f04d72c448d38 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -243,7 +243,7 @@ static int fimd_enable_vblank(struct exynos_drm_crtc *crtc) val |= VIDINTCON0_INT_FRAME; val &= ~VIDINTCON0_FRAMESEL0_MASK; - val |= VIDINTCON0_FRAMESEL0_VSYNC; + val |= VIDINTCON0_FRAMESEL0_FRONTPORCH; val &= ~VIDINTCON0_FRAMESEL1_MASK; val |= VIDINTCON0_FRAMESEL1_NONE; } From f07d9c2864d4ccf0a0b5bd2dd6491f5204eab5fe Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:28:00 +0100 Subject: [PATCH 1535/1911] drm/exynos/decon5433: fix software trigger mask The patch fixes copy/paste bug introduced during code refactoring. 
Reported-by: Dan Carpenter Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger") Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index a43d0fa0b051c8..c0e8d3302292c9 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -172,8 +172,8 @@ static void decon_setup_trigger(struct decon_context *ctx) return; if (!(ctx->out_type & I80_HW_TRG)) { - writel(TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN - | TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN, + writel(TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F | + TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN, ctx->addr + DECON_TRIGCON); return; } From 9cdf0ed25a9b34fd82cb0eb47a8bdc47dc9f4ff5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Mar 2017 20:38:04 +0200 Subject: [PATCH 1536/1911] drm/exynos: Print kernel pointers in a restricted form Printing raw kernel pointers might reveal information which sometimes we try to hide (e.g. with Kernel Address Space Layout Randomization). Use the "%pK" format so these pointers will be hidden for unprivileged users. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_ipp.c | 22 ++++++++++----------- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index ef6f9c6de09855..c671f8e017db1b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -966,7 +966,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1164,7 +1164,7 @@ static bool exynos_dsi_transfer_finish(struct exynos_dsi *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 95871577015d8a..5b18b5c5fdf255 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1695,7 +1695,7 @@ static int fimc_probe(struct platform_device *pdev) goto err_put_clk; } - DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv); + DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4c28f7ffcc4dd1..55a1579d11b3d7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -218,7 +218,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - 
DRM_DEBUG_KMS("created file object = %p\n", obj->filp); + DRM_DEBUG_KMS("created file object = %pK\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index bef57987759d2c..0506b2b17ac1c4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1723,7 +1723,7 @@ static int gsc_probe(struct platform_device *pdev) return ret; } - DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv); + DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 9c84ee76f18adc..3edda18cc2d2d6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -208,7 +208,7 @@ static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id) * e.g PAUSE state, queue buf, command control. */ list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv); + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); mutex_lock(&ippdrv->cmd_lock); list_for_each_entry(c_node, &ippdrv->cmd_list, list) { @@ -388,7 +388,7 @@ int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, } property->prop_id = ret; - DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n", + DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n", property->prop_id, property->cmd, ippdrv); /* stored property information and ippdrv in private data */ @@ -518,7 +518,7 @@ static int ipp_put_mem_node(struct drm_device *drm_dev, { int i; - DRM_DEBUG_KMS("node[%p]\n", m_node); + DRM_DEBUG_KMS("node[%pK]\n", m_node); if (!m_node) { DRM_ERROR("invalid dequeue node.\n"); @@ -562,7 +562,7 @@ static struct drm_exynos_ipp_mem_node m_node->buf_id = qbuf->buf_id; INIT_LIST_HEAD(&m_node->list); - DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id); + DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id); DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id); for_each_ipp_planar(i) { @@ -659,7 +659,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node, mutex_lock(&c_node->event_lock); list_for_each_entry_safe(e, te, &c_node->event_list, base.link) { - DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e); + DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e); /* * qbuf == NULL condition means all event deletion. 
@@ -750,7 +750,7 @@ static struct drm_exynos_ipp_mem_node /* find memory node from memory list */ list_for_each_entry(m_node, head, list) { - DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node); + DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node); /* compare buffer id */ if (m_node->buf_id == qbuf->buf_id) @@ -767,7 +767,7 @@ static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv, struct exynos_drm_ipp_ops *ops = NULL; int ret = 0; - DRM_DEBUG_KMS("node[%p]\n", m_node); + DRM_DEBUG_KMS("node[%pK]\n", m_node); if (!m_node) { DRM_ERROR("invalid queue node.\n"); @@ -1232,7 +1232,7 @@ static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv, m_node = list_first_entry(head, struct drm_exynos_ipp_mem_node, list); - DRM_DEBUG_KMS("m_node[%p]\n", m_node); + DRM_DEBUG_KMS("m_node[%pK]\n", m_node); ret = ipp_set_mem_node(ippdrv, c_node, m_node); if (ret) { @@ -1601,7 +1601,7 @@ static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev) } ippdrv->prop_list.ipp_id = ret; - DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n", + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n", count++, ippdrv, ret); /* store parent device for node */ @@ -1659,7 +1659,7 @@ static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev, file_priv->ipp_dev = dev; - DRM_DEBUG_KMS("done priv[%p]\n", dev); + DRM_DEBUG_KMS("done priv[%pK]\n", dev); return 0; } @@ -1676,7 +1676,7 @@ static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev, mutex_lock(&ippdrv->cmd_lock); list_for_each_entry_safe(c_node, tc_node, &ippdrv->cmd_list, list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); if (c_node->filp == file) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 6591e406084c16..79282a820ecce1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -748,7 +748,7 @@ static int rotator_probe(struct platform_device *pdev) goto err_ippdrv_register; } - DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv); + DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); platform_set_drvdata(pdev, rot); From 22e098daae7e53763493b9d9976ef8c65190017e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Mar 2017 12:20:42 +0100 Subject: [PATCH 1537/1911] drm/exynos/dsi: make te-gpios optional DSI forwards te-gpios interrupts to display controller, but if display controller works in HW-TRIGGER mode this interrupt is not necessary. Making te-gpios property optional allows to avoid generating spare interrupts. And also if panel device node of command mode panel device doesn't provide te-gpios property then the panel driver failed to probe. This was a critial issue. With this patch we can not only get rid of 60 interrupt callbacks per second but also fix the critial issues. 
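The change below boils down to treating -ENOENT from of_get_named_gpio() as "property absent, feature disabled" rather than as a probe failure. A rough, hedged sketch of that pattern follows; the helper name sketch_get_optional_te_gpio and the way the result is handed back through a pointer are illustrative only and not part of the driver:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/gpio.h>
#include <linux/of.h>
#include <linux/of_gpio.h>

/*
 * Illustrative helper: returns 0 with *te_gpio set to -1 when the
 * property is simply missing (the optional case), 0 with a valid GPIO
 * number when it is present, or a negative errno for a real lookup
 * failure that should abort the probe.
 */
static int sketch_get_optional_te_gpio(struct device *dev,
				       struct device_node *np, int *te_gpio)
{
	int gpio = of_get_named_gpio(np, "te-gpios", 0);

	if (gpio == -ENOENT) {
		*te_gpio = -1;		/* no te-gpios: skip the TE interrupt */
		return 0;
	}
	if (!gpio_is_valid(gpio)) {
		dev_err(dev, "cannot get te-gpios, %d\n", gpio);
		return gpio;		/* genuine error: propagate it */
	}
	*te_gpio = gpio;
	return 0;
}

A caller can then fall back to hardware trigger whenever *te_gpio ends up negative, which is in the spirit of what the commit message describes for command-mode panels that do not provide a TE line.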
Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index c671f8e017db1b..d7ef26370e67c5 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1335,9 +1335,12 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi) int te_gpio_irq; dsi->te_gpio = of_get_named_gpio(dsi->panel_node, "te-gpios", 0); + if (dsi->te_gpio == -ENOENT) + return 0; + if (!gpio_is_valid(dsi->te_gpio)) { - dev_err(dsi->dev, "no te-gpios specified\n"); ret = dsi->te_gpio; + dev_err(dsi->dev, "cannot get te-gpios, %d\n", ret); goto out; } From fc36a903265c18d124cefaba364a7fa71b21be61 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 21 Mar 2017 12:38:02 +1100 Subject: [PATCH 1538/1911] Revert "powerpc/64: Disable use of radix under a hypervisor" This reverts commit 3f91a89d424a79f8082525db5a375e438887bb3e. Now that we do have the machinery for using the radix MMU under a hypervisor, the extra check and comment introduced in 3f91a89d424a are no longer correct. The result is that when booted under a hypervisor that only allows use of radix, we clear the MMU_FTR_TYPE_RADIX and then set it again, and print a warning about ignoring the disable_radix command line option, even though the command line does not include "disable_radix". Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9be992083d2a7f..c22f207aa6564b 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -397,8 +397,7 @@ static void early_check_vec5(void) void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ - /* We don't yet have the machinery to do radix as a guest. */ - if (disable_radix || !(mfmsr() & MSR_HV)) + if (disable_radix) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; /* From cc638a488a5205713b51eabd047be6ea641cc328 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 20 Mar 2017 17:55:22 +1100 Subject: [PATCH 1539/1911] gcc-plugins: update architecture list in documentation Commit 65c059bcaa73 ("powerpc: Enable support for GCC plugins") enabled GCC plugins on powerpc, but neglected to update the architecture list in the docs. Rectify this. Fixes: 65c059bcaa73 ("powerpc: Enable support for GCC plugins") Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- Documentation/gcc-plugins.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt index 891c6946443482..433eaefb4aa171 100644 --- a/Documentation/gcc-plugins.txt +++ b/Documentation/gcc-plugins.txt @@ -18,8 +18,8 @@ because gcc versions 4.5 and 4.6 are compiled by a C compiler, gcc-4.7 can be compiled by a C or a C++ compiler, and versions 4.8+ can only be compiled by a C++ compiler. -Currently the GCC plugin infrastructure supports only the x86, arm and arm64 -architectures. +Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and +powerpc architectures. This infrastructure was ported from grsecurity [6] and PaX [7]. 
From a82f16188a32a3c889916c582ea2d9188e3c2734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:05 +0100 Subject: [PATCH 1540/1911] ASoC: sun8i-codec: Remove analog "HP" widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "HP" widget is already present and take part to the analog part (sun8i-codec-analog). Remove it from the digital part as it is unnecessary. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index b92bdc8361af3a..d60f6fbd36a286 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -321,8 +321,6 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SUN8I_MOD_RST_CTL_AIF1, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("RST DAC", SUN8I_MOD_RST_CTL, SUN8I_MOD_RST_CTL_DAC, 0, NULL, 0), - - SND_SOC_DAPM_OUTPUT("HP"), }; static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { @@ -344,10 +342,6 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { /* DAC Mixer Routes */ { "Left DAC Mixer", "LSlot 0", "Digital Left DAC"}, { "Right DAC Mixer", "RSlot 0", "Digital Right DAC"}, - - /* End of route : HP out */ - { "HP", NULL, "Left DAC Mixer" }, - { "HP", NULL, "Right DAC Mixer" }, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { From 649d55436137b397accb6a9d1b6975598c693bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:06 +0100 Subject: [PATCH 1541/1911] ASoC: sun8i-codec: Update mixer to use SOC_DAPM_DOUBLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the driver to use the new SOC_DAPM_DOUBLE definition on the digital DAC mixer. Update the names accordingly as, when they are shared, the controls are not prefixed with the widget's name anymore. 
Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 45 ++++++++++++++++------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index d60f6fbd36a286..107fa82136003f 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -259,25 +259,20 @@ static int sun8i_codec_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_kcontrol_new sun8i_output_left_mixer_controls[] = { - SOC_DAPM_SINGLE("LSlot 0", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L, 1, 0), - SOC_DAPM_SINGLE("LSlot 1", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L, 1, 0), - SOC_DAPM_SINGLE("DACL", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL, 1, 0), - SOC_DAPM_SINGLE("ADCL", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL, 1, 0), -}; - -static const struct snd_kcontrol_new sun8i_output_right_mixer_controls[] = { - SOC_DAPM_SINGLE("RSlot 0", SUN8I_DAC_MXR_SRC, +static const struct snd_kcontrol_new sun8i_dac_mixer_controls[] = { + SOC_DAPM_DOUBLE("AIF1 Slot 0 Digital DAC Playback Switch", + SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA0R, 1, 0), - SOC_DAPM_SINGLE("RSlot 1", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("AIF1 Slot 1 Digital DAC Playback Switch", + SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA1R, 1, 0), - SOC_DAPM_SINGLE("DACR", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("AIF2 Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF2DACR, 1, 0), - SOC_DAPM_SINGLE("ADCR", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("ADC Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_ADCR, 1, 0), }; @@ -293,12 +288,12 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), /* DAC Mixers */ - SND_SOC_DAPM_MIXER("Left DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_output_left_mixer_controls, - ARRAY_SIZE(sun8i_output_left_mixer_controls)), - SND_SOC_DAPM_MIXER("Right DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_output_right_mixer_controls, - ARRAY_SIZE(sun8i_output_right_mixer_controls)), + SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls, + ARRAY_SIZE(sun8i_dac_mixer_controls)), + SND_SOC_DAPM_MIXER("Right Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls, + ARRAY_SIZE(sun8i_dac_mixer_controls)), /* Clocks */ SND_SOC_DAPM_SUPPLY("MODCLK AFI1", SUN8I_MOD_CLK_ENA, @@ -340,8 +335,10 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { { "Digital Right DAC", NULL, "DAC" }, /* DAC Mixer Routes */ - { "Left DAC Mixer", "LSlot 0", "Digital Left DAC"}, - { "Right DAC Mixer", "RSlot 0", "Digital Right DAC"}, + { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", + "Digital Left DAC"}, + { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch ", + "Digital Right DAC"}, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { From 79e26de81448fad80f6c15ae1e3c9e7fca2740cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:07 +0100 Subject: [PATCH 1542/1911] ASoC: sun8i-codec: Fix space on audio-routing widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit An unwanted space is present in an audio widget's name on the dapm routing. It causes an error on the recognition of this widget (error: ("no dapm match for AIF1 Slot 0 Right"). Remove the space fixes it. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 107fa82136003f..adb13fbd200628 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -337,7 +337,7 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { /* DAC Mixer Routes */ { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", "Digital Left DAC"}, - { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch ", + { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", "Digital Right DAC"}, }; From d1792285ca63e17f8a7eb42efa48834c261a2d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:08 +0100 Subject: [PATCH 1543/1911] ASoC: sun8i-codec: Convert to use SND_SOC_DAPM_AIF_IN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the driver to use SND_SOC_DAPM_AIF_IN instead of SND_SOC_DAPM_DAC. Rename the interface's widgets to be more precise on which slot the interface is connected. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index adb13fbd200628..7527ba29a5a0ea 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -281,11 +281,13 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("DAC", SUN8I_DAC_DIG_CTRL, SUN8I_DAC_DIG_CTRL_ENDA, 0, NULL, 0), - /* Analog DAC */ - SND_SOC_DAPM_DAC("Digital Left DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL, - SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0), - SND_SOC_DAPM_DAC("Digital Right DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL, - SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), + /* Analog DAC AIF */ + SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Left", "Playback", 0, + SUN8I_AIF1_DACDAT_CTRL, + SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0), + SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Right", "Playback", 0, + SUN8I_AIF1_DACDAT_CTRL, + SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), /* DAC Mixers */ SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, @@ -331,14 +333,14 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { { "DAC", NULL, "MODCLK DAC" }, /* DAC Routes */ - { "Digital Left DAC", NULL, "DAC" }, - { "Digital Right DAC", NULL, "DAC" }, + { "AIF1 Slot 0 Right", NULL, "DAC" }, + { "AIF1 Slot 0 Left", NULL, "DAC" }, /* DAC Mixer Routes */ { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", - "Digital Left DAC"}, + "AIF1 Slot 0 Left"}, { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", - "Digital Right DAC"}, + "AIF1 Slot 0 Right"}, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { From eb3abaea7ea42619a48fa84e4b1ff48f1b18d863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:09 +0100 Subject: [PATCH 1544/1911] ARM: dts: sun8i: Update audio-routing with renamed widgets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The digital AIF interfaces has been renamed in the sun8i audio codec driver so the audio-routing in the device tree must be renamed too. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- arch/arm/boot/dts/sun8i-a33.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi index 18c174fef84f51..0467fb365bfca7 100644 --- a/arch/arm/boot/dts/sun8i-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a33.dtsi @@ -113,8 +113,8 @@ simple-audio-card,mclk-fs = <512>; simple-audio-card,aux-devs = <&codec_analog>; simple-audio-card,routing = - "Left DAC", "Digital Left DAC", - "Right DAC", "Digital Right DAC"; + "Left DAC", "AIF1 Slot 0 Left", + "Right DAC", "AIF1 Slot 0 Right"; status = "disabled"; simple-audio-card,cpu { From c6736a94d0e527ddc0d1eb99dbc59886a9ecf471 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Mar 2017 13:26:02 +0100 Subject: [PATCH 1545/1911] ALSA: x86: Make CONFIG_SND_X86 bool CONFIG_SND_X86 is a menu config to filter only for x86-specific drivers in its sub-menu, and this doesn't have to be tristate but rather it should be a bool. Also, like other sub-menu configs, it's more user-friendly to be default=y; it's merely a menu config and the actual drivers are configured in the sub-menu, after all. Fixes: 287599cf2d77 ("ALSA: add Intel HDMI LPE audio driver for BYT/CHT-T") Signed-off-by: Takashi Iwai --- sound/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/x86/Kconfig b/sound/x86/Kconfig index 84c8f8fc597cd6..8adf4d1bd46e71 100644 --- a/sound/x86/Kconfig +++ b/sound/x86/Kconfig @@ -1,6 +1,7 @@ menuconfig SND_X86 - tristate "X86 sound devices" + bool "X86 sound devices" depends on X86 + default y ---help--- X86 sound devices that don't fall under SoC or PCI categories From c520ff3d03f0b5db7146d9beed6373ad5d2a5e0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Mar 2017 13:56:04 +0100 Subject: [PATCH 1546/1911] ALSA: seq: Fix racy cell insertions during snd_seq_pool_done() When snd_seq_pool_done() is called, it marks the closing flag to refuse the further cell insertions. But snd_seq_pool_done() itself doesn't clear the cells but just waits until all cells are cleared by the caller side. That is, it's racy, and this leads to the endless stall as syzkaller spotted. This patch addresses the racy by splitting the setup of pool->closing flag out of snd_seq_pool_done(), and calling it properly before snd_seq_pool_done(). 
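Condensed from the hunks below, the teardown ordering the patch establishes looks like this (snd_seq_queue_client_leave_cells() is the cleanup step specific to the client-pool path; the other callers have their own step in between, or none at all):

	/*
	 * 1) mark the pool closing so the allocation side refuses new
	 *    cells, 2) let the caller clear the cells it still owns,
	 *    3) wait for the rest to drain and free the pool memory.
	 */
	snd_seq_pool_mark_closing(client->pool);
	snd_seq_queue_client_leave_cells(client->number);
	snd_seq_pool_done(client->pool);

Because the closing flag is now raised before any waiting starts, a concurrent writer can no longer insert a new cell after the caller has cleared the pool but before the pool refuses insertions, which is the window that allowed the endless stall seen by syzkaller.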
BugLink: http://lkml.kernel.org/r/CACT4Y+aqqy8bZA1fFieifNxR2fAfFQQABcBHj801+u5ePV0URw@mail.gmail.com Reported-and-tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 1 + sound/core/seq/seq_fifo.c | 3 +++ sound/core/seq/seq_memory.c | 17 +++++++++++++---- sound/core/seq/seq_memory.h | 1 + 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 4c935202ce23be..f3b1d7f50b8115 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1832,6 +1832,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client, info->output_pool != client->pool->size)) { if (snd_seq_write_pool_allocated(client)) { /* remove all existing cells */ + snd_seq_pool_mark_closing(client->pool); snd_seq_queue_client_leave_cells(client->number); snd_seq_pool_done(client->pool); } diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 448efd4e980edf..33980d1c803796 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -72,6 +72,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo) return; *fifo = NULL; + if (f->pool) + snd_seq_pool_mark_closing(f->pool); + snd_seq_fifo_clear(f); /* wake up clients if any */ diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 1a1acf3ddda4c9..d4c61ec9be13d7 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -415,6 +415,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool) return 0; } +/* refuse the further insertion to the pool */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool) +{ + unsigned long flags; + + if (snd_BUG_ON(!pool)) + return; + spin_lock_irqsave(&pool->lock, flags); + pool->closing = 1; + spin_unlock_irqrestore(&pool->lock, flags); +} + /* remove events */ int snd_seq_pool_done(struct snd_seq_pool *pool) { @@ -425,10 +437,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool) return -EINVAL; /* wait for closing all threads */ - spin_lock_irqsave(&pool->lock, flags); - pool->closing = 1; - spin_unlock_irqrestore(&pool->lock, flags); - if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); @@ -485,6 +493,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool) *ppool = NULL; if (pool == NULL) return 0; + snd_seq_pool_mark_closing(pool); snd_seq_pool_done(pool); kfree(pool); return 0; diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h index 4a2ec779b8a701..32f959c17786d9 100644 --- a/sound/core/seq/seq_memory.h +++ b/sound/core/seq/seq_memory.h @@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool) int snd_seq_pool_init(struct snd_seq_pool *pool); /* done pool - free events */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool); int snd_seq_pool_done(struct snd_seq_pool *pool); /* create pool */ From 49cc4c217c0dbb7d09c402472d1f85a81c093f9f Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:57 -0800 Subject: [PATCH 1547/1911] HID: wacom: Correct Intuos Pro 2 resolution The features struct for the second gen Intuos Pro uses the wrong constant for the resolution. This fix is for commit 4922cd2. 
Fixes: 4922cd2 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4aa3de9f1163b3..3d864466d473b1 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -4197,10 +4197,10 @@ static const struct wacom_features wacom_features_0x343 = WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; static const struct wacom_features wacom_features_0x360 = { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_0x361 = { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; From b6b1f19b06b7d4dcc261a88d74c5fb0a53988b4e Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:58 -0800 Subject: [PATCH 1548/1911] HID: wacom: don't manually release resources for the EKR Commit 5b779fc introduces the manual release of resources in wacom_remove() as an addition to the driver's use of devm. The EKR resources can only be released through wacom_remote_destroy_one() so we skip the manual release for it. Fixes: 5b779fc ("HID: wacom: release the resources before leaving despite devm") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index be8f7e2a026f42..994bddc55b8227 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev) /* make sure we don't trigger the LEDs */ wacom_led_groups_release(wacom); - wacom_release_resources(wacom); + + if (wacom->wacom_wac.features.type != REMOTE) + wacom_release_resources(wacom); hid_set_drvdata(hdev, NULL); } From deaba636997557fce46ca7bcb509bff5ea1b0558 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Fri, 10 Feb 2017 18:23:00 +0000 Subject: [PATCH 1549/1911] HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB Add quirks for several corsair gaming devices to avoid long delays on report initialization Supported devices: - Corsair K65RGB Rapidfire Gaming Keyboard - Corsair K70RGB Rapidfire Gaming Keyboard - Corsair Scimitar Pro RGB Gaming Mouse Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b3df60da029711..0e2e7c571d2261 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -278,6 +278,9 @@ #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 +#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38 +#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39 +#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e #define USB_VENDOR_ID_CREATIVELABS 0x041e #define 
USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index d6847a66444652..a69a3c88ab29f5 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -80,6 +80,9 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, From 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Mon, 6 Mar 2017 21:02:39 +0000 Subject: [PATCH 1550/1911] HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e support to hid-corsair This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive Logical Minimum items in its Application (Consumer.0001) report making it non parseable. This patch fixes the report descriptor overriding byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26 (Logical Maximum with 16 bits value). Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-corsair.c | 47 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 8eab3200ac9a69..8c54cb8f5d6d10 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -190,6 +190,7 @@ config HID_CORSAIR Supported devices: - Vengeance K90 + - Scimitar PRO RGB config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index ae01ae601d74e6..3ceb4a2af381f0 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index c0303f61c26a94..9ba5d98a118042 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -3,8 +3,10 @@ * * Supported devices: * - Vengeance K90 Keyboard + * - Scimitar PRO RGB Gaming Mouse * * Copyright (c) 2015 Clement Vuchener + * Copyright (c) 2017 Oscar Campos */ /* @@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev, return 0; } +/* + * The report descriptor of Corsair 
Scimitar RGB Pro gaming mouse is + * non parseable as they define two consecutive Logical Minimum for + * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16 + * that should be obviousy 0x26 for Logical Magimum of 16 bits. This + * prevents poper parsing of the report descriptor due Logical + * Minimum being larger than Logical Maximum. + * + * This driver fixes the report descriptor for: + * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse + */ + +static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + + if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { + /* + * Corsair Scimitar RGB Pro report descriptor is broken and + * defines two different Logical Minimum for the Consumer + * Application. The byte 77 should be a 0x26 defining a 16 + * bits integer for the Logical Maximum but it is a 0x16 + * instead (Logical Minimum) + */ + switch (hdev->product) { + case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: + if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 + && rdesc[78] == 0xff && rdesc[79] == 0x0f) { + hid_info(hdev, "Fixing up report descriptor\n"); + rdesc[77] = 0x26; + } + break; + } + + } + return rdesc; +} + static const struct hid_device_id corsair_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), .driver_data = CORSAIR_USE_K90_MACRO | CORSAIR_USE_K90_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, + USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, {} }; @@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = { .event = corsair_event, .remove = corsair_remove, .input_mapping = corsair_input_mapping, + .report_fixup = corsair_mouse_report_fixup, }; module_hid_driver(corsair_driver); MODULE_LICENSE("GPL"); +/* Original K90 driver author */ MODULE_AUTHOR("Clement Vuchener"); +/* Scimitar PRO RGB driver author */ +MODULE_AUTHOR("Oscar Campos"); MODULE_DESCRIPTION("HID driver for Corsair devices"); From 6e5364f5f472d4ea66d37459e299071b0190362c Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 14 Mar 2017 17:08:16 -0700 Subject: [PATCH 1551/1911] HID: wacom: generic: Wacom mouse is only provided for opaque tablets Commit f85c9dc ("Support tool ID and additional tool types") introduced mouse and lens cursor tools to generic codepath, which covers both display (direct) and opaque tablets (indirect devices). However, mouse and lens cursor tools are only provided for opaque tablets. This patch ignores mouse and lens cursor tools if the device is a display tablet. 
Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 3d864466d473b1..94250c293be2a1 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); - input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); - input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) { + input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); + input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + } break; case WACOM_HID_WD_FINGERWHEEL: wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); From ae5c682113f9f94cc5e76f92cf041ee624c173ee Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 19 Mar 2017 22:35:59 +0800 Subject: [PATCH 1552/1911] netfilter: nfnl_cthelper: fix incorrect helper->expect_class_max The helper->expect_class_max must be set to the total number of expect_policy minus 1, since we will use the statement "if (class > helper->expect_class_max)" to validate the CTA_EXPECT_CLASS attr in ctnetlink_alloc_expect. So for compatibility, set the helper->expect_class_max to the NFCTH_POLICY_SET_NUM attr's value minus 1. Also: it's invalid when the NFCTH_POLICY_SET_NUM attr's value is zero. 1. this will result "expect_policy = kzalloc(0, GFP_KERNEL);"; 2. we cannot set the helper->expect_class_max to a proper value. So if nla_get_be32(tb[NFCTH_POLICY_SET_NUM]) is zero, report -EINVAL to the userspace. 
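As a small worked example of the count/index relationship being fixed below: if userspace sends NFCTH_POLICY_SET_NUM = 3, the helper carries three expect policies with classes 0, 1 and 2, so expect_class_max has to be stored as 2 for the "class > helper->expect_class_max" check in ctnetlink_alloc_expect to accept exactly those classes. Condensed from the hunk that follows (attribute names and error constants as in the patch):

	class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
	if (class_max == 0)			/* zero policies is invalid */
		return -EINVAL;
	if (class_max > NF_CT_MAX_EXPECT_CLASSES)
		return -EOVERFLOW;
	/* ... allocate and parse class_max policy entries ... */
	helper->expect_class_max = class_max - 1;	/* highest index, not count */

The dump path then reports the count back to userspace as expect_class_max + 1, which is the other half of the change.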
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c8637..3cd41d10540749 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) From 3d3d18f086cdda72ee18a454db70ca72c6e3246c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Mar 2017 14:45:31 +0000 Subject: [PATCH 1553/1911] drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker) The rcu_barrier() takes the cpu_hotplug mutex which itself is not reclaim-safe, and so rcu_barrier() is illegal from inside the shrinker. [ 309.661373] ========================================================= [ 309.661376] [ INFO: possible irq lock inversion dependency detected ] [ 309.661380] 4.11.0-rc1-CI-CI_DRM_2333+ #1 Tainted: G W [ 309.661383] --------------------------------------------------------- [ 309.661386] gem_exec_gttfil/6435 just changed the state of lock: [ 309.661389] (rcu_preempt_state.barrier_mutex){+.+.-.}, at: [] _rcu_barrier+0x31/0x160 [ 309.661399] but this lock took another, RECLAIM_FS-unsafe lock in the past: [ 309.661402] (cpu_hotplug.lock){+.+.+.} [ 309.661404] and interrupts could create inverse lock ordering between them. 
[ 309.661410] other info that might help us debug this: [ 309.661414] Possible interrupt unsafe locking scenario: [ 309.661417] CPU0 CPU1 [ 309.661419] ---- ---- [ 309.661421] lock(cpu_hotplug.lock); [ 309.661425] local_irq_disable(); [ 309.661432] lock(rcu_preempt_state.barrier_mutex); [ 309.661441] lock(cpu_hotplug.lock); [ 309.661446] [ 309.661448] lock(rcu_preempt_state.barrier_mutex); [ 309.661453] *** DEADLOCK *** [ 309.661460] 4 locks held by gem_exec_gttfil/6435: [ 309.661464] #0: (sb_writers#10){.+.+.+}, at: [] vfs_write+0x17d/0x1f0 [ 309.661475] #1: (debugfs_srcu){......}, at: [] debugfs_use_file_start+0x41/0xa0 [ 309.661486] #2: (&attr->mutex){+.+.+.}, at: [] simple_attr_write+0x37/0xe0 [ 309.661495] #3: (&dev->struct_mutex){+.+.+.}, at: [] i915_drop_caches_set+0x3a/0x150 [i915] [ 309.661540] the shortest dependencies between 2nd lock and 1st lock: [ 309.661547] -> (cpu_hotplug.lock){+.+.+.} ops: 829 { [ 309.661553] HARDIRQ-ON-W at: [ 309.661560] __lock_acquire+0x5e5/0x1b50 [ 309.661565] lock_acquire+0xc9/0x220 [ 309.661572] __mutex_lock+0x6e/0x990 [ 309.661576] mutex_lock_nested+0x16/0x20 [ 309.661583] get_online_cpus+0x61/0x80 [ 309.661590] kmem_cache_create+0x25/0x1d0 [ 309.661596] debug_objects_mem_init+0x30/0x249 [ 309.661602] start_kernel+0x341/0x3fe [ 309.661607] x86_64_start_reservations+0x2a/0x2c [ 309.661612] x86_64_start_kernel+0x173/0x186 [ 309.661619] verify_cpu+0x0/0xfc [ 309.661622] SOFTIRQ-ON-W at: [ 309.661627] __lock_acquire+0x611/0x1b50 [ 309.661632] lock_acquire+0xc9/0x220 [ 309.661636] __mutex_lock+0x6e/0x990 [ 309.661641] mutex_lock_nested+0x16/0x20 [ 309.661646] get_online_cpus+0x61/0x80 [ 309.661650] kmem_cache_create+0x25/0x1d0 [ 309.661655] debug_objects_mem_init+0x30/0x249 [ 309.661660] start_kernel+0x341/0x3fe [ 309.661664] x86_64_start_reservations+0x2a/0x2c [ 309.661669] x86_64_start_kernel+0x173/0x186 [ 309.661674] verify_cpu+0x0/0xfc [ 309.661677] RECLAIM_FS-ON-W at: [ 309.661682] mark_held_locks+0x6f/0xa0 [ 309.661687] lockdep_trace_alloc+0xb3/0x100 [ 309.661693] kmem_cache_alloc_trace+0x31/0x2e0 [ 309.661699] __smpboot_create_thread.part.1+0x27/0xe0 [ 309.661704] smpboot_create_threads+0x61/0x90 [ 309.661709] cpuhp_invoke_callback+0x9c/0x8a0 [ 309.661713] cpuhp_up_callbacks+0x31/0xb0 [ 309.661718] _cpu_up+0x7a/0xc0 [ 309.661723] do_cpu_up+0x5f/0x80 [ 309.661727] cpu_up+0xe/0x10 [ 309.661734] smp_init+0x71/0xb3 [ 309.661738] kernel_init_freeable+0x94/0x19e [ 309.661743] kernel_init+0x9/0xf0 [ 309.661748] ret_from_fork+0x2e/0x40 [ 309.661752] INITIAL USE at: [ 309.661757] __lock_acquire+0x234/0x1b50 [ 309.661761] lock_acquire+0xc9/0x220 [ 309.661766] __mutex_lock+0x6e/0x990 [ 309.661771] mutex_lock_nested+0x16/0x20 [ 309.661775] get_online_cpus+0x61/0x80 [ 309.661780] __cpuhp_setup_state+0x44/0x170 [ 309.661785] page_alloc_init+0x23/0x3a [ 309.661790] start_kernel+0x124/0x3fe [ 309.661794] x86_64_start_reservations+0x2a/0x2c [ 309.661799] x86_64_start_kernel+0x173/0x186 [ 309.661804] verify_cpu+0x0/0xfc [ 309.661807] } [ 309.661813] ... key at: [] cpu_hotplug+0xb0/0x100 [ 309.661817] ... 
acquired at: [ 309.661821] lock_acquire+0xc9/0x220 [ 309.661825] __mutex_lock+0x6e/0x990 [ 309.661829] mutex_lock_nested+0x16/0x20 [ 309.661833] get_online_cpus+0x61/0x80 [ 309.661837] _rcu_barrier+0x9f/0x160 [ 309.661841] rcu_barrier+0x10/0x20 [ 309.661847] netdev_run_todo+0x5f/0x310 [ 309.661852] rtnl_unlock+0x9/0x10 [ 309.661856] default_device_exit_batch+0x133/0x150 [ 309.661862] ops_exit_list.isra.0+0x4d/0x60 [ 309.661866] cleanup_net+0x1d8/0x2c0 [ 309.661872] process_one_work+0x1f4/0x6d0 [ 309.661876] worker_thread+0x49/0x4a0 [ 309.661881] kthread+0x107/0x140 [ 309.661884] ret_from_fork+0x2e/0x40 [ 309.661890] -> (rcu_preempt_state.barrier_mutex){+.+.-.} ops: 179 { [ 309.661896] HARDIRQ-ON-W at: [ 309.661901] __lock_acquire+0x5e5/0x1b50 [ 309.661905] lock_acquire+0xc9/0x220 [ 309.661910] __mutex_lock+0x6e/0x990 [ 309.661914] mutex_lock_nested+0x16/0x20 [ 309.661919] _rcu_barrier+0x31/0x160 [ 309.661923] rcu_barrier+0x10/0x20 [ 309.661928] netdev_run_todo+0x5f/0x310 [ 309.661932] rtnl_unlock+0x9/0x10 [ 309.661936] default_device_exit_batch+0x133/0x150 [ 309.661941] ops_exit_list.isra.0+0x4d/0x60 [ 309.661946] cleanup_net+0x1d8/0x2c0 [ 309.661951] process_one_work+0x1f4/0x6d0 [ 309.661955] worker_thread+0x49/0x4a0 [ 309.661960] kthread+0x107/0x140 [ 309.661964] ret_from_fork+0x2e/0x40 [ 309.661968] SOFTIRQ-ON-W at: [ 309.661972] __lock_acquire+0x611/0x1b50 [ 309.661977] lock_acquire+0xc9/0x220 [ 309.661981] __mutex_lock+0x6e/0x990 [ 309.661986] mutex_lock_nested+0x16/0x20 [ 309.661990] _rcu_barrier+0x31/0x160 [ 309.661995] rcu_barrier+0x10/0x20 [ 309.661999] netdev_run_todo+0x5f/0x310 [ 309.662003] rtnl_unlock+0x9/0x10 [ 309.662008] default_device_exit_batch+0x133/0x150 [ 309.662013] ops_exit_list.isra.0+0x4d/0x60 [ 309.662017] cleanup_net+0x1d8/0x2c0 [ 309.662022] process_one_work+0x1f4/0x6d0 [ 309.662027] worker_thread+0x49/0x4a0 [ 309.662031] kthread+0x107/0x140 [ 309.662035] ret_from_fork+0x2e/0x40 [ 309.662039] IN-RECLAIM_FS-W at: [ 309.662043] __lock_acquire+0x638/0x1b50 [ 309.662048] lock_acquire+0xc9/0x220 [ 309.662053] __mutex_lock+0x6e/0x990 [ 309.662058] mutex_lock_nested+0x16/0x20 [ 309.662062] _rcu_barrier+0x31/0x160 [ 309.662067] rcu_barrier+0x10/0x20 [ 309.662089] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662109] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662114] simple_attr_write+0xc7/0xe0 [ 309.662119] full_proxy_write+0x4f/0x70 [ 309.662124] __vfs_write+0x23/0x120 [ 309.662128] vfs_write+0xc6/0x1f0 [ 309.662133] SyS_write+0x44/0xb0 [ 309.662138] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662142] INITIAL USE at: [ 309.662147] __lock_acquire+0x234/0x1b50 [ 309.662151] lock_acquire+0xc9/0x220 [ 309.662156] __mutex_lock+0x6e/0x990 [ 309.662160] mutex_lock_nested+0x16/0x20 [ 309.662165] _rcu_barrier+0x31/0x160 [ 309.662169] rcu_barrier+0x10/0x20 [ 309.662174] netdev_run_todo+0x5f/0x310 [ 309.662178] rtnl_unlock+0x9/0x10 [ 309.662183] default_device_exit_batch+0x133/0x150 [ 309.662188] ops_exit_list.isra.0+0x4d/0x60 [ 309.662192] cleanup_net+0x1d8/0x2c0 [ 309.662197] process_one_work+0x1f4/0x6d0 [ 309.662202] worker_thread+0x49/0x4a0 [ 309.662206] kthread+0x107/0x140 [ 309.662210] ret_from_fork+0x2e/0x40 [ 309.662214] } [ 309.662220] ... key at: [] rcu_preempt_state+0x508/0x780 [ 309.662225] ... 
acquired at: [ 309.662229] check_usage_forwards+0x12b/0x130 [ 309.662233] mark_lock+0x360/0x6f0 [ 309.662237] __lock_acquire+0x638/0x1b50 [ 309.662241] lock_acquire+0xc9/0x220 [ 309.662245] __mutex_lock+0x6e/0x990 [ 309.662249] mutex_lock_nested+0x16/0x20 [ 309.662253] _rcu_barrier+0x31/0x160 [ 309.662257] rcu_barrier+0x10/0x20 [ 309.662279] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662298] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662303] simple_attr_write+0xc7/0xe0 [ 309.662307] full_proxy_write+0x4f/0x70 [ 309.662311] __vfs_write+0x23/0x120 [ 309.662315] vfs_write+0xc6/0x1f0 [ 309.662319] SyS_write+0x44/0xb0 [ 309.662323] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662329] stack backtrace: [ 309.662335] CPU: 1 PID: 6435 Comm: gem_exec_gttfil Tainted: G W 4.11.0-rc1-CI-CI_DRM_2333+ #1 [ 309.662342] Hardware name: Hewlett-Packard HP Compaq 8100 Elite SFF PC/304Ah, BIOS 786H1 v01.13 07/14/2011 [ 309.662348] Call Trace: [ 309.662354] dump_stack+0x67/0x92 [ 309.662359] print_irq_inversion_bug.part.19+0x1a4/0x1b0 [ 309.662365] check_usage_forwards+0x12b/0x130 [ 309.662369] mark_lock+0x360/0x6f0 [ 309.662374] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 309.662379] __lock_acquire+0x638/0x1b50 [ 309.662383] ? __mutex_unlock_slowpath+0x3e/0x2e0 [ 309.662388] ? trace_hardirqs_on+0xd/0x10 [ 309.662392] ? _rcu_barrier+0x31/0x160 [ 309.662396] lock_acquire+0xc9/0x220 [ 309.662400] ? _rcu_barrier+0x31/0x160 [ 309.662404] ? _rcu_barrier+0x31/0x160 [ 309.662409] __mutex_lock+0x6e/0x990 [ 309.662412] ? _rcu_barrier+0x31/0x160 [ 309.662416] ? _rcu_barrier+0x31/0x160 [ 309.662421] ? synchronize_rcu_expedited+0x35/0xb0 [ 309.662426] ? _raw_spin_unlock_irqrestore+0x52/0x60 [ 309.662434] mutex_lock_nested+0x16/0x20 [ 309.662438] _rcu_barrier+0x31/0x160 [ 309.662442] rcu_barrier+0x10/0x20 [ 309.662464] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662484] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662489] simple_attr_write+0xc7/0xe0 [ 309.662494] full_proxy_write+0x4f/0x70 [ 309.662498] __vfs_write+0x23/0x120 [ 309.662503] ? rcu_read_lock_sched_held+0x75/0x80 [ 309.662507] ? rcu_sync_lockdep_assert+0x2a/0x50 [ 309.662512] ? __sb_start_write+0x102/0x210 [ 309.662516] ? vfs_write+0x17d/0x1f0 [ 309.662520] vfs_write+0xc6/0x1f0 [ 309.662524] ? trace_hardirqs_on_caller+0xe7/0x200 [ 309.662529] SyS_write+0x44/0xb0 [ 309.662533] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662537] RIP: 0033:0x7f507eac24a0 [ 309.662541] RSP: 002b:00007fffda8720e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 309.662548] RAX: ffffffffffffffda RBX: ffffffff81482bd3 RCX: 00007f507eac24a0 [ 309.662552] RDX: 0000000000000005 RSI: 00007fffda8720f0 RDI: 0000000000000005 [ 309.662557] RBP: ffffc9000048bf88 R08: 0000000000000000 R09: 000000000000002c [ 309.662561] R10: 0000000000000014 R11: 0000000000000246 R12: 00007fffda872230 [ 309.662566] R13: 00007fffda872228 R14: 0000000000000201 R15: 00007fffda8720f0 [ 309.662572] ? 
__this_cpu_preempt_check+0x13/0x20 Fixes: 0eafec6d3244 ("drm/i915: Enable lockless lookup of request tracking via RCU") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100192 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: # v4.9+ Link: http://patchwork.freedesktop.org/patch/msgid/20170314115019.18127-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit bd784b7cc41af7a19cfb705fa6d800e511c4ab02) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170321144531.12344-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a36b..d5d2b4c6ed382d 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -263,7 +263,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ + synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ return freed; } From 590379aef2e3fb7d00a093ed556c0a2714f86916 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 21 Mar 2017 14:47:20 +0000 Subject: [PATCH 1554/1911] drm/i915: make context status notifier head be per engine GVTg has introduced the context status notifier to schedule the GVTg workload. At that time, the notifier is bound to GVTg context only, so GVTg is not aware of host workloads. Now we are going to improve GVTg's guest workload scheduler policy, and add Guc emulation support for new Gen graphics. Both these two features require acknowledgment for all contexts running on hardware. (But will not alter host workload.) So here try to make some change. The change is simple: 1. Move the context status notifier head from i915_gem_context to intel_engine_cs. Which means there is a notifier head per engine instead of per context. Execlist driver still call notifier for each context sched-in/out events of current engine. 2. At GVTg side, it binds a notifier_block for each physical engine at GVTg initialization period. Then GVTg can hear all context status events. In this patch, GVTg do nothing for host context event, but later will add a function there. But in any case, the notifier callback is a noop if this is no active vGPU. Since intel_gvt_init() is called at early initialization stage and require the status notifier head has been initiated, I initiate it in intel_engine_setup(). v2: remove a redundant newline. 
(chris) Fixes: 3c7ba6359d70 ("drm/i915: Introduce execlist context status change notification") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100232 Signed-off-by: Changbin Du Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Zhi Wang Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170313024711.28591-1-changbin.du@intel.com Acked-by: Zhenyu Wang Signed-off-by: Chris Wilson (cherry picked from commit 3fc03069bc6e6c316f19bb526e3c8ce784677477) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170321144720.17020-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 45 +++++++++++-------------- drivers/gpu/drm/i915/i915_gem_context.c | 1 - drivers/gpu/drm/i915/i915_gem_context.h | 3 -- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 7 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 23791920ced1ee..6dfc48b63b718b 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -162,7 +162,6 @@ struct intel_vgpu { atomic_t running_workload_num; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; - struct notifier_block shadow_ctx_notifier_block; #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { @@ -233,6 +232,7 @@ struct intel_gvt { struct intel_gvt_gtt gtt; struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; + struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES]; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); struct intel_vgpu_type *types; unsigned int num_types; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 39a83eb7aeccaf..c4353ed86d4b30 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -130,12 +130,10 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { - struct intel_vgpu *vgpu = container_of(nb, - struct intel_vgpu, shadow_ctx_notifier_block); - struct drm_i915_gem_request *req = - (struct drm_i915_gem_request *)data; - struct intel_gvt_workload_scheduler *scheduler = - &vgpu->gvt->scheduler; + struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; + struct intel_gvt *gvt = container_of(nb, struct intel_gvt, + shadow_ctx_notifier_block[req->engine->id]); + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; @@ -534,15 +532,16 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - int i; + struct intel_engine_cs *engine; + enum intel_engine_id i; gvt_dbg_core("clean workload scheduler\n"); - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (scheduler->thread[i]) { - kthread_stop(scheduler->thread[i]); - scheduler->thread[i] = NULL; - } + for_each_engine(engine, gvt->dev_priv, i) { + atomic_notifier_chain_unregister( + &engine->context_status_notifier, + &gvt->shadow_ctx_notifier_block[i]); + kthread_stop(scheduler->thread[i]); } } @@ -550,18 +549,15 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) { struct 
intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct workload_thread_param *param = NULL; + struct intel_engine_cs *engine; + enum intel_engine_id i; int ret; - int i; gvt_dbg_core("init workload scheduler\n"); init_waitqueue_head(&scheduler->workload_complete_wq); - for (i = 0; i < I915_NUM_ENGINES; i++) { - /* check ring mask at init time */ - if (!HAS_ENGINE(gvt->dev_priv, i)) - continue; - + for_each_engine(engine, gvt->dev_priv, i) { init_waitqueue_head(&scheduler->waitq[i]); param = kzalloc(sizeof(*param), GFP_KERNEL); @@ -580,6 +576,11 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) ret = PTR_ERR(scheduler->thread[i]); goto err; } + + gvt->shadow_ctx_notifier_block[i].notifier_call = + shadow_context_status_change; + atomic_notifier_chain_register(&engine->context_status_notifier, + &gvt->shadow_ctx_notifier_block[i]); } return 0; err: @@ -591,9 +592,6 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu) { - atomic_notifier_chain_unregister(&vgpu->shadow_ctx->status_notifier, - &vgpu->shadow_ctx_notifier_block); - i915_gem_context_put_unlocked(vgpu->shadow_ctx); } @@ -608,10 +606,5 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) vgpu->shadow_ctx->engine[RCS].initialised = true; - vgpu->shadow_ctx_notifier_block.notifier_call = - shadow_context_status_change; - - atomic_notifier_chain_register(&vgpu->shadow_ctx->status_notifier, - &vgpu->shadow_ctx_notifier_block); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c6182081c..e2d83b6d376b03 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -311,7 +311,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->ring_size = 4 * PAGE_SIZE; ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not * present or not in use we still need a small bias as ring wraparound diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 0ac750b90f3d33..e9c008fe14b1d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -160,9 +160,6 @@ struct i915_gem_context { /** desc_template: invariant fields for the HW context descriptor */ u32 desc_template; - /** status_notifier: list of callbacks for context-switch changes */ - struct atomic_notifier_head status_notifier; - /** guilty_count: How many times this context has caused a GPU hang. 
*/ unsigned int guilty_count; /** diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e3432..ab1be5c80ea596 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -105,6 +105,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + dev_priv->engine[id] = engine; return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebf8023d21e6fb..471af3b480adc3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -345,7 +345,8 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) return; - atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); + atomic_notifier_call_chain(&rq->engine->context_status_notifier, + status, rq); } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322cf..13dccb18cd43ed 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -403,6 +403,9 @@ struct intel_engine_cs { */ struct i915_gem_context *legacy_active_context; + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; + struct intel_engine_hangcheck hangcheck; bool needs_cmd_parser; From 5b52330bbfe63b3305765354d6046c9f7f89c011 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 21 Mar 2017 11:26:35 -0400 Subject: [PATCH 1555/1911] audit: fix auditd/kernel connection state tracking What started as a rather straightforward race condition reported by Dmitry using the syzkaller fuzzer ended up revealing some major problems with how the audit subsystem managed its netlink sockets and its connection with the userspace audit daemon. Fixing this properly had quite the cascading effect and what we are left with is this rather large and complicated patch. My initial goal was to try and decompose this patch into multiple smaller patches, but the way these changes are intertwined makes it difficult to split these changes into meaningful pieces that don't break or somehow make things worse for the intermediate states. The patch makes a number of changes, but the most significant are highlighted below: * The auditd tracking variables, e.g. audit_sock, are now gone and replaced by a RCU/spin_lock protected variable auditd_conn which is a structure containing all of the auditd tracking information. * We no longer track the auditd sock directly, instead we track it via the network namespace in which it resides and we use the audit socket associated with that namespace. In spirit, this is what the code was trying to do prior to this patch (at least I think that is what the original authors intended), but it was done rather poorly and added a layer of obfuscation that only masked the underlying problems. * Big backlog queue cleanup, again. In v4.10 we made some pretty big changes to how the audit backlog queues work, here we haven't changed the queue design so much as cleaned up the implementation. 
Brought about by the locking changes, we've simplified kauditd_thread() quite a bit by consolidating the queue handling into a new helper function, kauditd_send_queue(), which allows us to eliminate a lot of very similar code and makes the looping logic in kauditd_thread() clearer. * All netlink messages sent to auditd are now sent via auditd_send_unicast_skb(). Other than just making sense, this makes the lock handling easier. * Change the audit_log_start() sleep behavior so that we never sleep on auditd events (unchanged) or if the caller is holding the audit_cmd_mutex (changed). Previously we didn't sleep if the caller was auditd or if the message type fell between a certain range; the type check was a poor effort of doing what the cmd_mutex check now does. Richard Guy Briggs originally proposed not sleeping the cmd_mutex owner several years ago but his patch wasn't acceptable at the time. At least the idea lives on here. * A problem with the lost record counter has been resolved. Steve Grubb and I both happened to notice this problem and according to some quick testing by Steve, this problem goes back quite some time. It's largely a harmless problem, although it may have left some careful sysadmins quite puzzled. Cc: # 4.10.x- Reported-by: Dmitry Vyukov Signed-off-by: Paul Moore --- kernel/audit.c | 639 +++++++++++++++++++++++++++++------------------ kernel/audit.h | 9 +- kernel/auditsc.c | 6 +- 3 files changed, 399 insertions(+), 255 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index e794544f5e6333..2f4964cfde0b4f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -54,6 +54,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -90,13 +94,34 @@ static u32 audit_default; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; -/* - * If audit records are to be written to the netlink socket, audit_pid - * contains the pid of the auditd process and audit_nlk_portid contains - * the portid to use to send netlink messages to that process. +/* private audit network namespace index */ +static unsigned int audit_net_id; + +/** + * struct audit_net - audit private network namespace data + * @sk: communication socket + */ +struct audit_net { + struct sock *sk; +}; + +/** + * struct auditd_connection - kernel/auditd connection state + * @pid: auditd PID + * @portid: netlink portid + * @net: the associated network namespace + * @lock: spinlock to protect write access + * + * Description: + * This struct is RCU protected; you must either hold the RCU lock for reading + * or the included spinlock for writing. */ -int audit_pid; -static __u32 audit_nlk_portid; +static struct auditd_connection { + int pid; + u32 portid; + struct net *net; + spinlock_t lock; +} auditd_conn; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -123,10 +148,6 @@ u32 audit_sig_sid = 0; */ static atomic_t audit_lost = ATOMIC_INIT(0); -/* The netlink socket. 
*/ -static struct sock *audit_sock; -static unsigned int audit_net_id; - /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -139,6 +160,7 @@ static LIST_HEAD(audit_freelist); /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; +static void kauditd_hold_skb(struct sk_buff *skb); /* queue msgs due to temporary unicast send problems */ static struct sk_buff_head audit_retry_queue; /* queue msgs waiting for new auditd connection */ @@ -192,6 +214,43 @@ struct audit_reply { struct sk_buff *skb; }; +/** + * auditd_test_task - Check to see if a given task is an audit daemon + * @task: the task to check + * + * Description: + * Return 1 if the task is a registered audit daemon, 0 otherwise. + */ +int auditd_test_task(const struct task_struct *task) +{ + int rc; + + rcu_read_lock(); + rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0); + rcu_read_unlock(); + + return rc; +} + +/** + * audit_get_sk - Return the audit socket for the given network namespace + * @net: the destination network namespace + * + * Description: + * Returns the sock pointer if valid, NULL otherwise. The caller must ensure + * that a reference is held for the network namespace while the sock is in use. + */ +static struct sock *audit_get_sk(const struct net *net) +{ + struct audit_net *aunet; + + if (!net) + return NULL; + + aunet = net_generic(net, audit_net_id); + return aunet->sk; +} + static void audit_set_portid(struct audit_buffer *ab, __u32 portid) { if (ab) { @@ -210,9 +269,7 @@ void audit_panic(const char *message) pr_err("%s\n", message); break; case AUDIT_FAIL_PANIC: - /* test audit_pid since printk is always losey, why bother? */ - if (audit_pid) - panic("audit: %s\n", message); + panic("audit: %s\n", message); break; } } @@ -370,21 +427,87 @@ static int audit_set_failure(u32 state) return audit_do_config_change("audit_failure", &audit_failure, state); } -/* - * For one reason or another this nlh isn't getting delivered to the userspace - * audit daemon, just send it to printk. +/** + * auditd_set - Set/Reset the auditd connection state + * @pid: auditd PID + * @portid: auditd netlink portid + * @net: auditd network namespace pointer + * + * Description: + * This function will obtain and drop network namespace references as + * necessary. + */ +static void auditd_set(int pid, u32 portid, struct net *net) +{ + unsigned long flags; + + spin_lock_irqsave(&auditd_conn.lock, flags); + auditd_conn.pid = pid; + auditd_conn.portid = portid; + if (auditd_conn.net) + put_net(auditd_conn.net); + if (net) + auditd_conn.net = get_net(net); + else + auditd_conn.net = NULL; + spin_unlock_irqrestore(&auditd_conn.lock, flags); +} + +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the queued records into the + * hold queue in case auditd reconnects. 
+ */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* if it isn't already broken, break the connection */ + rcu_read_lock(); + if (auditd_conn.pid) + auditd_set(0, 0, NULL); + rcu_read_unlock(); + + /* flush all of the main and retry queues to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); + while ((skb = skb_dequeue(&audit_queue))) + kauditd_hold_skb(skb); +} + +/** + * kauditd_print_skb - Print the audit record to the ring buffer + * @skb: audit record + * + * Whatever the reason, this packet may not make it to the auditd connection + * so write it via printk so the information isn't completely lost. */ static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); - if (nlh->nlmsg_type != AUDIT_EOE) { - if (printk_ratelimit()) - pr_notice("type=%d %s\n", nlh->nlmsg_type, data); - else - audit_log_lost("printk limit exceeded"); - } + if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit()) + pr_notice("type=%d %s\n", nlh->nlmsg_type, data); +} + +/** + * kauditd_rehold_skb - Handle a audit record send failure in the hold queue + * @skb: audit record + * + * Description: + * This should only be used by the kauditd_thread when it fails to flush the + * hold queue. + */ +static void kauditd_rehold_skb(struct sk_buff *skb) +{ + /* put the record back in the queue at the same place */ + skb_queue_head(&audit_hold_queue, skb); + + /* fail the auditd connection */ + auditd_reset(); } /** @@ -421,6 +544,9 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); kfree_skb(skb); + + /* fail the auditd connection */ + auditd_reset(); } /** @@ -441,51 +567,122 @@ static void kauditd_retry_skb(struct sk_buff *skb) } /** - * auditd_reset - Disconnect the auditd connection + * auditd_send_unicast_skb - Send a record via unicast to auditd + * @skb: audit record * * Description: - * Break the auditd/kauditd connection and move all the records in the retry - * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex - * must be held when calling this function. + * Send a skb to the audit daemon, returns positive/zero values on success and + * negative values on failure; in all cases the skb will be consumed by this + * function. If the send results in -ECONNREFUSED the connection with auditd + * will be reset. This function may sleep so callers should not hold any locks + * where this would cause a problem. 
*/ -static void auditd_reset(void) +static int auditd_send_unicast_skb(struct sk_buff *skb) { - struct sk_buff *skb; - - /* break the connection */ - if (audit_sock) { - sock_put(audit_sock); - audit_sock = NULL; + int rc; + u32 portid; + struct net *net; + struct sock *sk; + + /* NOTE: we can't call netlink_unicast while in the RCU section so + * take a reference to the network namespace and grab local + * copies of the namespace, the sock, and the portid; the + * namespace and sock aren't going to go away while we hold a + * reference and if the portid does become invalid after the RCU + * section netlink_unicast() should safely return an error */ + + rcu_read_lock(); + if (!auditd_conn.pid) { + rcu_read_unlock(); + rc = -ECONNREFUSED; + goto err; } - audit_pid = 0; - audit_nlk_portid = 0; + net = auditd_conn.net; + get_net(net); + sk = audit_get_sk(net); + portid = auditd_conn.portid; + rcu_read_unlock(); - /* flush all of the retry queue to the hold queue */ - while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + rc = netlink_unicast(sk, skb, portid, 0); + put_net(net); + if (rc < 0) + goto err; + + return rc; + +err: + if (rc == -ECONNREFUSED) + auditd_reset(); + return rc; } /** - * kauditd_send_unicast_skb - Send a record via unicast to auditd - * @skb: audit record + * kauditd_send_queue - Helper for kauditd_thread to flush skb queues + * @sk: the sending sock + * @portid: the netlink destination + * @queue: the skb queue to process + * @retry_limit: limit on number of netlink unicast failures + * @skb_hook: per-skb hook for additional processing + * @err_hook: hook called if the skb fails the netlink unicast send + * + * Description: + * Run through the given queue and attempt to send the audit records to auditd, + * returns zero on success, negative values on failure. It is up to the caller + * to ensure that the @sk is valid for the duration of this function. + * */ -static int kauditd_send_unicast_skb(struct sk_buff *skb) +static int kauditd_send_queue(struct sock *sk, u32 portid, + struct sk_buff_head *queue, + unsigned int retry_limit, + void (*skb_hook)(struct sk_buff *skb), + void (*err_hook)(struct sk_buff *skb)) { - int rc; + int rc = 0; + struct sk_buff *skb; + static unsigned int failed = 0; - /* if we know nothing is connected, don't even try the netlink call */ - if (!audit_pid) - return -ECONNREFUSED; + /* NOTE: kauditd_thread takes care of all our locking, we just use + * the netlink info passed to us (e.g. sk and portid) */ + + while ((skb = skb_dequeue(queue))) { + /* call the skb_hook for each skb we touch */ + if (skb_hook) + (*skb_hook)(skb); + + /* can we send to anyone via unicast? */ + if (!sk) { + if (err_hook) + (*err_hook)(skb); + continue; + } - /* get an extra skb reference in case we fail to send */ - skb_get(skb); - rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); - if (rc >= 0) { - consume_skb(skb); - rc = 0; + /* grab an extra skb reference in case of error */ + skb_get(skb); + rc = netlink_unicast(sk, skb, portid, 0); + if (rc < 0) { + /* fatal failure for our queue flush attempt? 
*/ + if (++failed >= retry_limit || + rc == -ECONNREFUSED || rc == -EPERM) { + /* yes - error processing for the queue */ + sk = NULL; + if (err_hook) + (*err_hook)(skb); + if (!skb_hook) + goto out; + /* keep processing with the skb_hook */ + continue; + } else + /* no - requeue to preserve ordering */ + skb_queue_head(queue, skb); + } else { + /* it worked - drop the extra reference and continue */ + consume_skb(skb); + failed = 0; + } } - return rc; +out: + return (rc >= 0 ? 0 : rc); } /* @@ -493,16 +690,19 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb) * @skb: audit record * * Description: - * This function doesn't consume an skb as might be expected since it has to - * copy it anyways. + * Write a multicast message to anyone listening in the initial network + * namespace. This function doesn't consume an skb as might be expected since + * it has to copy it anyways. */ static void kauditd_send_multicast_skb(struct sk_buff *skb) { struct sk_buff *copy; - struct audit_net *aunet = net_generic(&init_net, audit_net_id); - struct sock *sock = aunet->nlsk; + struct sock *sock = audit_get_sk(&init_net); struct nlmsghdr *nlh; + /* NOTE: we are not taking an additional reference for init_net since + * we don't have to worry about it going away */ + if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; @@ -526,149 +726,75 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb) } /** - * kauditd_wake_condition - Return true when it is time to wake kauditd_thread - * - * Description: - * This function is for use by the wait_event_freezable() call in - * kauditd_thread(). + * kauditd_thread - Worker thread to send audit records to userspace + * @dummy: unused */ -static int kauditd_wake_condition(void) -{ - static int pid_last = 0; - int rc; - int pid = audit_pid; - - /* wake on new messages or a change in the connected auditd */ - rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last); - if (rc) - pid_last = pid; - - return rc; -} - static int kauditd_thread(void *dummy) { int rc; - int auditd = 0; - int reschedule = 0; - struct sk_buff *skb; - struct nlmsghdr *nlh; + u32 portid = 0; + struct net *net = NULL; + struct sock *sk = NULL; #define UNICAST_RETRIES 5 -#define AUDITD_BAD(x,y) \ - ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES) - - /* NOTE: we do invalidate the auditd connection flag on any sending - * errors, but we only "restore" the connection flag at specific places - * in the loop in order to help ensure proper ordering of audit - * records */ set_freezable(); while (!kthread_should_stop()) { - /* NOTE: possible area for future improvement is to look at - * the hold and retry queues, since only this thread - * has access to these queues we might be able to do - * our own queuing and skip some/all of the locking */ - - /* NOTE: it might be a fun experiment to split the hold and - * retry queue handling to another thread, but the - * synchronization issues and other overhead might kill - * any performance gains */ + /* NOTE: see the lock comments in auditd_send_unicast_skb() */ + rcu_read_lock(); + if (!auditd_conn.pid) { + rcu_read_unlock(); + goto main_queue; + } + net = auditd_conn.net; + get_net(net); + sk = audit_get_sk(net); + portid = auditd_conn.portid; + rcu_read_unlock(); /* attempt to flush the hold queue */ - while (auditd && (skb = skb_dequeue(&audit_hold_queue))) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - /* requeue to the same spot */ - skb_queue_head(&audit_hold_queue, skb); - - auditd = 0; - if 
(AUDITD_BAD(rc, reschedule)) { - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } - } else - /* we were able to send successfully */ - reschedule = 0; + rc = kauditd_send_queue(sk, portid, + &audit_hold_queue, UNICAST_RETRIES, + NULL, kauditd_rehold_skb); + if (rc < 0) { + sk = NULL; + goto main_queue; } /* attempt to flush the retry queue */ - while (auditd && (skb = skb_dequeue(&audit_retry_queue))) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - auditd = 0; - if (AUDITD_BAD(rc, reschedule)) { - kauditd_hold_skb(skb); - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } else - /* temporary problem (we hope), queue - * to the same spot and retry */ - skb_queue_head(&audit_retry_queue, skb); - } else - /* we were able to send successfully */ - reschedule = 0; + rc = kauditd_send_queue(sk, portid, + &audit_retry_queue, UNICAST_RETRIES, + NULL, kauditd_hold_skb); + if (rc < 0) { + sk = NULL; + goto main_queue; } - /* standard queue processing, try to be as quick as possible */ -quick_loop: - skb = skb_dequeue(&audit_queue); - if (skb) { - /* setup the netlink header, see the comments in - * kauditd_send_multicast_skb() for length quirks */ - nlh = nlmsg_hdr(skb); - nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; - - /* attempt to send to any multicast listeners */ - kauditd_send_multicast_skb(skb); - - /* attempt to send to auditd, queue on failure */ - if (auditd) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - auditd = 0; - if (AUDITD_BAD(rc, reschedule)) { - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } - - /* move to the retry queue */ - kauditd_retry_skb(skb); - } else - /* everything is working so go fast! */ - goto quick_loop; - } else if (reschedule) - /* we are currently having problems, move to - * the retry queue */ - kauditd_retry_skb(skb); - else - /* dump the message via printk and hold it */ - kauditd_hold_skb(skb); - } else { - /* we have flushed the backlog so wake everyone */ - wake_up(&audit_backlog_wait); - - /* if everything is okay with auditd (if present), go - * to sleep until there is something new in the queue - * or we have a change in the connected auditd; - * otherwise simply reschedule to give things a chance - * to recover */ - if (reschedule) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } else - wait_event_freezable(kauditd_wait, - kauditd_wake_condition()); - - /* update the auditd connection status */ - auditd = (audit_pid ? 1 : 0); +main_queue: + /* process the main queue - do the multicast send and attempt + * unicast, dump failed record sends to the retry queue; if + * sk == NULL due to previous failures we will just do the + * multicast send and move the record to the retry queue */ + kauditd_send_queue(sk, portid, &audit_queue, 1, + kauditd_send_multicast_skb, + kauditd_retry_skb); + + /* drop our netns reference, no auditd sends past this line */ + if (net) { + put_net(net); + net = NULL; } + sk = NULL; + + /* we have processed all the queues so wake everyone */ + wake_up(&audit_backlog_wait); + + /* NOTE: we want to wake up if there is anything on the queue, + * regardless of if an auditd is connected, as we need to + * do the multicast send and rotate records from the + * main queue to the retry/hold queues */ + wait_event_freezable(kauditd_wait, + (skb_queue_len(&audit_queue) ? 
1 : 0)); } return 0; @@ -678,17 +804,16 @@ int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; - struct net *net = dest->net; - struct audit_net *aunet = net_generic(net, audit_net_id); + struct sock *sk = audit_get_sk(dest->net); /* wait for parent to finish and send an ACK */ mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); while ((skb = __skb_dequeue(&dest->q)) != NULL) - netlink_unicast(aunet->nlsk, skb, dest->portid, 0); + netlink_unicast(sk, skb, dest->portid, 0); - put_net(net); + put_net(dest->net); kfree(dest); return 0; @@ -722,16 +847,15 @@ struct sk_buff *audit_make_reply(__u32 portid, int seq, int type, int done, static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; - struct net *net = reply->net; - struct audit_net *aunet = net_generic(net, audit_net_id); + struct sock *sk = audit_get_sk(reply->net); mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ - netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); - put_net(net); + netlink_unicast(sk, reply->skb, reply->portid, 0); + put_net(reply->net); kfree(reply); return 0; } @@ -949,12 +1073,12 @@ static int audit_set_feature(struct sk_buff *skb) static int audit_replace(pid_t pid) { - struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, - &pid, sizeof(pid)); + struct sk_buff *skb; + skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid)); if (!skb) return -ENOMEM; - return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); + return auditd_send_unicast_skb(skb); } static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -981,7 +1105,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); s.enabled = audit_enabled; s.failure = audit_failure; - s.pid = audit_pid; + rcu_read_lock(); + s.pid = auditd_conn.pid; + rcu_read_unlock(); s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); @@ -1014,30 +1140,44 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) * from the initial pid namespace, but something * to keep in mind if this changes */ int new_pid = s.pid; + pid_t auditd_pid; pid_t requesting_pid = task_tgid_vnr(current); - if ((!new_pid) && (requesting_pid != audit_pid)) { - audit_log_config_change("audit_pid", new_pid, audit_pid, 0); + /* test the auditd connection */ + audit_replace(requesting_pid); + + rcu_read_lock(); + auditd_pid = auditd_conn.pid; + /* only the current auditd can unregister itself */ + if ((!new_pid) && (requesting_pid != auditd_pid)) { + rcu_read_unlock(); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 0); return -EACCES; } - if (audit_pid && new_pid && - audit_replace(requesting_pid) != -ECONNREFUSED) { - audit_log_config_change("audit_pid", new_pid, audit_pid, 0); + /* replacing a healthy auditd is not allowed */ + if (auditd_pid && new_pid) { + rcu_read_unlock(); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 0); return -EEXIST; } + rcu_read_unlock(); + if (audit_enabled != AUDIT_OFF) - audit_log_config_change("audit_pid", new_pid, audit_pid, 1); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 1); + if (new_pid) { - if (audit_sock) - sock_put(audit_sock); - audit_pid = new_pid; - audit_nlk_portid = NETLINK_CB(skb).portid; - sock_hold(skb->sk); - audit_sock = skb->sk; 
- } else { + /* register a new auditd connection */ + auditd_set(new_pid, + NETLINK_CB(skb).portid, + sock_net(NETLINK_CB(skb).sk)); + /* try to process any backlog */ + wake_up_interruptible(&kauditd_wait); + } else + /* unregister the auditd connection */ auditd_reset(); - } - wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); @@ -1090,7 +1230,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) break; } - mutex_unlock(&audit_cmd_mutex); audit_log_common_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) audit_log_format(ab, " msg='%.*s'", @@ -1108,7 +1247,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } audit_set_portid(ab, NETLINK_CB(skb).portid); audit_log_end(ab); - mutex_lock(&audit_cmd_mutex); } break; case AUDIT_ADD_RULE: @@ -1298,26 +1436,26 @@ static int __net_init audit_net_init(struct net *net) struct audit_net *aunet = net_generic(net, audit_net_id); - aunet->nlsk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); - if (aunet->nlsk == NULL) { + aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); + if (aunet->sk == NULL) { audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->nlsk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + return 0; } static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); - struct sock *sock = aunet->nlsk; - mutex_lock(&audit_cmd_mutex); - if (sock == audit_sock) + + rcu_read_lock(); + if (net == auditd_conn.net) auditd_reset(); - mutex_unlock(&audit_cmd_mutex); + rcu_read_unlock(); - netlink_kernel_release(sock); - aunet->nlsk = NULL; + netlink_kernel_release(aunet->sk); } static struct pernet_operations audit_net_ops __net_initdata = { @@ -1335,20 +1473,24 @@ static int __init audit_init(void) if (audit_initialized == AUDIT_DISABLED) return 0; - pr_info("initializing netlink subsys (%s)\n", - audit_default ? "enabled" : "disabled"); - register_pernet_subsys(&audit_net_ops); + memset(&auditd_conn, 0, sizeof(auditd_conn)); + spin_lock_init(&auditd_conn.lock); skb_queue_head_init(&audit_queue); skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); - audit_initialized = AUDIT_INITIALIZED; - audit_enabled = audit_default; - audit_ever_enabled |= !!audit_default; for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); + pr_info("initializing netlink subsys (%s)\n", + audit_default ? "enabled" : "disabled"); + register_pernet_subsys(&audit_net_ops); + + audit_initialized = AUDIT_INITIALIZED; + audit_enabled = audit_default; + audit_ever_enabled |= !!audit_default; + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); if (IS_ERR(kauditd_task)) { int err = PTR_ERR(kauditd_task); @@ -1519,20 +1661,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - /* don't ever fail/sleep on these two conditions: + /* NOTE: don't ever fail/sleep on these two conditions: * 1. auditd generated record - since we need auditd to drain the * queue; also, when we are checking for auditd, compare PIDs using * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace - * 2. 
audit command message - record types 1000 through 1099 inclusive - * are command messages/records used to manage the kernel subsystem - * and the audit userspace, blocking on these messages could cause - * problems under load so don't do it (note: not all of these - * command types are valid as record types, but it is quicker to - * just check two ints than a series of ints in a if/switch stmt) */ - if (!((audit_pid && audit_pid == task_tgid_vnr(current)) || - (type >= 1000 && type <= 1099))) { - long sleep_time = audit_backlog_wait_time; + * 2. generator holding the audit_cmd_mutex - we don't want to block + * while holding the mutex */ + if (!(auditd_test_task(current) || + (current == __mutex_owner(&audit_cmd_mutex)))) { + long stime = audit_backlog_wait_time; while (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { @@ -1541,14 +1679,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, /* sleep if we are allowed and we haven't exhausted our * backlog wait limit */ - if ((gfp_mask & __GFP_DIRECT_RECLAIM) && - (sleep_time > 0)) { + if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - sleep_time = schedule_timeout(sleep_time); + stime = schedule_timeout(stime); remove_wait_queue(&audit_backlog_wait, &wait); } else { if (audit_rate_check() && printk_ratelimit()) @@ -2127,15 +2264,27 @@ void audit_log_link_denied(const char *operation, const struct path *link) */ void audit_log_end(struct audit_buffer *ab) { + struct sk_buff *skb; + struct nlmsghdr *nlh; + if (!ab) return; - if (!audit_rate_check()) { - audit_log_lost("rate limit exceeded"); - } else { - skb_queue_tail(&audit_queue, ab->skb); - wake_up_interruptible(&kauditd_wait); + + if (audit_rate_check()) { + skb = ab->skb; ab->skb = NULL; - } + + /* setup the netlink header, see the comments in + * kauditd_send_multicast_skb() for length quirks */ + nlh = nlmsg_hdr(skb); + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; + + /* queue the netlink packet and poke the kauditd thread */ + skb_queue_tail(&audit_queue, skb); + wake_up_interruptible(&kauditd_wait); + } else + audit_log_lost("rate limit exceeded"); + audit_buffer_free(ab); } diff --git a/kernel/audit.h b/kernel/audit.h index ca579880303ab4..0f1cf6d1878ab3 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context, struct audit_names *n, const struct path *path, int record_num, int *call_panic); -extern int audit_pid; +extern int auditd_test_task(const struct task_struct *task); #define AUDIT_INODE_BUCKETS 32 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -250,10 +250,6 @@ struct audit_netlink_list { int audit_send_list(void *); -struct audit_net { - struct sock *nlsk; -}; - extern int selinux_audit_rule_update(void); extern struct mutex audit_filter_mutex; @@ -340,8 +336,7 @@ extern int audit_filter(int msgtype, unsigned int listtype); extern int __audit_signal_info(int sig, struct task_struct *t); static inline int audit_signal_info(int sig, struct task_struct *t) { - if (unlikely((audit_pid && t->tgid == audit_pid) || - (audit_signals && !audit_dummy_context()))) + if (auditd_test_task(t) || (audit_signals && !audit_dummy_context())) return __audit_signal_info(sig, t); return 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d6a8de5f8fa3d0..e59ffc7fc522ad 100644 --- a/kernel/auditsc.c +++ 
b/kernel/auditsc.c @@ -762,7 +762,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, struct audit_entry *e; enum audit_state state; - if (audit_pid && tsk->tgid == audit_pid) + if (auditd_test_task(tsk)) return AUDIT_DISABLED; rcu_read_lock(); @@ -816,7 +816,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) { struct audit_names *n; - if (audit_pid && tsk->tgid == audit_pid) + if (auditd_test_task(tsk)) return; rcu_read_lock(); @@ -2256,7 +2256,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_context *ctx = tsk->audit_context; kuid_t uid = current_uid(), t_uid = task_uid(t); - if (audit_pid && t->tgid == audit_pid) { + if (auditd_test_task(t)) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { audit_sig_pid = task_tgid_nr(tsk); if (uid_valid(tsk->loginuid)) From df630b8c1e851b5e265dc2ca9c87222e342c093b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:17 +0800 Subject: [PATCH 1556/1911] KVM: x86: clear bus pointer when destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When releasing the bus, let's clear the bus pointers to mark it out. If any further device unregister happens on this bus, we know that we're done if we found the bus being released already. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a17d78759727f3..7445566fadc1fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -727,8 +727,10 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < KVM_NR_BUSES; i++) { kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -3579,6 +3581,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + + /* + * It's possible the bus being released before hand. If so, + * we're done here. + */ + if (!bus) + return 0; + r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { From c248c64387fac5a6b31b343d9acb78f478e8619c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 9 Mar 2017 13:26:07 +0200 Subject: [PATCH 1557/1911] nvme-rdma: handle cpu unplug when re-establishing the controller If a cpu unplug event has occured, we need to take the minimum of the provided nr_io_queues and the number of online cpus, otherwise we won't be able to connect them as blk-mq mapping won't dispatch to those queues. 
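The core of the fix, reduced to a sketch (field and helper names as they appear in the diff below; error handling abbreviated): clamp the requested I/O queue count to the number of online CPUs before asking the controller for queues, so that every queue that gets allocated can actually be reached by a blk-mq mapping.

	unsigned int nr_io_queues;
	int ret;

	/* Never ask for more I/O queues than there are online CPUs. */
	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret)
		return ret;

	/* +1 accounts for the admin queue. */
	ctrl->queue_count = nr_io_queues + 1;
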
Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 779f516e7a4ec4..47a479f26e5d7d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -343,8 +343,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, struct ib_device *ibdev = dev->dev; int ret; - BUG_ON(queue_idx >= ctrl->queue_count); - ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (ret) @@ -652,8 +650,22 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) { + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int i, ret; + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret) + return ret; + + ctrl->queue_count = nr_io_queues + 1; + if (ctrl->queue_count < 2) + return 0; + + dev_info(ctrl->ctrl.device, + "creating %d I/O queues.\n", nr_io_queues); + for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); @@ -1791,20 +1803,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) - return ret; - - ctrl->queue_count = opts->nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; - - dev_info(ctrl->ctrl.device, - "creating %d I/O queues.\n", opts->nr_io_queues); - ret = nvme_rdma_init_io_queues(ctrl); if (ret) return ret; From 945dd5bacc8978439af276976b5dcbbd42333dbc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 13:27:51 +0200 Subject: [PATCH 1558/1911] nvme-loop: handle cpu unplug when re-establishing the controller If a cpu unplug event has occured, we need to take the minimum of the provided nr_io_queues and the number of online cpus, otherwise we won't be able to connect them as blk-mq mapping won't dispatch to those queues. 
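Besides the same clamp as in the RDMA driver, this patch pulls the per-queue setup and teardown into a matched pair of helpers so the create and reset paths stop duplicating the loop. A condensed sketch of how the reworked reset path unwinds on error (labels and helper names taken from the diff below, bodies elided):

	ret = nvme_loop_init_io_queues(ctrl);	/* clamp + nvmet_sq_init() loop */
	if (ret)
		goto out_destroy_admin;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			goto out_destroy_io;
	}
	/* ... switch the controller to NVME_CTRL_LIVE and restart queues ... */
	return;

out_destroy_io:
	nvme_loop_destroy_io_queues(ctrl);	/* nvmet_sq_destroy() loop */
out_destroy_admin:
	nvme_loop_destroy_admin_queue(ctrl);
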
Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 88 ++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 74e04a0855d452..22f7bc6bac7fa7 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { - BUG_ON(queue_idx >= ctrl->queue_count); - iod->req.cmd = &iod->cmd; iod->req.rsp = &iod->rsp; iod->queue = &ctrl->queues[queue_idx]; @@ -314,6 +312,43 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) kfree(ctrl); } +static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +} + +static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; + int ret, i; + + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret || !nr_io_queues) + return ret; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); + + for (i = 1; i <= nr_io_queues; i++) { + ctrl->queues[i].ctrl = ctrl; + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); + if (ret) + goto out_destroy_queues; + + ctrl->queue_count++; + } + + return 0; + +out_destroy_queues: + nvme_loop_destroy_io_queues(ctrl); + return ret; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -385,17 +420,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - int i; - nvme_stop_keep_alive(&ctrl->ctrl); if (ctrl->queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); } if (ctrl->ctrl.state == NVME_CTRL_LIVE) @@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_disable; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_free_queues; - - ctrl->queue_count++; - } + ret = nvme_loop_init_io_queues(ctrl); + if (ret) + goto out_destroy_admin; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) - goto out_free_queues; + goto out_destroy_io; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) return; -out_free_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +out_destroy_io: + nvme_loop_destroy_io_queues(ctrl); +out_destroy_admin: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret, i; - ret = 
nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret || !opts->nr_io_queues) + ret = nvme_loop_init_io_queues(ctrl); + if (ret) return ret; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - for (i = 1; i <= opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_destroy_queues; - - ctrl->queue_count++; - } - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; @@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i <= opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) goto out_cleanup_connect_q; @@ -588,8 +601,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); return ret; } From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 20 Mar 2017 11:25:16 +0100 Subject: [PATCH 1559/1911] reset: fix optional reset_control_get stubs to return NULL When RESET_CONTROLLER is not enabled, the optional reset_control_get stubs should now also return NULL. Since it is now valid for reset_control_assert/deassert/reset/status/put to be called unconditionally, with NULL as an argument for optional resets, the stubs are not allowed to warn anymore. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andrzej Hajda Tested-by: Andrzej Hajda Reviewed-by: Andrzej Hajda Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index 86b4ed75359e85..96fb139bdd08fd 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev) static inline int reset_control_reset(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_assert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_deassert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_status(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline void reset_control_put(struct reset_control *rstc) { - WARN_ON(1); } static inline int __must_check device_reset(struct device *dev) @@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get( const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } #endif /* CONFIG_RESET_CONTROLLER */ From 7292ae3d5a18fb922be496e6bb687647193569b4 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Mon, 20 Mar 2017 20:15:53 +0300 Subject: [PATCH 1560/1911] jump label: fix passing kbuild_cflags when checking for asm goto support The latest change of asm goto support check added passing of KBUILD_CFLAGS to compiler. 
When these flags reference gcc plugins that are not built yet, the check fails. When one runs "make bzImage" followed by "make modules", the kernel is always built with HAVE_JUMP_LABEL disabled, while the modules are built depending on CONFIG_JUMP_LABEL. If HAVE_JUMP_LABEL macro happens to be different, modules are built with undefined references, e.g.: ERROR: "static_key_slow_inc" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/nft_meta.ko] undefined! ERROR: "static_key_slow_inc" [net/netfilter/nft_meta.ko] undefined! ERROR: "nf_hooks_needed" [net/netfilter/ipvs/ip_vs.ko] undefined! ERROR: "nf_hooks_needed" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_count" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_slow_inc" [net/ipv6/ipv6.ko] undefined! This change moves the check before all these references are added to KBUILD_CFLAGS. This is correct because subsequent KBUILD_CFLAGS modifications are not relevant to this check. Reported-by: Anton V. Boyarshinov Fixes: 35f860f9ba6a ("jump label: pass kbuild_cflags when checking for asm goto support") Cc: stable@vger.kernel.org # v4.10 Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. Levin Acked-by: Steven Rostedt (VMware) Acked-by: David Lin Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b8a6b862ed7376..74d69f241543ee 100644 --- a/Makefile +++ b/Makefile @@ -653,6 +653,12 @@ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \ # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO +endif + include scripts/Makefile.gcc-plugins ifdef CONFIG_READABLE_ASM @@ -798,12 +804,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) -# check for 'asm goto' -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) - KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO - KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO -endif - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan From 9be3213b14d44f6328281941193d97f3b97e7d4c Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 13 Mar 2017 20:33:41 +0100 Subject: [PATCH 1561/1911] gconfig: remove misleading parentheses around a condition When building the kernel with clang, the compiler complains about the presence of a condition inside two pairs of parentheses: scripts/kconfig/gconf.c:917:19: error: equality comparison with extraneous parentheses [-Werror,-Wparentheses-equality] } else if ((col == COL_OPTION)) { ~~~~^~~~~~~~~~~~~ scripts/kconfig/gconf.c:917:19: note: remove extraneous parentheses around the comparison to silence this warning } else if ((col == COL_OPTION)) { ~ ^ ~ scripts/kconfig/gconf.c:917:19: note: use '=' to turn this equality comparison into an assignment } else if ((col == COL_OPTION)) { ^~ = Silence this warning by removing a level of parentheses. 
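For context, clang treats the doubled parentheses as a hint that an assignment may have been intended, which is why it offers either dropping a pair or switching to '='. A minimal stand-alone reproduction (hypothetical example, not part of the kernel tree) that warns under clang's -Wparentheses-equality on the first comparison and is silent on the second:

	enum { COL_OPTION = 1 };

	int handles_option(int col)
	{
		if ((col == COL_OPTION))	/* extraneous parentheses: warning */
			return 1;
		if (col == COL_OPTION)		/* plain comparison: no warning */
			return 2;
		return 0;
	}
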
Signed-off-by: Nicolas Iooss Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 26d208b435a0d3..cfddddb9c9d722 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -914,7 +914,7 @@ on_treeview2_button_press_event(GtkWidget * widget, current = menu; display_tree_part(); gtk_widget_set_sensitive(back_btn, TRUE); - } else if ((col == COL_OPTION)) { + } else if (col == COL_OPTION) { toggle_sym_value(menu); gtk_tree_view_expand_row(view, path, TRUE); } From 713cc9df6473f0cc8d699987d990482d432c0679 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Mar 2017 18:04:26 +0000 Subject: [PATCH 1562/1911] arm64: compat: Update compat syscalls Hook up three pkey syscalls (which we don't implement) and the new statx syscall, as has been done for arch/arm/. Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index e78ac26324bd80..bdbeb06dc11ede 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 394 +#define __NR_compat_syscalls 398 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index b7e8ef16ff0dc6..c66b51aab19588 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -809,6 +809,14 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range) __SYSCALL(__NR_preadv2, compat_sys_preadv2) #define __NR_pwritev2 393 __SYSCALL(__NR_pwritev2, compat_sys_pwritev2) +#define __NR_pkey_mprotect 394 +__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) +#define __NR_pkey_alloc 395 +__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) +#define __NR_pkey_free 396 +__SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 397 +__SYSCALL(__NR_statx, sys_statx) /* * Please add new compat syscalls above this comment and update From 65b1adebfe43c642dfe3b109edb5d992db5fbe72 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 21 Mar 2017 13:19:09 -0600 Subject: [PATCH 1563/1911] vfio: Rework group release notifier warning The intent of the original warning is make sure that the mdev vendor driver has removed any group notifiers at the point where the group is closed by the user. Theoretically this would be through an orderly shutdown where any devices are release prior to the group release. We can't always count on an orderly shutdown, the user can close the group before the notifier can be removed or the user task might be killed. We'd like to add this sanity test when the group is idle and the only references are from the devices within the group themselves, but we don't have a good way to do that. Instead check both when the group itself is removed and when the group is opened. A bit later than we'd prefer, but better than the current over aggressive approach. 
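Concretely, the sanity check now lives in two places instead of the release path; a condensed sketch of the diff below: group teardown warns if notifiers are still registered, and the open path warns and re-initializes the head so a previous user that never unregistered cannot leave stale callbacks behind for the next one.

	/* on final group release */
	WARN_ON(group->notifier.head);

	/* in vfio_group_fops_open(), before handing the group to a new user */
	if (WARN_ON(group->notifier.head))
		BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
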
Fixes: ccd46dbae77d ("vfio: support notifier chain in vfio_group") Signed-off-by: Alex Williamson Cc: # v4.10 Cc: Jike Song --- drivers/vfio/vfio.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 609f4f982c74c5..561084ab387f3f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -403,6 +403,7 @@ static void vfio_group_release(struct kref *kref) struct iommu_group *iommu_group = group->iommu_group; WARN_ON(!list_empty(&group->device_list)); + WARN_ON(group->notifier.head); list_for_each_entry_safe(unbound, tmp, &group->unbound_list, unbound_next) { @@ -1573,6 +1574,10 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) return -EBUSY; } + /* Warn if previous user didn't cleanup and re-init to drop them */ + if (WARN_ON(group->notifier.head)) + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + filep->private_data = group; return 0; @@ -1584,9 +1589,6 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) filep->private_data = NULL; - /* Any user didn't unregister? */ - WARN_ON(group->notifier.head); - vfio_group_try_dissolve_container(group); atomic_dec(&group->opened); From fb2155e3c30dc2043b52020e26965067a3e7779c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 21 Mar 2017 14:39:19 +0000 Subject: [PATCH 1564/1911] MIPS: smp-cps: Fix retrieval of VPE mask on big endian CPUs The vpe_mask member of struct core_boot_config is of type atomic_t, which is a 32bit type. In cps-vec.S this member was being retrieved by a PTR_L macro, which on 64bit systems is a 64bit load. On little endian systems this is OK, since the double word that is retrieved will have the required less significant word in the correct position. However, on big endian systems the less significant word of the load is retrieved from address+4, and the more significant from address+0. The destination register therefore ends up with the required word in the more significant word e.g. when starting the second VP of a big endian 64bit system, the load PTR_L ta2, COREBOOTCFG_VPEMASK(a0) ends up setting register ta2 to 0x0000000300000000 When this value is written to the CPC it is ignored, since it is invalid to write anything larger than 4 bits. This results in any VP other than VP0 in a core failing to start in 64bit big endian systems. Change the load to a 32bit load word instruction to fix the bug. 
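A user-space illustration of the underlying endianness effect (hypothetical example values, not kernel code): loading a 32-bit field with a 64-bit load is harmless on little endian because the wanted word lands in the low half of the register, but on big endian it lands in the high half.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* A 32-bit VPE mask followed by whatever sits next in memory,
		 * mimicking the struct core_boot_config layout. */
		uint32_t mem[2] = { 0x3, 0x0 };
		uint64_t reg;

		memcpy(&reg, mem, sizeof(reg));	/* the doubleword load */

		/* Little endian: reg == 0x0000000000000003, mask in the low half.
		 * Big endian:    reg == 0x0000000300000000, mask in the high half,
		 * so the value later handed to the CPC is not the small VPE mask
		 * the hardware expects and the extra VPs never start. */
		printf("loaded: 0x%016llx\n", (unsigned long long)reg);
		return 0;
	}
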
Fixes: f12401d7219f ("MIPS: smp-cps: Pull boot config retrieval out of mips_cps_boot_vpes") Signed-off-by: Matt Redfearn Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15787/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 59476a607adda0..a00e87b0256d3d 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg) END(mips_cps_get_bootcfg) LEAF(mips_cps_boot_vpes) - PTR_L ta2, COREBOOTCFG_VPEMASK(a0) + lw ta2, COREBOOTCFG_VPEMASK(a0) PTR_L ta3, COREBOOTCFG_VPECONFIG(a0) #if defined(CONFIG_CPU_MIPSR6) From 6ef90877eee63a0d03e83183bb44b64229b624e6 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 15 Mar 2017 23:26:42 +0100 Subject: [PATCH 1565/1911] MIPS: Lantiq: fix missing xbar kernel panic Commit 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") accidentally requested the resources from the pmu address region instead of the xbar registers region, but the check for the return value of request_mem_region() was wrong. Commit 98ea51cb0c8c ("MIPS: Lantiq: Fix another request_mem_region() return code check") fixed the check of the return value of request_mem_region() which made the kernel panics. This patch now makes use of the correct memory region for the cross bar. Fixes: 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") Signed-off-by: Hauke Mehrtens Cc: John Crispin Cc: james.hogan@imgtec.com Cc: arnd@arndb.de Cc: sergei.shtylyov@cogentembedded.com Cc: john@phrozen.org Cc: # 4.4.x- Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15751 Signed-off-by: Ralf Baechle --- arch/mips/lantiq/xway/sysctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 3c3aa05891dd78..95bec460b651fd 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -467,7 +467,7 @@ void __init ltq_soc_init(void) if (!np_xbar) panic("Failed to load xbar nodes from devicetree"); - if (of_address_to_resource(np_pmu, 0, &res_xbar)) + if (of_address_to_resource(np_xbar, 0, &res_xbar)) panic("Failed to get xbar resources"); if (!request_mem_region(res_xbar.start, resource_size(&res_xbar), res_xbar.name)) From 033cffeedbd11c140952b98e8639bf652091a17d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:25 +0800 Subject: [PATCH 1566/1911] MIPS: Add MIPS_CPU_FTLB for Loongson-3A R2 Loongson-3A R2 and newer CPU have FTLB, but Config0.MT is 1, so add MIPS_CPU_FTLB to the CPU options. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . 
Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15752/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 07718bb5fc9d86..12422fd4af2335 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1824,7 +1824,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) } decode_configs(c); - c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; default: From 5a34133167dce36666ea054e30a561b7f4413b7f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:26 +0800 Subject: [PATCH 1567/1911] MIPS: Check TLB before handle_ri_rdhwr() for Loongson-3 Loongson-3's micro TLB (ITLB) is not strictly a subset of JTLB. That means: when a JTLB entry is replaced by hardware, there may be an old valid entry exists in ITLB. So, a TLB miss exception may occur while handle_ri_rdhwr() is running because it try to access EPC's content. However, handle_ri_rdhwr() doesn't clear EXL, which makes a TLB Refill exception be treated as a TLB Invalid exception and tlbp may fail. In this case, if FTLB (which is usually set-associative instead of set- associative) is enabled, a tlbp failure will cause an invalid tlbwi, which will hang the whole system. This patch rename handle_ri_rdhwr_vivt to handle_ri_rdhwr_tlbp and use it for Loongson-3. It try to solve the same problem described as below, but more straightforwards. https://patchwork.linux-mips.org/patch/12591/ I think Loongson-2 has the same problem, but it has no FTLB, so we just keep it as is. Signed-off-by: Huacai Chen Cc: Rui Wang Cc: John Crispin Cc: Steven J . 
Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15753/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/genex.S | 4 ++-- arch/mips/kernel/traps.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 7ec9612cb0078a..2ac6c2625c13d4 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -519,7 +519,7 @@ NESTED(nmi_handler, PT_SIZE, sp) BUILD_HANDLER reserved reserved sti verbose /* others */ .align 5 - LEAF(handle_ri_rdhwr_vivt) + LEAF(handle_ri_rdhwr_tlbp) .set push .set noat .set noreorder @@ -538,7 +538,7 @@ NESTED(nmi_handler, PT_SIZE, sp) .set pop bltz k1, handle_ri /* slow path */ /* fall thru */ - END(handle_ri_rdhwr_vivt) + END(handle_ri_rdhwr_tlbp) LEAF(handle_ri_rdhwr) .set push diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c7d17cfb32f678..b49e7bf9f95023 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -83,7 +83,7 @@ extern asmlinkage void handle_dbe(void); extern asmlinkage void handle_sys(void); extern asmlinkage void handle_bp(void); extern asmlinkage void handle_ri(void); -extern asmlinkage void handle_ri_rdhwr_vivt(void); +extern asmlinkage void handle_ri_rdhwr_tlbp(void); extern asmlinkage void handle_ri_rdhwr(void); extern asmlinkage void handle_cpu(void); extern asmlinkage void handle_ov(void); @@ -2408,9 +2408,18 @@ void __init trap_init(void) set_except_vector(EXCCODE_SYS, handle_sys); set_except_vector(EXCCODE_BP, handle_bp); - set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri : - (cpu_has_vtag_icache ? - handle_ri_rdhwr_vivt : handle_ri_rdhwr)); + + if (rdhwr_noopt) + set_except_vector(EXCCODE_RI, handle_ri); + else { + if (cpu_has_vtag_icache) + set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); + else if (current_cpu_type() == CPU_LOONGSON3) + set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); + else + set_except_vector(EXCCODE_RI, handle_ri_rdhwr); + } + set_except_vector(EXCCODE_CPU, handle_cpu); set_except_vector(EXCCODE_OV, handle_ov); set_except_vector(EXCCODE_TR, handle_tr); From 0115f6cbf26663c86496bc56eeea293f85b77897 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:27 +0800 Subject: [PATCH 1568/1911] MIPS: Flush wrong invalid FTLB entry for huge page On VTLB+FTLB platforms (such as Loongson-3A R2), FTLB's pagesize is usually configured the same as PAGE_SIZE. In such a case, Huge page entry is not suitable to write in FTLB. Unfortunately, when a huge page is created, its page table entries haven't created immediately. Then the TLB refill handler will fetch an invalid page table entry which has no "HUGE" bit, and this entry may be written to FTLB. Since it is invalid, TLB load/store handler will then use tlbwi to write the valid entry at the same place. However, the valid entry is a huge page entry which isn't suitable for FTLB. Our solution is to modify build_huge_handler_tail. Flush the invalid old entry (whether it is in FTLB or VTLB, this is in order to reduce branches) and use tlbwr to write the valid new entry. Signed-off-by: Rui Wang Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . 
Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15754/ Signed-off-by: Ralf Baechle --- arch/mips/mm/tlbex.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 9bfee8988eaf11..4f642e07c2b198 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -760,7 +760,8 @@ static void build_huge_update_entries(u32 **p, unsigned int pte, static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, struct uasm_label **l, unsigned int pte, - unsigned int ptr) + unsigned int ptr, + unsigned int flush) { #ifdef CONFIG_SMP UASM_i_SC(p, pte, 0, ptr); @@ -769,6 +770,22 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, #else UASM_i_SW(p, pte, 0, ptr); #endif + if (cpu_has_ftlb && flush) { + BUG_ON(!cpu_has_tlbinv); + + UASM_i_MFC0(p, ptr, C0_ENTRYHI); + uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_tlb_write_entry(p, l, r, tlb_indexed); + + uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0); + + return; + } + build_huge_update_entries(p, pte, ptr); build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); } @@ -2199,7 +2216,7 @@ static void build_r4000_tlb_load_handler(void) uasm_l_tlbl_goaround2(&l, p); } uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbl(&l, p); @@ -2254,7 +2271,7 @@ static void build_r4000_tlb_store_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbs(&l, p); @@ -2310,7 +2327,7 @@ static void build_r4000_tlb_modify_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0); #endif uasm_l_nopage_tlbm(&l, p); From 093b995e3b55a0ae0670226ddfcb05bfbf0099ae Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 20 Mar 2017 14:26:42 +0800 Subject: [PATCH 1569/1911] mm, swap: Remove WARN_ON_ONCE() in free_swap_slot() Before commit 452b94b8c8c7 ("mm/swap: don't BUG_ON() due to uninitialized swap slot cache"), the following bug is reported, ------------[ cut here ]------------ kernel BUG at mm/swap_slots.c:270! invalid opcode: 0000 [#1] SMP CPU: 5 PID: 1745 Comm: (sd-pam) Not tainted 4.11.0-rc1-00243-g24c534bb161b #1 Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016 RIP: 0010:free_swap_slot+0xba/0xd0 Call Trace: swap_free+0x36/0x40 do_swap_page+0x360/0x6d0 __handle_mm_fault+0x880/0x1080 handle_mm_fault+0xd0/0x240 __do_page_fault+0x232/0x4d0 do_page_fault+0x20/0x70 page_fault+0x22/0x30 ---[ end trace aefc9ede53e0ab21 ]--- This is raised by the BUG_ON(!swap_slot_cache_initialized) in free_swap_slot(). This is incorrect, because even if the swap slots cache fails to be initialized, the swap should operate properly without the swap slots cache. 
And the use_swap_slot_cache check later in the function will protect the uninitialized swap slots cache case. In commit 452b94b8c8c7, the BUG_ON() is replaced by WARN_ON_ONCE(). In the patch, the WARN_ON_ONCE() is removed too. Reported-by: Linus Torvalds Acked-by: Tim Chen Cc: Michal Hocko Signed-off-by: "Huang, Ying" Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 7ebb23836f689e..b1ccb58ad39740 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - WARN_ON_ONCE(!swap_slot_cache_initialized); - cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { spin_lock_irq(&cache->free_lock); From 64897b20eed29cee2b998fb5ba362e65523dba3c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Mar 2017 22:19:07 +0100 Subject: [PATCH 1570/1911] cpufreq: intel_pstate: Fix policy data management in passive mode The policy->cpuinfo.max_freq and policy->max updates in intel_cpufreq_turbo_update() are excessive as they are done for no good reason and may lead to problems in principle, so they should be dropped. However, after dropping them intel_cpufreq_turbo_update() becomes almost entirely pointless, because the check made by it is made again down the road in intel_pstate_prepare_request(). The only thing in it that still needs to be done is the call to update_turbo_state(), so drop intel_cpufreq_turbo_update() altogether and make its callers invoke update_turbo_state() directly instead of it. In addition to that, fix intel_cpufreq_verify_policy() so that it checks global.no_turbo in addition to global.turbo_disabled when updating policy->cpuinfo.max_freq to make it consistent with intel_pstate_verify_policy(). Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7b07803e704256..283491f742d3d7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2257,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2265,26 +2265,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) return 0; } -static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, - struct cpufreq_policy *policy, - unsigned int target_freq) -{ - unsigned int max_freq; - - update_turbo_state(); - - max_freq = global.no_turbo || global.turbo_disabled ? 
- cpu->pstate.max_freq : cpu->pstate.turbo_freq; - policy->cpuinfo.max_freq = max_freq; - if (policy->max > max_freq) - policy->max = max_freq; - - if (target_freq > max_freq) - target_freq = max_freq; - - return target_freq; -} - static int intel_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -2293,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; + update_turbo_state(); + freqs.old = policy->cur; - freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq); + freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); switch (relation) { @@ -2326,7 +2308,8 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); + update_turbo_state(); + target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); From ad0a45fd9c14feebd000b6e84189d0edff265170 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Sun, 19 Mar 2017 00:51:59 +0530 Subject: [PATCH 1571/1911] cpuidle: Validate cpu_dev in cpuidle_add_sysfs() If a given cpu is not in cpu_present and cpu hotplug is disabled, arch can skip setting up the cpu_dev. Arch cpuidle driver should pass correct cpu mask for registration, but failing to do so by the driver causes error to propagate and crash like this: [ 30.076045] Unable to handle kernel paging request for data at address 0x00000048 [ 30.076100] Faulting instruction address: 0xc0000000007b2f30 cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670] pc: c0000000007b2f30: kobject_get+0x20/0x70 lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0 sp: c000003feb18b8f0 msr: 9000000000009033 dar: 48 dsisr: 40000000 current = 0xc000003fd2ed8300 paca = 0xc00000000fbab500 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/0 Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0 20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017 enter ? for help [c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0 [c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0 [c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130 [c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0 [c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120 [c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4 [c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0 [c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360 [c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160 [c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74 Validating cpu_dev fixes the crash and reports correct error message like: [ 30.163506] Failed to register cpuidle device for cpu136 [ 30.173329] Registration of powernv driver failed. Signed-off-by: Vaidyanathan Srinivasan [ rjw: Comment massage ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/sysfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index c5adc8c9ac43af..ae948b1da93a37 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + /* + * Return if cpu_device is not setup for this CPU. + * + * This could happen if the arch did not set up cpu_device + * since this CPU is not in cpu_present mask and the + * driver did not send a correct CPU mask during registration. + * Without this check we would end up passing bogus + * value for &cpu_dev->kobj in kobject_init_and_add() + */ + if (!cpu_dev) + return -ENODEV; + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; From 98d068ab52b4b11d403995ed14154660797e7136 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 14 Mar 2017 14:15:20 +0800 Subject: [PATCH 1572/1911] r8152: fix the list rx_done may be used without initialization The list rx_done would be initialized when the linking on occurs. Therefore, if a napi is scheduled without any linking on before, the following kernel panic would happen. BUG: unable to handle kernel NULL pointer dereference at 000000000000008 IP: [] r8152_poll+0xe1e/0x1210 [r8152] PGD 0 Oops: 0002 [#1] SMP Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 986243c932ccd6..bb3eedd07fbe56 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1362,6 +1362,7 @@ static int alloc_all_mem(struct r8152 *tp) spin_lock_init(&tp->rx_lock); spin_lock_init(&tp->tx_lock); INIT_LIST_HEAD(&tp->tx_free); + INIT_LIST_HEAD(&tp->rx_done); skb_queue_head_init(&tp->tx_queue); skb_queue_head_init(&tp->rx_queue); From 8a0f5ccfb33b0b8b51de65b7b3bf342ba10b4fb6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Mar 2017 18:25:57 +0800 Subject: [PATCH 1573/1911] crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote: > > Yes, please. > Disregarding some reports is not a good way long term. Please try this patch. ---8<--- Subject: netlink: Annotate nlk cb_mutex by protocol Currently all occurences of nlk->cb_mutex are annotated by lockdep as a single class. This causes a false lcokdep cycle involving genl and crypto_user. This patch fixes it by dividing cb_mutex into individual classes based on the netlink protocol. As genl and crypto_user do not use the same netlink protocol this breaks the false dependency loop. Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b73c7c161a968..596eaff66649e5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); +static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; + +static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { + "nlk_cb_mutex-ROUTE", + "nlk_cb_mutex-1", + "nlk_cb_mutex-USERSOCK", + "nlk_cb_mutex-FIREWALL", + "nlk_cb_mutex-SOCK_DIAG", + "nlk_cb_mutex-NFLOG", + "nlk_cb_mutex-XFRM", + "nlk_cb_mutex-SELINUX", + "nlk_cb_mutex-ISCSI", + "nlk_cb_mutex-AUDIT", + "nlk_cb_mutex-FIB_LOOKUP", + "nlk_cb_mutex-CONNECTOR", + "nlk_cb_mutex-NETFILTER", + "nlk_cb_mutex-IP6_FW", + "nlk_cb_mutex-DNRTMSG", + "nlk_cb_mutex-KOBJECT_UEVENT", + "nlk_cb_mutex-GENERIC", + "nlk_cb_mutex-17", + "nlk_cb_mutex-SCSITRANSPORT", + "nlk_cb_mutex-ECRYPTFS", + "nlk_cb_mutex-RDMA", + "nlk_cb_mutex-CRYPTO", + "nlk_cb_mutex-SMC", + "nlk_cb_mutex-23", + "nlk_cb_mutex-24", + "nlk_cb_mutex-25", + "nlk_cb_mutex-26", + "nlk_cb_mutex-27", + "nlk_cb_mutex-28", + "nlk_cb_mutex-29", + "nlk_cb_mutex-30", + "nlk_cb_mutex-31", + "nlk_cb_mutex-MAX_LINKS" +}; + static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); @@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock, } else { nlk->cb_mutex = &nlk->cb_def_mutex; mutex_init(nlk->cb_mutex); + lockdep_set_class_and_name(nlk->cb_mutex, + nlk_cb_mutex_keys + protocol, + nlk_cb_mutex_key_strings[protocol]); } init_waitqueue_head(&nlk->wait); From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:14 +0800 Subject: [PATCH 1574/1911] vsock: track pkt owner vsock So that we can cancel a queued pkt later if necessary. Signed-off-by: Peng Tao Signed-off-by: David S. 
Miller --- include/linux/virtio_vsock.h | 3 +++ net/vmw_vsock/virtio_transport_common.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f63..584f9a647ad4ac 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8d592a45b59786..af087b44ceea23 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); From 16320f363ae128d9b9c70e60f00f2a572f57c23d Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:15 +0800 Subject: [PATCH 1575/1911] vhost-vsock: add pkt cancel capability To allow canceling all packets of a connection. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. 
Miller --- drivers/vhost/vsock.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/net/af_vsock.h | 3 +++ 2 files changed, 44 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index ce5e63d2c66aac..44eed8eb0725b2 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + return -ENODEV; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + return 0; +} + static struct virtio_vsock_pkt * vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) @@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f89..f32ed9ac181a47 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); From 073b4f2c50fe67c7c66a059a4d6db52bb1465490 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:16 +0800 Subject: [PATCH 1576/1911] vsock: add pkt cancel capability Reviewed-by: Stefan Hajnoczi Signed-off-by: Peng Tao Signed-off-by: David S. 
Miller --- net/vmw_vsock/virtio_transport.c | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9d24c0e958b18e..68675a151f22b8 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +virtio_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct virtio_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + vsock = virtio_vsock_get(); + if (!vsock) { + return -ENODEV; + } + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) && + new_cnt < virtqueue_get_vring_size(rx_vq)) + queue_work(virtio_vsock_workqueue, &vsock->rx_work); + } + + return 0; +} + static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; @@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = virtio_transport_cancel_pkt, .dgram_bind = virtio_transport_dgram_bind, .dgram_dequeue = virtio_transport_dgram_dequeue, From 380feae0def7e6a115124a3219c3ec9b654dca32 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:17 +0800 Subject: [PATCH 1577/1911] vsock: cancel packets when failing to connect Otherwise we'll leave the packets queued until releasing vsock device. E.g., if guest is slow to start up, resulting ETIMEDOUT on connect, guest will get the connect requests from failed host sockets. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. 
Miller --- net/vmw_vsock/af_vsock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9f770f33c10098..6f7f6757ceefb5 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, dwork.work); sk = sk_vsock(vsk); @@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } From 0be032c190abcdcfa948082b6a1e0d461184ba4d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:29 +0800 Subject: [PATCH 1578/1911] MIPS: c-r4k: Fix Loongson-3's vcache/scache waysize calculation If scache.waysize is 0, r4k___flush_cache_all() will do nothing and then cause bugs. BTW, though vcache.waysize isn't being used by now, we also fix its calculation. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15756/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e7f798d55fbcca..3fe99cb271a9ca 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1562,6 +1562,7 @@ static void probe_vcache(void) vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz; c->vcache.waybit = 0; + c->vcache.waysize = vcache_size / c->vcache.ways; pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); @@ -1664,6 +1665,7 @@ static void __init loongson3_sc_init(void) /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */ scache_size *= 4; c->scache.waybit = 0; + c->scache.waysize = scache_size / c->scache.ways; pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); if (scache_size) From 7df9c24625b9981779afb8fcdbe2bb4765e61147 Mon Sep 17 00:00:00 2001 From: Andrey Ulanov Date: Tue, 14 Mar 2017 20:16:42 -0700 Subject: [PATCH 1579/1911] net: unix: properly re-increment inflight counter of GC discarded candidates Dmitry has reported that a BUG_ON() condition in unix_notinflight() may be triggered by a simple code that forwards unix socket in an SCM_RIGHTS message. That is caused by incorrect unix socket GC implementation in unix_gc(). 
The GC first collects list of candidates, then (a) decrements their "children's" inflight counter, (b) checks which inflight counters are now 0, and then (c) increments all inflight counters back. (a) and (c) are done by calling scan_children() with inc_inflight or dec_inflight as the second argument. Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage collector") changed scan_children() such that it no longer considers sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block of code that that unsets this flag _before_ invoking scan_children(, dec_iflight, ). This may lead to incorrect inflight counters for some sockets. This change fixes this bug by changing order of operations: UNIX_GC_CANDIDATE is now unset only after all inflight counters are restored to the original state. kernel BUG at net/unix/garbage.c:149! RIP: 0010:[] [] unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149 Call Trace: [] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487 [] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496 [] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655 [] skb_release_all+0x1a/0x60 net/core/skbuff.c:668 [] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684 [] kfree_skb+0x184/0x570 net/core/skbuff.c:705 [] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559 [] unix_release+0x49/0x90 net/unix/af_unix.c:836 [] sock_release+0x92/0x1f0 net/socket.c:570 [] sock_close+0x1b/0x20 net/socket.c:1017 [] __fput+0x34e/0x910 fs/file_table.c:208 [] ____fput+0x1a/0x20 fs/file_table.c:244 [] task_work_run+0x1a0/0x280 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x183a/0x2640 kernel/exit.c:828 [] do_group_exit+0x14e/0x420 kernel/exit.c:931 [] get_signal+0x663/0x1880 kernel/signal.c:2307 [] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0x1ea/0x2d0 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x4d3/0x570 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Link: https://lkml.org/lkml/2017/3/6/252 Signed-off-by: Andrey Ulanov Reported-by: Dmitry Vyukov Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector") Signed-off-by: David S. Miller --- net/unix/garbage.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6a0d48525fcf9a..c36757e728442b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) @@ -341,6 +342,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -350,14 +359,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). 
- */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ From 09050957fae896e001498af1aa35c446a11cb47d Mon Sep 17 00:00:00 2001 From: Yaroslav Isakov Date: Thu, 16 Mar 2017 22:44:10 +0300 Subject: [PATCH 1580/1911] tun: fix inability to set offloads after disabling them via ethtool Added missing logic in tun driver, which prevents apps to set offloads using tun ioctl, if offloads were previously disabled via ethtool Signed-off-by: Yaroslav Isakov Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 34cc3c590aa5c5..cc88cd7856f5e5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1931,6 +1931,8 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) return -EINVAL; tun->set_features = features; + tun->dev->wanted_features &= ~TUN_USER_FEATURES; + tun->dev->wanted_features |= features; netdev_update_features(tun->dev); return 0; From aea92fb2e09e29653b023d4254ac9fbf94221538 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2017 08:05:28 -0700 Subject: [PATCH 1581/1911] sch_dsmark: fix invalid skb_cow() usage skb_cow(skb, sizeof(ip header)) is not very helpful in this context. First we need to use pskb_may_pull() to make sure the ip header is in skb linear part, then use skb_try_make_writable() to address clones issues. Fixes: 4c30719f4f55 ("[PKT_SCHED] dsmark: handle cloned and non-linear skb's") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_dsmark.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 802ac7c2e5e87e..5334e309f17f0e 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - if (skb_cow_head(skb, sizeof(struct iphdr))) + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) @@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case htons(ETH_P_IPV6): - if (skb_cow_head(skb, sizeof(struct ipv6hdr))) + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) From 6bd845d1cf98b45c634baacb8381436dad3c2dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 17 Mar 2017 17:20:48 +0100 Subject: [PATCH 1582/1911] qmi_wwan: add Dell DW5811e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Dell branded Sierra Wireless EM7455. It is operating in MBIM mode by default, but can be configured to provide two QMI/RMNET functions. Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 80567455068328..f8d55aa058ecc9 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -925,6 +925,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ From 13e2d5187f6b965ba3556caedb914baf81b98ed2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:52:35 +0300 Subject: [PATCH 1583/1911] bna: integer overflow bug in debugfs We could allocate less memory than intended because we do: bnad->regdata = kzalloc(len << 2, GFP_KERNEL); The shift can overflow leading to a crash. This is debugfs code so the impact is very small. Fixes: 7afc5dbde091 ("bna: Add debugfs interface.") Signed-off-by: Dan Carpenter Acked-by: Rasesh Mody Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 05c1c1dd7751bd..cebfe3bd086e36 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > UINT_MAX >> 2) { netdev_warn(bnad->netdev, "failed to read user buffer\n"); kfree(kern_buf); return -EINVAL; From 3dc857f0e8fc22610a59cbb346ba62c6e921863f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 17 Mar 2017 16:07:11 -0700 Subject: [PATCH 1584/1911] net: vrf: Reset rt6i_idev in local dst after put The VRF driver takes a reference to the inet6_dev on the VRF device for its rt6_local dst when handling local traffic through the VRF device as a loopback. When the device is deleted the driver does a put on the idev but does not reset rt6i_idev in the rt6_info struct. When the dst is destroyed, dst_destroy calls ip6_dst_destroy which does a second put for what is essentially the same reference causing it to be prematurely freed. Reset rt6i_idev after the put in the vrf driver. Fixes: b4869aa2f881e ("net: vrf: ipv6 support for local traffic to local addresses") Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index fea687f35b5ac6..d6988db1930d6b 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -462,8 +462,10 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) } if (rt6_local) { - if (rt6_local->rt6i_idev) + if (rt6_local->rt6i_idev) { in6_dev_put(rt6_local->rt6i_idev); + rt6_local->rt6i_idev = NULL; + } dst = &rt6_local->dst; dev_put(dst->dev); From 486a43db2e26b87125b5629e1ade516f90833934 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 19:12:22 +0800 Subject: [PATCH 1585/1911] sctp: remove temporary variable confirm from sctp_packet_transmit Commit c86a773c7802 ("sctp: add dst_pending_confirm flag") introduced a temporary variable "confirm" in sctp_packet_transmit. But it broke the rule that longer lines should be above shorter ones. Besides, this variable is not necessary, so this patch is to just remove it and use tp->dst_pending_confirm directly. Fixes: c86a773c7802 ("sctp: add dst_pending_confirm flag") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/output.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 71ce6b945dcb54..1224421036b3e5 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -546,7 +546,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; - int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -625,13 +624,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - confirm = tp->dst_pending_confirm; - if (confirm) + if (tp->dst_pending_confirm) skb_set_dst_pending_confirm(head, 1); /* neighbour should be confirmed on successful transmission or * positive error */ - if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && + tp->dst_pending_confirm) tp->dst_pending_confirm = 0; out: From 1f904495b79003cd3d881de8731377d48fcbc7e3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 19:27:23 +0800 Subject: [PATCH 1586/1911] sctp: define dst_pending_confirm as a bit in sctp_transport As tp->dst_pending_confirm's value can only be set 0 or 1, this patch is to change to define it as a bit instead of __u32. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07a0b128625a4e..4f645198e9bd7b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -753,6 +753,8 @@ struct sctp_transport { /* Is the Path MTU update pending on this tranport */ pmtu_pending:1, + dst_pending_confirm:1, /* need to confirm neighbour */ + /* Has this transport moved the ctsn since we last sacked */ sack_generation:1; u32 dst_cookie; @@ -806,8 +808,6 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ - __u32 dst_pending_confirm; /* need to confirm neighbour */ - /* Destination */ struct dst_entry *dst; /* Source address. 
*/ From 23bb09cfbe04076ef647da3889a5a5ab6cbe6f15 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 20:03:59 +0800 Subject: [PATCH 1587/1911] sctp: out_qlen should be updated when pruning unsent queue This patch is to fix the issue that sctp_prsctp_prune_sent forgot to update q->out_qlen when removing a chunk from unsent queue. Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index db352e5d61f898..025ccff670720b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, - struct sctp_sndrcvinfo *sinfo, - struct list_head *queue, int msg_len) + struct sctp_sndrcvinfo *sinfo, int msg_len) { + struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; - list_for_each_entry_safe(chk, temp, queue, list) { + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); + q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; @@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, return; } - sctp_prsctp_prune_unsent(asoc, sinfo, - &asoc->outqueue.out_chunk_list, - msg_len); + sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ From ff010472fb75670cb5c08671e820eeea3af59c87 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 21 Mar 2017 11:36:06 +0530 Subject: [PATCH 1588/1911] cpufreq: Restore policy min/max limits on CPU online On CPU online the cpufreq core restores the previous governor (or the previous "policy" setting for ->setpolicy drivers), but it does not restore the min/max limits at the same time, which is confusing, inconsistent and real pain for users who set the limits and then suspend/resume the system (using full suspend), in which case the limits are reset on all CPUs except for the boot one. Fix this by making cpufreq_online() restore the limits when an inactive policy is brought online. The commit log and patch are inspired from Rafael's earlier work. Reported-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Cc: 4.3+ # 4.3+ Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8ff617d449d92..5dbdd261aa736b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1184,6 +1184,9 @@ static int cpufreq_online(unsigned int cpu) for_each_cpu(j, policy->related_cpus) per_cpu(cpufreq_cpu_data, j) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + } else { + policy->min = policy->user_policy.min; + policy->max = policy->user_policy.max; } if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { From 8605330aac5a5785630aec8f64378a54891937cc Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:02:59 -0400 Subject: [PATCH 1589/1911] tcp: fix SCM_TIMESTAMPING_OPT_STATS for normal skbs __sock_recv_timestamp can be called for both normal skbs (for receive timestamps) and for skbs on the error queue (for transmit timestamps). Commit 1c885808e456 (tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING) assumes any skb passed to __sock_recv_timestamp are from the error queue, containing OPT_STATS in the content of the skb. This results in accessing invalid memory or generating junk data. To fix this, set skb->pkt_type to PACKET_OUTGOING for packets on the error queue. This is safe because on the receive path on local sockets skb->pkt_type is never set to PACKET_OUTGOING. With that, copy OPT_STATS from a packet, only if its pkt_type is PACKET_OUTGOING. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 ++++++++++ net/socket.c | 13 ++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cd4ba8c6b60916..b1fbd1958eb638 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } +static void skb_set_err_queue(struct sk_buff *skb) +{ + /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. + * So, it is safe to (mis)use it to mark skbs on the error queue. + */ + skb->pkt_type = PACKET_OUTGOING; + BUILD_BUG_ON(PACKET_OUTGOING == 0); +} + /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ @@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); diff --git a/net/socket.c b/net/socket.c index e034fe4164beec..692d6989d2c2b9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +static bool skb_is_err_queue(const struct sk_buff *skb) +{ + /* pkt_type of skbs enqueued on the error queue are set to + * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do + * in recvmsg, since skbs received on a local socket will never + * have a pkt_type of PACKET_OUTGOING. 
+ */ + return skb->pkt_type == PACKET_OUTGOING; +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(tss), &tss); - if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + if (skb_is_err_queue(skb) && skb->len && + (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:03:00 -0400 Subject: [PATCH 1590/1911] tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled while packets are collected on the error queue. So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags is not enough to safely assume that the skb contains OPT_STATS data. Add a bit in sock_exterr_skb to indicate whether the skb contains opt_stats data. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/errqueue.h | 2 ++ net/core/skbuff.c | 17 +++++++++++------ net/socket.c | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 9ca23fcfb5d731..6fdfc884fdeb3d 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -20,6 +20,8 @@ struct sock_exterr_skb { struct sock_extended_err ee; u16 addr_offset; __be16 port; + u8 opt_stats:1, + unused:7; }; #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1fbd1958eb638..9f781092fda9cb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3793,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, - int tstype) + int tstype, + bool opt_stats) { struct sock_exterr_skb *serr; int err; + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; + serr->opt_stats = opt_stats; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && @@ -3843,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, */ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); } } @@ -3854,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly; + bool tsonly, opt_stats = false; if (!sk) return; @@ -3867,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) + sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk); - else + opt_stats = true; + } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { @@ -3888,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, else skb->tstamp = ktime_get_real(); - __skb_complete_tx_timestamp(skb, sk, tstype); + __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } 
EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/socket.c b/net/socket.c index 692d6989d2c2b9..985ef06792d6e5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,7 +706,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, SCM_TIMESTAMPING, sizeof(tss), &tss); if (skb_is_err_queue(skb) && skb->len && - (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + SKB_EXT_ERR(skb)->opt_stats) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } From e8f1f34a344d060eaf1918089369c4c1172a153b Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Sun, 19 Mar 2017 23:03:14 -0700 Subject: [PATCH 1591/1911] selftests/bpf: fix broken build, take 2 Merge of 'linux-kselftest-4.11-rc1': 1. Partially removed use of 'test_objs' target, breaking force rebuild of BPFOBJ, introduced in commit d498f8719a09 ("bpf: Rebuild bpf.o for any dependency update"). Update target so dependency on BPFOBJ is restored. 2. Introduced commit 2047f1d8ba28 ("selftests: Fix the .c linking rule") which fixes order of LDLIBS. Commit d02d8986a768 ("bpf: Always test unprivileged programs") added libcap dependency into CFLAGS. Use LDLIBS instead to fix linking of test_verifier. 3. Introduced commit d83c3ba0b926 ("selftests: Fix selftests build to just build, not run tests"). Reordering the Makefile allows us to remove the 'all' target. Tested both: selftests/bpf$ make and selftests$ make TARGETS=bpf on Ubuntu 16.04.2. Signed-off-by: Zi Shen Lim Acked-by: Daniel Borkmann Tested-by: Daniel Borkmann Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Acked-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 67531f47781b40..6a1ad58cb66f4a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,22 +1,23 @@ LIBDIR := ../../../lib -BPFOBJ := $(LIBDIR)/bpf/bpf.o +BPFDIR := $(LIBDIR)/bpf -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ) +CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) +LDLIBS += -lcap TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := test_kmod.sh -all: $(TEST_GEN_PROGS) +include ../lib.mk + +BPFOBJ := $(OUTPUT)/bpf.o + +$(TEST_GEN_PROGS): $(BPFOBJ) -.PHONY: all clean force +.PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated force: $(BPFOBJ): force - $(MAKE) -C $(dir $(BPFOBJ)) - -$(test_objs): $(BPFOBJ) - -include ../lib.mk + $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ From 4071898bf0f4d79ff353db327af2a15123272548 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 19 Mar 2017 09:19:57 -0700 Subject: [PATCH 1592/1911] net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets qmicli working with the MDM6600 modem. Cc: Bjørn Mork Reviewed-by: Sebastian Reichel Tested-by: Sebastian Reichel Signed-off-by: Tony Lindgren Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f8d55aa058ecc9..156f7f85e4860d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -580,6 +580,10 @@ static const struct usb_device_id products[] = { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Motorola Mapphone devices with MDM6600 */ + USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 2. Combined interface devices matching on class+protocol */ { /* Huawei E367 and possibly others in "Windows mode" */ From bc2d4b62db67f817b09c782219996630e9c2f5e2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 22 Mar 2017 12:35:31 +0800 Subject: [PATCH 1593/1911] drm/i915/gvt: Use force single submit flag to distinguish gvt request from i915 request In my previous Commit ab9da627906a ("drm/i915: make context status notifier head be per engine") rely on scheduler->current_workload[x] to distinguish gvt spacial request from i915 request. But this is not always true since no synchronization between workload_thread and lrc irq handler. lrc irq handler workload_thread ---- ---- pick i915 requests; intel_vgpu_submit_execlist(); current_workload[x] = xxx; shadow_context_status_change(); Then current_workload[x] is not null but current request is of i915 self. So instead we check ctx flag CONTEXT_FORCE_SINGLE_SUBMISSION. Only gvt request set this flag and always set. v2: Reverse the order of multi-condition 'if' statement. Fixes: ab9da6279 ("drm/i915: make context status notifier head be per engine") Signed-off-by: Changbin Du Reviewed-by: Yulei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 39a83eb7aeccaf..d0c04155f7d5b4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -127,6 +127,11 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } +static inline bool is_gvt_request(struct drm_i915_gem_request *req) +{ + return i915_gem_context_force_single_submission(req->ctx); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -139,7 +144,7 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; - if (unlikely(!workload)) + if (!is_gvt_request(req) || unlikely(!workload)) return NOTIFY_OK; switch (action) { From ad4830051aac0b967add82ac168f49edf11f01a0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 21 Mar 2017 18:16:47 -0500 Subject: [PATCH 1594/1911] x86/platform/uv: Fix calculation of Global Physical Address The calculation of the global physical address (GPA) on UV4 is incorrect. The gnode_extra/upper global offset should only be applied for fixed address space systems (UV1..3). 
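As a rough, hedged illustration of the address math the fix below changes (standalone C, not the kernel source; the struct and parameter names are simplified stand-ins for the uv_hub_info fields, and the nasid is passed in rather than looked up): the gnode upper bits and the m_shift/n_lshift swizzle are applied only when m_val is non-zero, i.e. on the fixed address space systems (UV1..3), while UV4 derives the offset from the nasid and gpa_shift.

#include <stdint.h>

struct hub_info {
	uint64_t lowmem_remap_top;
	uint64_t lowmem_remap_base;
	uint64_t gnode_upper;                 /* meaningful only when m_val != 0 */
	unsigned int m_val, m_shift, n_lshift, gpa_shift;
};

/* Simplified stand-in for uv_soc_phys_ram_to_gpa(); 'nasid' is what the
 * real code obtains from uv_soc_phys_ram_to_nasid(paddr). */
static uint64_t phys_to_gpa(const struct hub_info *hi, uint64_t paddr,
			    uint64_t nasid)
{
	if (paddr < hi->lowmem_remap_top)
		paddr |= hi->lowmem_remap_base;

	if (hi->m_val) {
		/* UV1..3: fixed address space, fold in gnode_upper and swizzle */
		paddr |= hi->gnode_upper;
		paddr = ((paddr << hi->m_shift) >> hi->m_shift) |
			((paddr >> hi->m_val) << hi->n_lshift);
	} else {
		/* UV4: the node offset comes from the nasid instead */
		paddr |= nasid << hi->gpa_shift;
	}
	return paddr;
}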
Tested-by: John Estabrook Signed-off-by: Mike Travis Cc: Dimitri Sivanich Cc: John Estabrook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170321231646.667689538@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 8 +++++--- arch/x86/kernel/apic/x2apic_uv_x.c | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 72e8300b1e8a6a..9cffb44a3cf5df 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; - paddr |= uv_hub_info->gnode_upper; - if (m_val) + + if (m_val) { + paddr |= uv_hub_info->gnode_upper; paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); - else + } else { paddr |= uv_soc_phys_ram_to_nasid(paddr) << uv_hub_info->gpa_shift; + } return paddr; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e9f8f8cdd57085..86f20cc0a65e22 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1105,7 +1105,8 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hi) node_id.v = uv_read_local_mmr(UVH_NODE_ID); uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val); hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; - hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val; + if (mn.m_val) + hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val; if (uv_gp_table) { hi->global_mmr_base = uv_gp_table->mmr_base; From dd7406dd334a98ada3ff5371847a3eeb4ba16313 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Tue, 7 Mar 2017 14:38:29 -0500 Subject: [PATCH 1595/1911] hwmon: (max31790) Set correct PWM value Traced fans not spinning to incorrect PWM value being written. The passed in value was written instead of the calulated value. Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API") Signed-off-by: Alex Hemme Signed-off-by: Guenter Roeck --- drivers/hwmon/max31790.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index c1b9275978f9d9..281491cca5103a 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel, data->pwm[channel] = val << 8; err = i2c_smbus_write_word_swapped(client, MAX31790_REG_PWMOUT(channel), - val); + data->pwm[channel]); break; case hwmon_pwm_enable: fan_config = data->fan_config[channel]; From 8358378b22518d92424597503d3c1cd302a490b6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 12 Mar 2017 06:18:58 -0700 Subject: [PATCH 1596/1911] hwmon: (it87) Avoid registering the same chip on both SIO addresses IT8705F is known to respond on both SIO addresses. Registering it twice may result in system lockups. 
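A minimal sketch of the probing-loop shape after the fix below, assuming hypothetical probe()/register_chip() stand-ins for it87_find()/it87_device_add() and a made-up ISA address: the second SIO address is simply skipped when it reports the same ISA address as the first.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins: pretend one chip answers on both SIO addresses
 * with the same ISA base address. */
static int probe(int sioaddr, unsigned short *isa_address)
{
	(void)sioaddr;
	*isa_address = 0x290;
	return 0;
}

static int register_chip(int idx, unsigned short isa_address)
{
	printf("registering chip %d at %#x\n", idx, (unsigned int)isa_address);
	return 0;
}

int main(void)
{
	int sioaddr[2] = { 0x2e, 0x4e };
	unsigned short isa_address[2] = { 0, 0 };
	bool found = false;

	for (int i = 0; i < 2; i++) {
		if (probe(sioaddr[i], &isa_address[i]) || isa_address[i] == 0)
			continue;
		/* Same chip answering on the second SIO address: register once. */
		if (i && isa_address[i] == isa_address[0])
			break;
		if (register_chip(i, isa_address[i]) == 0)
			found = true;
	}
	return found ? 0 : 1;
}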
Reported-by: Russell King Fixes: e84bd9535e2b ("hwmon: (it87) Add support for second Super-IO chip") Signed-off-by: Guenter Roeck --- drivers/hwmon/it87.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index efb01c247e2d90..4dfc7238313ebd 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -3198,7 +3198,7 @@ static int __init sm_it87_init(void) { int sioaddr[2] = { REG_2E, REG_4E }; struct it87_sio_data sio_data; - unsigned short isa_address; + unsigned short isa_address[2]; bool found = false; int i, err; @@ -3208,15 +3208,29 @@ static int __init sm_it87_init(void) for (i = 0; i < ARRAY_SIZE(sioaddr); i++) { memset(&sio_data, 0, sizeof(struct it87_sio_data)); - isa_address = 0; - err = it87_find(sioaddr[i], &isa_address, &sio_data); - if (err || isa_address == 0) + isa_address[i] = 0; + err = it87_find(sioaddr[i], &isa_address[i], &sio_data); + if (err || isa_address[i] == 0) continue; + /* + * Don't register second chip if its ISA address matches + * the first chip's ISA address. + */ + if (i && isa_address[i] == isa_address[0]) + break; - err = it87_device_add(i, isa_address, &sio_data); + err = it87_device_add(i, isa_address[i], &sio_data); if (err) goto exit_dev_unregister; + found = true; + + /* + * IT8705F may respond on both SIO addresses. + * Stop probing after finding one. + */ + if (sio_data.type == it87) + break; } if (!found) { From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Fri, 17 Mar 2017 00:28:56 +0100 Subject: [PATCH 1597/1911] hwmon: Add missing HWMON_T_ALARM Unfortunately the HWMON_T_ALARM define was missing, although the associated entry was present in hwmon_temp_attributes. This is needed to convert drivers to the new interface which use channel based alarms. Signed-off-by: Peter Huewe Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 78d59dba563e33..88b6737491210a 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -88,6 +88,7 @@ enum hwmon_temp_attributes { #define HWMON_T_CRIT_HYST BIT(hwmon_temp_crit_hyst) #define HWMON_T_EMERGENCY BIT(hwmon_temp_emergency) #define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst) +#define HWMON_T_ALARM BIT(hwmon_temp_alarm) #define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) #define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) #define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) From d82c0d12c92705ef468683c9b7a8298dd61ed191 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Cerri Date: Mon, 13 Mar 2017 12:14:58 -0300 Subject: [PATCH 1598/1911] s390/decompressor: fix initrd corruption caused by bss clear Reorder the operations in decompress_kernel() to ensure initrd is moved to a safe location before the bss section is zeroed. During decompression bss can overlap with the initrd and this can corrupt the initrd contents depending on the size of the compressed kernel (which affects where the initrd is placed by the bootloader) and the size of the bss section of the decompressor. Also use the correct initrd size when checking for overlaps with parmblock. 
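Stripped of the surrounding boilerplate, the corrected decompress_kernel() sequence is: compute where the decompressed kernel will end, relocate the initrd out of that range while the decompressor's bss still holds valid data, and only then clear bss and set up the heap pointers that live in it. A condensed view of that order follows; _end, _bss, _ebss, SZ__bss_start and INITRD_START/INITRD_SIZE are the linker and IPL symbols used by arch/s390/boot/compressed/misc.c.

	void *output, *kernel_end;

	/* 1. Where will the decompressed kernel (including its bss) end? */
	output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE);
	kernel_end = output + SZ__bss_start;
	check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);

	/* 2. Move the initrd behind kernel_end before bss is touched. */
	if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
		check_ipl_parmblock(kernel_end, INITRD_SIZE);
		memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
		INITRD_START = (unsigned long) kernel_end;
	}

	/* 3. Only now clear bss and set up the heap, which lives in bss. */
	memset(&_bss, 0, &_ebss - &_bss);
	free_mem_ptr = (unsigned long) &_end;
	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;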
Fixes: 06c0dd72aea3 ([S390] fix boot failures with compressed kernels) Cc: stable@vger.kernel.org Reviewed-by: Joy Latten Reviewed-by: Vineetha HariPai Signed-off-by: Marcelo Henrique Cerri Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/misc.c | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index fa95041fa9f684..33ca29333e1808 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size) unsigned long decompress_kernel(void) { - unsigned long output_addr; - unsigned char *output; + void *output, *kernel_end; - output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; - check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long)&_end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) output_addr; + output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + kernel_end = output + SZ__bss_start; + check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); #ifdef CONFIG_BLK_DEV_INITRD /* * Move the initrd right behind the end of the decompressed - * kernel image. + * kernel image. This also prevents initrd corruption caused by + * bss clearing since kernel_end will always be located behind the + * current bss section.. */ - if (INITRD_START && INITRD_SIZE && - INITRD_START < (unsigned long) output + SZ__bss_start) { - check_ipl_parmblock(output + SZ__bss_start, - INITRD_START + INITRD_SIZE); - memmove(output + SZ__bss_start, - (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) output + SZ__bss_start; + if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { + check_ipl_parmblock(kernel_end, INITRD_SIZE); + memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) kernel_end; } #endif + /* + * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be + * initialized afterwards since they reside in bss. + */ + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long) &_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + puts("Uncompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); From 0861b5a754fd5539f527b445aaa61538185c8264 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 11:02:36 +0100 Subject: [PATCH 1599/1911] s390/smp: fix ipl from cpu with non-zero address Commit af51160ebd3c ("s390/smp: initialize cpu_present_mask in setup_arch") initializes the cpu_present_mask much earlier than before. However the cpu detection code relies on the fact that iff logical cpu 0 is marked present then also the corresponding physical cpu address within the pcpu_devices array slot is valid. Since commit 44fd22992cb7 ("[PATCH] Register the boot-cpu in the cpu maps earlier") this assumption is not true anymore. The patch marks logical cpu 0 as present in common code without that architecture code had a chance to setup the logical to physical map. With that change the cpu detection code assumes that the physical cpu address of cpu 0 is also 0, which is not necessarily true. Subsequently the physical cpu address of the ipl cpu will be mapped to a different logical cpu. 
If that cpu is brought online later the ipl cpu will send itself an initial cpu reset sigp signal. This in turn completely resets the ipl cpu and the system stops working. A dump of such a system looks like a "store status" has been forgotten. But actually the kernel itself removed all traces which would allow to easily tell what went wrong. To fix this initialize the logical to physical cpu address already in smp_setup_processor_id(). In addition remove the initialization of the cpu_present_mask and cpu_online_mask for cpu 0, since that has already been done. Also add a sanity check, just in case common code will be changed again... The problem can be easily reproduced within a z/VM guest: > chcpu -d 0 > vmcp ipl Fixes: af51160ebd3c ("s390/smp: initialize cpu_present_mask in setup_arch") Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f184..5dab859b0d543b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -909,13 +909,11 @@ void __init smp_prepare_boot_cpu(void) { struct pcpu *pcpu = pcpu_devices; + WARN_ON(!cpu_present(0) || !cpu_online(0)); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = stap(); pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); - set_cpu_present(0, true); - set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -924,6 +922,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { + pcpu_devices[0].address = stap(); S390_lowcore.cpu_nr = 0; S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } From ca681ec860d6533b8debda4f6ab2e70e6d519d89 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 15 Mar 2017 10:58:07 +0100 Subject: [PATCH 1600/1911] s390/pkey: Fix wrong handling of secure key with old MKVP When a secure key with an old Master Key Verification Pattern was given to the pkey_findcard function, there was no responsible card found because only the current MKVP of each card was compared. With this fix also the old MKVP values are considered and so a matching card able to handle the key is reported back to the caller. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 53 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 40f1136f556889..058db724b5a28a 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -572,6 +572,12 @@ int pkey_sec2protkey(u16 cardnr, u16 domain, rc = -EIO; goto out; } + if (prepcblk->ccp_rscode != 0) { + DEBUG_WARN( + "pkey_sec2protkey unwrap secure key warning, card response %d/%d\n", + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + } /* process response cprb param block */ prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); @@ -761,9 +767,10 @@ static int query_crypto_facility(u16 cardnr, u16 domain, } /* - * Fetch just the mkvp value via query_crypto_facility from adapter. + * Fetch the current and old mkvp values via + * query_crypto_facility from adapter. 
*/ -static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp) +static int fetch_mkvp(u16 cardnr, u16 domain, u64 mkvp[2]) { int rc, found = 0; size_t rlen, vlen; @@ -779,9 +786,10 @@ static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp) rc = query_crypto_facility(cardnr, domain, "STATICSA", rarray, &rlen, varray, &vlen); if (rc == 0 && rlen > 8*8 && vlen > 184+8) { - if (rarray[64] == '2') { + if (rarray[8*8] == '2') { /* current master key state is valid */ - *mkvp = *((u64 *)(varray + 184)); + mkvp[0] = *((u64 *)(varray + 184)); + mkvp[1] = *((u64 *)(varray + 172)); found = 1; } } @@ -796,14 +804,14 @@ struct mkvp_info { struct list_head list; u16 cardnr; u16 domain; - u64 mkvp; + u64 mkvp[2]; }; /* a list with mkvp_info entries */ static LIST_HEAD(mkvp_list); static DEFINE_SPINLOCK(mkvp_list_lock); -static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) +static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 mkvp[2]) { int rc = -ENOENT; struct mkvp_info *ptr; @@ -812,7 +820,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) list_for_each_entry(ptr, &mkvp_list, list) { if (ptr->cardnr == cardnr && ptr->domain == domain) { - *mkvp = ptr->mkvp; + memcpy(mkvp, ptr->mkvp, 2 * sizeof(u64)); rc = 0; break; } @@ -822,7 +830,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) return rc; } -static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) +static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp[2]) { int found = 0; struct mkvp_info *ptr; @@ -831,7 +839,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) list_for_each_entry(ptr, &mkvp_list, list) { if (ptr->cardnr == cardnr && ptr->domain == domain) { - ptr->mkvp = mkvp; + memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64)); found = 1; break; } @@ -844,7 +852,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) } ptr->cardnr = cardnr; ptr->domain = domain; - ptr->mkvp = mkvp; + memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64)); list_add(&ptr->list, &mkvp_list); } spin_unlock_bh(&mkvp_list_lock); @@ -888,8 +896,8 @@ int pkey_findcard(const struct pkey_seckey *seckey, struct secaeskeytoken *t = (struct secaeskeytoken *) seckey; struct zcrypt_device_matrix *device_matrix; u16 card, dom; - u64 mkvp; - int i, rc; + u64 mkvp[2]; + int i, rc, oi = -1; /* mkvp must not be zero */ if (t->mkvp == 0) @@ -910,14 +918,14 @@ int pkey_findcard(const struct pkey_seckey *seckey, device_matrix->device[i].functions & 0x04) { /* an enabled CCA Coprocessor card */ /* try cached mkvp */ - if (mkvp_cache_fetch(card, dom, &mkvp) == 0 && - t->mkvp == mkvp) { + if (mkvp_cache_fetch(card, dom, mkvp) == 0 && + t->mkvp == mkvp[0]) { if (!verify) break; /* verify: fetch mkvp from adapter */ - if (fetch_mkvp(card, dom, &mkvp) == 0) { + if (fetch_mkvp(card, dom, mkvp) == 0) { mkvp_cache_update(card, dom, mkvp); - if (t->mkvp == mkvp) + if (t->mkvp == mkvp[0]) break; } } @@ -936,14 +944,21 @@ int pkey_findcard(const struct pkey_seckey *seckey, card = AP_QID_CARD(device_matrix->device[i].qid); dom = AP_QID_QUEUE(device_matrix->device[i].qid); /* fresh fetch mkvp from adapter */ - if (fetch_mkvp(card, dom, &mkvp) == 0) { + if (fetch_mkvp(card, dom, mkvp) == 0) { mkvp_cache_update(card, dom, mkvp); - if (t->mkvp == mkvp) + if (t->mkvp == mkvp[0]) break; + if (t->mkvp == mkvp[1] && oi < 0) + oi = i; } } + if (i >= MAX_ZDEV_ENTRIES && oi >= 0) { + /* old mkvp matched, use this card then */ + card = AP_QID_CARD(device_matrix->device[oi].qid); + dom = AP_QID_QUEUE(device_matrix->device[oi].qid); + } } - if (i 
< MAX_ZDEV_ENTRIES) { + if (i < MAX_ZDEV_ENTRIES || oi >= 0) { if (pcardnr) *pcardnr = card; if (pdomain) From 26a37ab319a26d330bab298770d692bb9c852aff Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 Mar 2017 14:40:30 -0700 Subject: [PATCH 1601/1911] x86/mce: Fix copy/paste error in exception table entries Back in commit: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") ... I made a copy/paste error setting up the exception table entries and ended up with two for label .L_cache_w3 and none for .L_cache_w2. This means that if we take a machine check on: .L_cache_w2: movq 2*8(%rsi), %r10 then we don't have an exception table entry for this instruction and we can't recover. Fix: s/3/2/ Signed-off-by: Tony Luck Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") Link: http://lkml.kernel.org/r/1490046030-25862-1-git-send-email-tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 779782f5832476..9a53a06e5a3efc 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) From de288e36fe33f7e06fa272bc8e2f85aa386d99aa Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Mon, 13 Mar 2017 14:11:32 +0200 Subject: [PATCH 1602/1911] usb: dwc3: gadget: delay unmap of bounced requests In the case of bounced ep0 requests, we must delay DMA operation until after ->complete() otherwise we might overwrite contents of req->buf. This caused problems with RNDIS gadget. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 0d75158e43fe48..79e7a3480d51b0 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -171,6 +171,7 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, int status) { struct dwc3 *dwc = dep->dwc; + unsigned int unmap_after_complete = false; req->started = false; list_del(&req->list); @@ -180,11 +181,19 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number <= 1) + /* + * NOTICE we don't want to unmap before calling ->complete() if we're + * dealing with a bounced ep0 request. If we unmap it here, we would end + * up overwritting the contents of req->buf and this could confuse the + * gadget driver. 
+ */ + if (dwc->ep0_bounced && dep->number <= 1) { dwc->ep0_bounced = false; - - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, req->direction); + unmap_after_complete = true; + } else { + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); + } trace_dwc3_gadget_giveback(req); @@ -192,6 +201,10 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, usb_gadget_giveback_request(&dep->endpoint, &req->request); spin_lock(&dwc->lock); + if (unmap_after_complete) + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); + if (dep->number > 1) pm_runtime_put(dwc->dev); } From 74098c4ac782afd71b35ac56f9536ae2f5cd7da7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Mar 2017 12:09:56 +0100 Subject: [PATCH 1603/1911] usb: gadget: acm: fix endianness in notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gadget code exports the bitfield for serial status changes over the wire in its internal endianness. The fix is to convert to little endian before sending it over the wire. Signed-off-by: Oliver Neukum Tested-by: 家瑋 CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_acm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index a30766ca422644..5e3828d9dac7f3 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm) { struct usb_composite_dev *cdev = acm->port.func.config->cdev; int status; + __le16 serial_state; spin_lock(&acm->lock); if (acm->notify_req) { dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n", acm->port_num, acm->serial_state); + serial_state = cpu_to_le16(acm->serial_state); status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE, - 0, &acm->serial_state, sizeof(acm->serial_state)); + 0, &serial_state, sizeof(acm->serial_state)); } else { acm->pending = true; status = 0; From 09424c50b7dff40cb30011c09114404a4656e023 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 8 Mar 2017 16:05:43 +0200 Subject: [PATCH 1604/1911] usb: gadget: f_uvc: Fix SuperSpeed companion descriptor's wBytesPerInterval The streaming_maxburst module parameter is 0 offset (0..15) so we must add 1 while using it for wBytesPerInterval calculation for the SuperSpeed companion descriptor. Without this host uvcvideo driver will always see the wrong wBytesPerInterval for SuperSpeed uvc gadget and may not find a suitable video interface endpoint. e.g. for streaming_maxburst = 0 case it will always fail as wBytePerInterval was evaluating to 0. Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 29b41b5dee04d2..c7689d05356c4f 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -625,7 +625,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_ss_streaming_comp.bMaxBurst = opts->streaming_maxburst; uvc_ss_streaming_comp.wBytesPerInterval = cpu_to_le16(max_packet_size * max_packet_mult * - opts->streaming_maxburst); + (opts->streaming_maxburst + 1)); /* Allocate endpoints. 
*/ ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep); From 16bb05d98c904a4f6c5ce7e2d992299f794acbf2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 8 Mar 2017 16:05:44 +0200 Subject: [PATCH 1605/1911] usb: gadget: f_uvc: Sanity check wMaxPacketSize for SuperSpeed As per USB3.0 Specification "Table 9-20. Standard Endpoint Descriptor", for interrupt and isochronous endpoints, wMaxPacketSize must be set to 1024 if the endpoint defines bMaxBurst to be greater than zero. Reviewed-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index c7689d05356c4f..f8a1881609a2c8 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U); opts->streaming_maxburst = min(opts->streaming_maxburst, 15U); + /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */ + if (opts->streaming_maxburst && + (opts->streaming_maxpacket % 1024) != 0) { + opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024); + INFO(cdev, "overriding streaming_maxpacket to %d\n", + opts->streaming_maxpacket); + } + /* Fill in the FS/HS/SS Video Streaming specific descriptors from the * module parameters. * From 1f459262b0e1649a1e5ad12fa4c66eb76c2220ce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 10 Mar 2017 15:39:32 -0600 Subject: [PATCH 1606/1911] usb: gadget: udc: remove pointer dereference after free Remove pointer dereference after free. Addresses-Coverity-ID: 1091173 Acked-by: Michal Nazarewicz Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index a97da645c1b9ea..8a365aad66fe2e 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; pci_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; From 25cd9721c2b16ee0d775e36ec3af31f392003f80 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 31 Jan 2017 18:12:31 +0100 Subject: [PATCH 1607/1911] usb: gadget: f_hid: fix: Don't access hidg->req without spinlock held hidg->req should be accessed only with write_spinlock held as it is set to NULL when we get disabled by host. 
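The discipline the patch enforces looks roughly like the schematic below: the usb_request pointer is snapshotted from hidg->req while write_spinlock is held, the possibly sleeping copy_from_user() then works on that private pointer, and after re-taking the lock the code checks whether the host disabled the function in the meantime before queueing. This is a schematic of f_hidg_write(), not the full function; the elided parts are marked with comments.

	spin_lock_irqsave(&hidg->write_spinlock, flags);
	req = hidg->req;			/* snapshot under the lock */
	count = min_t(unsigned, count, hidg->report_length);
	spin_unlock_irqrestore(&hidg->write_spinlock, flags);

	status = copy_from_user(req->buf, buffer, count);	/* may sleep */

	spin_lock_irqsave(&hidg->write_spinlock, flags);
	if (!hidg->req) {
		/* Host disabled the function meanwhile: drop our private copy. */
		free_ep_req(hidg->in_ep, req);
		/* ... unlock and return through the driver's error path ... */
	}
	/* ... fill in req->length, req->complete, req->context, then ... */
	status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC);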
Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_hid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 89b48bcc377a16..5eea44823ca06d 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -367,7 +367,7 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, count = min_t(unsigned, count, hidg->report_length); spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = copy_from_user(hidg->req->buf, buffer, count); + status = copy_from_user(req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, @@ -378,9 +378,9 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, spin_lock_irqsave(&hidg->write_spinlock, flags); - /* we our function has been disabled by host */ + /* when our function has been disabled by host */ if (!hidg->req) { - free_ep_req(hidg->in_ep, hidg->req); + free_ep_req(hidg->in_ep, req); /* * TODO * Should we fail with error here? @@ -394,7 +394,7 @@ static ssize_t f_hidg_write(struct file *file, const char __user *buffer, req->complete = f_hidg_req_complete; req->context = hidg; - status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC); + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); From 90400c5884b89a5db232049721ef4dc1ab2f1f1c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Feb 2017 14:35:54 +0200 Subject: [PATCH 1608/1911] extcon: int3496: Rename GPIO pins in accordance with binding Update GPIO pin names in extcon-intel-int3496.c driver to follow the existing extcon binding. Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 38eb6cab938fff..81713bf7487e31 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -105,13 +105,13 @@ static int int3496_probe(struct platform_device *pdev) return data->usb_id_irq; } - data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en", + data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus", INT3496_GPIO_VBUS_EN, GPIOD_ASIS); if (IS_ERR(data->gpio_vbus_en)) dev_info(dev, "can't request VBUS EN GPIO\n"); - data->gpio_usb_mux = devm_gpiod_get_index(dev, "usb mux", + data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux", INT3496_GPIO_USB_MUX, GPIOD_ASIS); if (IS_ERR(data->gpio_usb_mux)) From 8cb2cbae45ae46b1e1be16950670d5c5d4408474 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Feb 2017 14:35:55 +0200 Subject: [PATCH 1609/1911] extcon: int3496: Add GPIO ACPI mapping table In order to make GPIO ACPI library stricter prepare users of gpiod_get_index() to correctly behave when there no mapping is provided by firmware. Here we add explicit mapping between _CRS GpioIo() resources and their names used in the driver. 
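The pattern being introduced is the standard ACPI GPIO mapping one: declare acpi_gpio_params entries that point at indices inside the device's _CRS GpioIo() resources, tie them to connection IDs in an acpi_gpio_mapping table, and register that table so GPIOs can be requested by name. A generic sketch of that shape follows; everything prefixed demo_ is illustrative, only the gpiolib/ACPI API calls are real.

#include <linux/acpi.h>
#include <linux/gpio/consumer.h>

/* { crs_entry_index, line_index, active_low } for each mapped GPIO. */
static const struct acpi_gpio_params demo_id_gpio   = { 0, 0, false };
static const struct acpi_gpio_params demo_vbus_gpio = { 1, 0, false };

static const struct acpi_gpio_mapping demo_gpios[] = {
	{ "id-gpios",   &demo_id_gpio,   1 },
	{ "vbus-gpios", &demo_vbus_gpio, 1 },
	{ }
};

static int demo_bind_gpios(struct device *dev)
{
	int ret;

	ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), demo_gpios);
	if (ret)
		return ret;

	/* From here on, gpiod_get(dev, "id", ...) resolves via the mapping. */
	return 0;
}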
Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- Documentation/extcon/intel-int3496.txt | 5 +++++ drivers/extcon/extcon-intel-int3496.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/extcon/intel-int3496.txt b/Documentation/extcon/intel-int3496.txt index af0b366c25b733..8155dbc7fad36a 100644 --- a/Documentation/extcon/intel-int3496.txt +++ b/Documentation/extcon/intel-int3496.txt @@ -20,3 +20,8 @@ Index 1: The output gpio for enabling Vbus output from the device to the otg Index 2: The output gpio for muxing of the data pins between the USB host and the USB peripheral controller, write 1 to mux to the peripheral controller + +There is a mapping between indices and GPIO connection IDs as follows + id index 0 + vbus index 1 + mux index 2 diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 81713bf7487e31..22a529eb1b1aee 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -45,6 +45,17 @@ static const unsigned int int3496_cable[] = { EXTCON_NONE, }; +static const struct acpi_gpio_params id_gpios = { INT3496_GPIO_USB_ID, 0, false }; +static const struct acpi_gpio_params vbus_gpios = { INT3496_GPIO_VBUS_EN, 0, false }; +static const struct acpi_gpio_params mux_gpios = { INT3496_GPIO_USB_MUX, 0, false }; + +static const struct acpi_gpio_mapping acpi_int3496_default_gpios[] = { + { "id-gpios", &id_gpios, 1 }, + { "vbus-gpios", &vbus_gpios, 1 }, + { "mux-gpios", &mux_gpios, 1 }, + { }, +}; + static void int3496_do_usb_id(struct work_struct *work) { struct int3496_data *data = @@ -83,6 +94,13 @@ static int int3496_probe(struct platform_device *pdev) struct int3496_data *data; int ret; + ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), + acpi_int3496_default_gpios); + if (ret) { + dev_err(dev, "can't add GPIO ACPI mapping\n"); + return ret; + } + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -154,6 +172,8 @@ static int int3496_remove(struct platform_device *pdev) devm_free_irq(&pdev->dev, data->usb_id_irq, data); cancel_delayed_work_sync(&data->work); + acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev)); + return 0; } From 059c7874b87575e48d2c7702849ff56017059195 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 2 Mar 2017 18:10:10 +0000 Subject: [PATCH 1610/1911] extcon: int3496: Add dependency on X86 as it's Intel specific Add dependency on X86 so it doesn't show up on other arches and add a option for compile test so it still gets build coverage. Signed-off-by: Peter Robinson Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 96bbae579c0b01..fc09c76248b416 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -44,7 +44,7 @@ config EXTCON_GPIO config EXTCON_INTEL_INT3496 tristate "Intel INT3496 ACPI device extcon driver" - depends on GPIOLIB && ACPI + depends on GPIOLIB && ACPI && (X86 || COMPILE_TEST) help Say Y here to enable extcon support for USB OTG ports controlled by an Intel INT3496 ACPI device. From 408c5b41d215b317ab58c98b959454407ee4a53d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Mar 2017 21:52:08 +0100 Subject: [PATCH 1611/1911] extcon: int3496: Use gpiod_get instead of gpiod_get_index Now that we've an acpi mapping table we should be using gpiod_get instead of gpiod_get_index. 
Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 22a529eb1b1aee..00f0e60a00cedb 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -108,9 +108,7 @@ static int int3496_probe(struct platform_device *pdev) data->dev = dev; INIT_DELAYED_WORK(&data->work, int3496_do_usb_id); - data->gpio_usb_id = devm_gpiod_get_index(dev, "id", - INT3496_GPIO_USB_ID, - GPIOD_IN); + data->gpio_usb_id = devm_gpiod_get(dev, "id", GPIOD_IN); if (IS_ERR(data->gpio_usb_id)) { ret = PTR_ERR(data->gpio_usb_id); dev_err(dev, "can't request USB ID GPIO: %d\n", ret); @@ -123,15 +121,11 @@ static int int3496_probe(struct platform_device *pdev) return data->usb_id_irq; } - data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus", - INT3496_GPIO_VBUS_EN, - GPIOD_ASIS); + data->gpio_vbus_en = devm_gpiod_get(dev, "vbus", GPIOD_ASIS); if (IS_ERR(data->gpio_vbus_en)) dev_info(dev, "can't request VBUS EN GPIO\n"); - data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux", - INT3496_GPIO_USB_MUX, - GPIOD_ASIS); + data->gpio_usb_mux = devm_gpiod_get(dev, "mux", GPIOD_ASIS); if (IS_ERR(data->gpio_usb_mux)) dev_info(dev, "can't request USB MUX GPIO\n"); From 70216fd937fea79c20705400540fa48f86ddf1c5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 13 Mar 2017 12:28:34 +0100 Subject: [PATCH 1612/1911] extcon: int3496: Set the id pin to direction-input if necessary With the new more strict ACPI gpio code the dsdt's IoRestriction flags are honored on gpiod_get, but in some dsdt's it is wrong, so explicitly call gpiod_direction_input on the id gpio if necessary. 
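A cut-down view of the resulting probe logic: request the ID pin as input, and if the firmware description left it configured as an output anyway, warn and flip it before deriving the IRQ. The snippet mirrors the hunk below; data, dev and the error handling come from the driver.

	data->gpio_usb_id = devm_gpiod_get(dev, "id", GPIOD_IN);
	if (IS_ERR(data->gpio_usb_id)) {
		ret = PTR_ERR(data->gpio_usb_id);
		dev_err(dev, "can't request USB ID GPIO: %d\n", ret);
		return ret;
	} else if (gpiod_get_direction(data->gpio_usb_id) != GPIOF_DIR_IN) {
		/* Buggy DSDT: IoRestriction says output, but this is an input pin. */
		dev_warn(dev, FW_BUG "USB ID GPIO not in input mode, fixing\n");
		gpiod_direction_input(data->gpio_usb_id);
	}

	data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id);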
This fixes the following errors when the int3496 code is used together with the new more strict ACPI gpio code: [ 2382.484415] gpio gpiochip1: (INT33FF:01): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 2382.484425] gpio gpiochip1: (INT33FF:01): unable to lock HW IRQ 3 for IRQ [ 2382.484429] genirq: Failed to request resources for INT3496:00 (irq 174) on irqchip chv-gpio [ 2382.484518] intel-int3496 INT3496:00: can't request IRQ for USB ID GPIO: -22 [ 2382.500359] intel-int3496: probe of INT3496:00 failed with error -22 Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 00f0e60a00cedb..9d17984bbbd49a 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -113,6 +113,9 @@ static int int3496_probe(struct platform_device *pdev) ret = PTR_ERR(data->gpio_usb_id); dev_err(dev, "can't request USB ID GPIO: %d\n", ret); return ret; + } else if (gpiod_get_direction(data->gpio_usb_id) != GPIOF_DIR_IN) { + dev_warn(dev, FW_BUG "USB ID GPIO not in input mode, fixing\n"); + gpiod_direction_input(data->gpio_usb_id); } data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id); From db8466c581cca1a08b505f1319c3ecd246f16fa8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 21 Mar 2017 14:52:25 +0000 Subject: [PATCH 1613/1911] MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn Cc: Paolo Bonzini Cc: Marcin Nowakowski Cc: Masanari Iida Cc: Chris Metcalf Cc: James Hogan Cc: Paul Burton Cc: Ingo Molnar Cc: Jason A. 
Donenfeld Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/irq.h | 15 +++++++++ arch/mips/kernel/asm-offsets.c | 1 + arch/mips/kernel/genex.S | 8 +++-- arch/mips/kernel/process.c | 56 +++++++++++++++++++++++----------- 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 956db6e201d187..ddd1c918103bcc 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -18,9 +18,24 @@ #include #define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START (IRQ_STACK_SIZE - sizeof(unsigned long)) extern void *irq_stack[NR_CPUS]; +/* + * The highest address on the IRQ stack contains a dummy frame put down in + * genex.S (handle_int & except_vec_vi_handler) which is structured as follows: + * + * top ------------ + * | task sp | <- irq_stack[cpu] + IRQ_STACK_START + * ------------ + * | | <- First frame of IRQ context + * ------------ + * + * task sp holds a copy of the task stack pointer where the struct pt_regs + * from exception entry can be found. + */ + static inline bool on_irq_stack(int cpu, unsigned long sp) { unsigned long low = (unsigned long)irq_stack[cpu]; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index bb5c5d34ba8152..a670c0c11875d2 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -102,6 +102,7 @@ void output_thread_info_defines(void) DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); + DEFINE(_IRQ_STACK_START, IRQ_STACK_START); BLANK(); } diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2ac6c2625c13d4..ae810da4d499e6 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jal plat_irq_dispatch @@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jalr v0 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index fb6b6b650719ad..b68e10fc453d11 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -488,31 +488,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, unsigned long pc, unsigned long *ra) { + unsigned long low, high, irq_stack_high; struct mips_frame_info info; unsigned long size, ofs; + struct pt_regs *regs; int leaf; - extern void ret_from_irq(void); - extern void ret_from_exception(void); if (!stack_page) return 0; /* - * If we reached the bottom of interrupt context, - * return saved pc in pt_regs. 
+ * IRQ stacks start at IRQ_STACK_START + * task stacks at THREAD_SIZE - 32 */ - if (pc == (unsigned long)ret_from_irq || - pc == (unsigned long)ret_from_exception) { - struct pt_regs *regs; - if (*sp >= stack_page && - *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) { - regs = (struct pt_regs *)*sp; - pc = regs->cp0_epc; - if (!user_mode(regs) && __kernel_text_address(pc)) { - *sp = regs->regs[29]; - *ra = regs->regs[31]; - return pc; - } + low = stack_page; + if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) { + high = stack_page + IRQ_STACK_START; + irq_stack_high = high; + } else { + high = stack_page + THREAD_SIZE - 32; + irq_stack_high = 0; + } + + /* + * If we reached the top of the interrupt stack, start unwinding + * the interrupted task stack. + */ + if (unlikely(*sp == irq_stack_high)) { + unsigned long task_sp = *(unsigned long *)*sp; + + /* + * Check that the pointer saved in the IRQ stack head points to + * something within the stack of the current task + */ + if (!object_is_on_stack((void *)task_sp)) + return 0; + + /* + * Follow pointer to tasks kernel stack frame where interrupted + * state was saved. + */ + regs = (struct pt_regs *)task_sp; + pc = regs->cp0_epc; + if (!user_mode(regs) && __kernel_text_address(pc)) { + *sp = regs->regs[29]; + *ra = regs->regs[31]; + return pc; } return 0; } @@ -533,8 +554,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, if (leaf < 0) return 0; - if (*sp < stack_page || - *sp + info.frame_size > stack_page + THREAD_SIZE - 32) + if (*sp < low || *sp + info.frame_size > high) return 0; if (leaf) From 5003ae1e735e6bfe4679d9bed6846274f322e77e Mon Sep 17 00:00:00 2001 From: Koos Vriezen Date: Wed, 1 Mar 2017 21:02:50 +0100 Subject: [PATCH 1614/1911] iommu/vt-d: Fix NULL pointer dereference in device_to_iommu The function device_to_iommu() in the Intel VT-d driver lacks a NULL-ptr check, resulting in this oops at boot on some platforms: BUG: unable to handle kernel NULL pointer dereference at 00000000000007ab IP: [] device_to_iommu+0x11a/0x1a0 PGD 0 [...] Call Trace: ? find_or_alloc_domain.constprop.29+0x1a/0x300 ? dw_dma_probe+0x561/0x580 [dw_dmac_core] ? __get_valid_domain_for_dev+0x39/0x120 ? __intel_map_single+0x138/0x180 ? intel_alloc_coherent+0xb6/0x120 ? sst_hsw_dsp_init+0x173/0x420 [snd_soc_sst_haswell_pcm] ? mutex_lock+0x9/0x30 ? kernfs_add_one+0xdb/0x130 ? devres_add+0x19/0x60 ? hsw_pcm_dev_probe+0x46/0xd0 [snd_soc_sst_haswell_pcm] ? platform_drv_probe+0x30/0x90 ? driver_probe_device+0x1ed/0x2b0 ? __driver_attach+0x8f/0xa0 ? driver_probe_device+0x2b0/0x2b0 ? bus_for_each_dev+0x55/0x90 ? bus_add_driver+0x110/0x210 ? 0xffffffffa11ea000 ? driver_register+0x52/0xc0 ? 0xffffffffa11ea000 ? do_one_initcall+0x32/0x130 ? free_vmap_area_noflush+0x37/0x70 ? kmem_cache_alloc+0x88/0xd0 ? do_init_module+0x51/0x1c4 ? load_module+0x1ee9/0x2430 ? show_taint+0x20/0x20 ? kernel_read_file+0xfd/0x190 ? SyS_finit_module+0xa3/0xb0 ? do_syscall_64+0x4a/0xb0 ? entry_SYSCALL64_slow_path+0x25/0x25 Code: 78 ff ff ff 4d 85 c0 74 ee 49 8b 5a 10 0f b6 9b e0 00 00 00 41 38 98 e0 00 00 00 77 da 0f b6 eb 49 39 a8 88 00 00 00 72 ce eb 8f <41> f6 82 ab 07 00 00 04 0f 85 76 ff ff ff 0f b6 4d 08 88 0e 49 RIP [] device_to_iommu+0x11a/0x1a0 RSP CR2: 00000000000007ab ---[ end trace 16f974b6d58d0aad ]--- Add the missing pointer check. 
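The crash path is a non-PCI device (here an ACPI-enumerated DMA engine) reaching device_to_iommu(), where pdev is only set for PCI devices, so the unconditional dereference faults. Schematically, with the surrounding scope-table walk elided:

	struct pci_dev *pdev = NULL;

	if (dev_is_pci(dev))
		pdev = to_pci_dev(dev);

	/* ... DRHD scope-table walk ... */

	/* pdev may still be NULL for platform/ACPI devices. */
	if (pdev && pdev->is_virtfn)
		goto got_pdev;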
Fixes: 1c387188c60f53b338c20eee32db055dfe022a9b ("iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions") Signed-off-by: Koos Vriezen Cc: stable@vger.kernel.org # 4.8.15+ Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 238ad3447712d2..91d60493b57cd9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -916,7 +916,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf * which we used for the IOMMU lookup. Strictly speaking * we could do this for all PCI devices; we only need to * get the BDF# from the scope table for ACPI matches. */ - if (pdev->is_virtfn) + if (pdev && pdev->is_virtfn) goto got_pdev; *bus = drhd->devices[i].bus; From 2c422257550f123049552b39f7af6e3428a60f43 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Mar 2017 13:32:37 +0100 Subject: [PATCH 1615/1911] netfilter: nfnl_cthelper: fix runtime expectation policy updates We only allow runtime updates of expectation policies for timeout and maximum number of expectations, otherwise reject the update. Signed-off-by: Pablo Neira Ayuso Acked-by: Liping Zhang --- net/netfilter/nfnetlink_cthelper.c | 86 +++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3cd41d10540749..90f291e27eb190 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -255,6 +255,89 @@ nfnl_cthelper_create(const struct nlattr * const tb[], return ret; } +static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. 
*/ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) @@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } From f83bf8da1135ca635aac8f062cad3f001fcf3a26 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 21 Mar 2017 15:07:10 +0800 Subject: [PATCH 1616/1911] netfilter: nfnl_cthelper: Fix memory leak We have memory leaks of nf_conntrack_helper & expect_policy. Signed-off-by: Jeffy Chen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 90f291e27eb190..2b987d2a77bce7 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; return 0; -err: +err2: + kfree(helper->expect_policy); +err1: kfree(helper); return ret; } @@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, found = true; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } /* Make sure we return success if we flush and there is no helpers */ @@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void) continue; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } } From 6e2e0eea072f6f82ac0afbcfa8dc38dcc3a29fa4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Feb 2017 13:09:08 -0200 Subject: [PATCH 1617/1911] [media] coda/imx-vdoa: platform_driver should not be const The device driver platform is actually written to during registration, for setting the owner field, so platform_driver_register() does not take a const pointer: drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_init': drivers/media/platform/coda/imx-vdoa.c:333:213: error: passing argument 1 of '__platform_driver_register' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] module_platform_driver(vdoa_driver); In file included from drivers/media/platform/coda/imx-vdoa.c:22:0: 
include/linux/platform_device.h:199:12: note: expected 'struct platform_driver *' but argument is of type 'const struct platform_driver *' extern int __platform_driver_register(struct platform_driver *, ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_exit': drivers/media/platform/coda/imx-vdoa.c:333:626: error: passing argument 1 of 'platform_driver_unregister' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] Remove the modifier again. Fixes: d2fe28feaebb ("[media] coda/imx-vdoa: constify structs") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/coda/imx-vdoa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c index 67fd8ffa60a418..669a4c82f1ffa4 100644 --- a/drivers/media/platform/coda/imx-vdoa.c +++ b/drivers/media/platform/coda/imx-vdoa.c @@ -321,7 +321,7 @@ static const struct of_device_id vdoa_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, vdoa_dt_ids); -static const struct platform_driver vdoa_driver = { +static struct platform_driver vdoa_driver = { .probe = vdoa_probe, .remove = vdoa_remove, .driver = { From 389e3f0d57e6ab50c207bee5ea2c4a5982a029c2 Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Fri, 2 Dec 2016 02:48:01 -0200 Subject: [PATCH 1618/1911] [media] bdisp: Clean up file handle in open() error path The File handle is not yet added in the vdev list.So no need to call v4l2_fh_del(&ctx->fh)if it fails to create control. Signed-off-by: Shailendra Verma Reviewed-by: Fabien Dessenne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 823608112d89c1..7918b928f0589b 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -632,8 +632,8 @@ static int bdisp_open(struct file *file) error_ctrls: bdisp_ctrls_delete(ctx); -error_fh: v4l2_fh_del(&ctx->fh); +error_fh: v4l2_fh_exit(&ctx->fh); bdisp_hw_free_nodes(ctx); mem_ctx: From 24a47426066c8ba16a4db1b20a41d63187281195 Mon Sep 17 00:00:00 2001 From: Thibault Saunier Date: Wed, 1 Feb 2017 18:05:21 -0200 Subject: [PATCH 1619/1911] [media] exynos-gsc: Do not swap cb/cr for semi planar formats In the case of semi planar formats cb and cr are in the same plane in memory, meaning that will be set to 'cb' whatever the format is, and whatever the (packed) order of those components are. 
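Put differently: NV21/NV61 keep both chroma components interleaved in a single plane whose base address is always the cb pointer, so swapping cb/cr for them is wrong; only the genuinely Cr-first planar and packed formats still need the swap. The condition in gsc_prepare_addr() therefore reduces to the following, with frame and addr as in the driver:

	/* Swap only for formats whose Cr data really precedes Cb. */
	if (frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY ||
	    frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU ||
	    frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420 ||
	    frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M)
		swap(addr->cb, addr->cr);	/* NV21/NV61 are no longer listed */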
Suggested-by: Nicolas Dufresne Signed-off-by: Thibault Saunier Signed-off-by: Javier Martinez Canillas Acked-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index cbb03768f5d735..0f0c389f889713 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -861,9 +861,7 @@ int gsc_prepare_addr(struct gsc_ctx *ctx, struct vb2_buffer *vb, if ((frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV61) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV21) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M)) swap(addr->cb, addr->cr); From 95a49603707d982b25d17c5b70e220a05556a2f9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Mar 2017 10:14:43 +0800 Subject: [PATCH 1620/1911] blk-mq: don't complete un-started request in timeout handler When iterating busy requests in timeout handler, if the STARTED flag of one request isn't set, that means the request is being processed in block layer or driver, and isn't submitted to hardware yet. In current implementation of blk_mq_check_expired(), if the request queue becomes dying, un-started requests are handled as being completed/freed immediately. This way is wrong, and can cause rq corruption or double allocation[1][2], when doing I/O and removing&resetting NVMe device at the sametime. This patch fixes several issues reported by Yi Zhang. [1]. oops log 1 [ 581.789754] ------------[ cut here ]------------ [ 581.789758] kernel BUG at block/blk-mq.c:374! [ 581.789760] invalid opcode: 0000 [#1] SMP [ 581.789761] Modules linked in: vfat fat ipmi_ssif intel_rapl sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm nvme irqbypass crct10dif_pclmul nvme_core crc32_pclmul ghash_clmulni_intel intel_cstate ipmi_si mei_me ipmi_devintf intel_uncore sg ipmi_msghandler intel_rapl_perf iTCO_wdt mei iTCO_vendor_support mxm_wmi lpc_ich dcdbas shpchp pcspkr acpi_power_meter wmi nfsd auth_rpcgss nfs_acl lockd dm_multipath grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci crc32c_intel tg3 libata megaraid_sas i2c_core ptp fjes pps_core dm_mirror dm_region_hash dm_log dm_mod [ 581.789796] CPU: 1 PID: 1617 Comm: kworker/1:1H Not tainted 4.10.0.bz1420297+ #4 [ 581.789797] Hardware name: Dell Inc. 
PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 [ 581.789804] Workqueue: kblockd blk_mq_timeout_work [ 581.789806] task: ffff8804721c8000 task.stack: ffffc90006ee4000 [ 581.789809] RIP: 0010:blk_mq_end_request+0x58/0x70 [ 581.789810] RSP: 0018:ffffc90006ee7d50 EFLAGS: 00010202 [ 581.789811] RAX: 0000000000000001 RBX: ffff8802e4195340 RCX: ffff88028e2f4b88 [ 581.789812] RDX: 0000000000001000 RSI: 0000000000001000 RDI: 0000000000000000 [ 581.789813] RBP: ffffc90006ee7d60 R08: 0000000000000003 R09: ffff88028e2f4b00 [ 581.789814] R10: 0000000000001000 R11: 0000000000000001 R12: 00000000fffffffb [ 581.789815] R13: ffff88042abe5780 R14: 000000000000002d R15: ffff88046fbdff80 [ 581.789817] FS: 0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000 [ 581.789818] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 581.789819] CR2: 00007f64f403a008 CR3: 000000014d078000 CR4: 00000000001406e0 [ 581.789820] Call Trace: [ 581.789825] blk_mq_check_expired+0x76/0x80 [ 581.789828] bt_iter+0x45/0x50 [ 581.789830] blk_mq_queue_tag_busy_iter+0xdd/0x1f0 [ 581.789832] ? blk_mq_rq_timed_out+0x70/0x70 [ 581.789833] ? blk_mq_rq_timed_out+0x70/0x70 [ 581.789840] ? __switch_to+0x140/0x450 [ 581.789841] blk_mq_timeout_work+0x88/0x170 [ 581.789845] process_one_work+0x165/0x410 [ 581.789847] worker_thread+0x137/0x4c0 [ 581.789851] kthread+0x101/0x140 [ 581.789853] ? rescuer_thread+0x3b0/0x3b0 [ 581.789855] ? kthread_park+0x90/0x90 [ 581.789860] ret_from_fork+0x2c/0x40 [ 581.789861] Code: 48 85 c0 74 0d 44 89 e6 48 89 df ff d0 5b 41 5c 5d c3 48 8b bb 70 01 00 00 48 85 ff 75 0f 48 89 df e8 7d f0 ff ff 5b 41 5c 5d c3 <0f> 0b e8 71 f0 ff ff 90 eb e9 0f 1f 40 00 66 2e 0f 1f 84 00 00 [ 581.789882] RIP: blk_mq_end_request+0x58/0x70 RSP: ffffc90006ee7d50 [ 581.789889] ---[ end trace bcaf03d9a14a0a70 ]--- [2]. oops log2 [ 6984.857362] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 6984.857372] IP: nvme_queue_rq+0x6e6/0x8cd [nvme] [ 6984.857373] PGD 0 [ 6984.857374] [ 6984.857376] Oops: 0000 [#1] SMP [ 6984.857379] Modules linked in: ipmi_ssif vfat fat intel_rapl sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_si iTCO_wdt iTCO_vendor_support mxm_wmi ipmi_devintf intel_cstate sg dcdbas intel_uncore mei_me intel_rapl_perf mei pcspkr lpc_ich ipmi_msghandler shpchp acpi_power_meter wmi nfsd auth_rpcgss dm_multipath nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect crc32c_intel sysimgblt fb_sys_fops ttm nvme drm nvme_core ahci libahci i2c_core tg3 libata ptp megaraid_sas pps_core fjes dm_mirror dm_region_hash dm_log dm_mod [ 6984.857416] CPU: 7 PID: 1635 Comm: kworker/7:1H Not tainted 4.10.0-2.el7.bz1420297.x86_64 #1 [ 6984.857417] Hardware name: Dell Inc. 
PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 [ 6984.857427] Workqueue: kblockd blk_mq_run_work_fn [ 6984.857429] task: ffff880476e3da00 task.stack: ffffc90002e90000 [ 6984.857432] RIP: 0010:nvme_queue_rq+0x6e6/0x8cd [nvme] [ 6984.857433] RSP: 0018:ffffc90002e93c50 EFLAGS: 00010246 [ 6984.857434] RAX: 0000000000000000 RBX: ffff880275646600 RCX: 0000000000001000 [ 6984.857435] RDX: 0000000000000fff RSI: 00000002fba2a000 RDI: ffff8804734e6950 [ 6984.857436] RBP: ffffc90002e93d30 R08: 0000000000002000 R09: 0000000000001000 [ 6984.857437] R10: 0000000000001000 R11: 0000000000000000 R12: ffff8804741d8000 [ 6984.857438] R13: 0000000000000040 R14: ffff880475649f80 R15: ffff8804734e6780 [ 6984.857439] FS: 0000000000000000(0000) GS:ffff88047fcc0000(0000) knlGS:0000000000000000 [ 6984.857440] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6984.857442] CR2: 0000000000000010 CR3: 0000000001c09000 CR4: 00000000001406e0 [ 6984.857443] Call Trace: [ 6984.857451] ? mempool_free+0x2b/0x80 [ 6984.857455] ? bio_free+0x4e/0x60 [ 6984.857459] blk_mq_dispatch_rq_list+0xf5/0x230 [ 6984.857462] blk_mq_process_rq_list+0x133/0x170 [ 6984.857465] __blk_mq_run_hw_queue+0x8c/0xa0 [ 6984.857467] blk_mq_run_work_fn+0x12/0x20 [ 6984.857473] process_one_work+0x165/0x410 [ 6984.857475] worker_thread+0x137/0x4c0 [ 6984.857478] kthread+0x101/0x140 [ 6984.857480] ? rescuer_thread+0x3b0/0x3b0 [ 6984.857481] ? kthread_park+0x90/0x90 [ 6984.857489] ret_from_fork+0x2c/0x40 [ 6984.857490] Code: 8b bd 70 ff ff ff 89 95 50 ff ff ff 89 8d 58 ff ff ff 44 89 95 60 ff ff ff e8 b7 dd 12 e1 8b 95 50 ff ff ff 48 89 85 68 ff ff ff <4c> 8b 48 10 44 8b 58 18 8b 8d 58 ff ff ff 44 8b 95 60 ff ff ff [ 6984.857511] RIP: nvme_queue_rq+0x6e6/0x8cd [nvme] RSP: ffffc90002e93c50 [ 6984.857512] CR2: 0000000000000010 [ 6984.895359] ---[ end trace 2d7ceb528432bf83 ]--- Cc: stable@vger.kernel.org Reported-by: Yi Zhang Tested-by: Yi Zhang Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a4546f060e8093..08a49c69738bb5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -697,17 +697,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { - /* - * If a request wasn't started before the queue was - * marked dying, kill it here or it'll go unnoticed. - */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - } + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - } if (time_after_eq(jiffies, rq->deadline)) { if (!blk_mark_rq_complete(rq)) From 7d2aa6b814476a2e2794960f844344519246df72 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 20 Mar 2017 10:17:56 +0100 Subject: [PATCH 1621/1911] iommu/exynos: Block SYSMMU while invalidating FLPD cache Documentation specifies that SYSMMU should be in blocked state while performing TLB/FLPD cache invalidation, so add needed calls to sysmmu_block/unblock. 
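Condensed, sysmmu_tlb_invalidate_flpdcache() now wraps the invalidation in a block/unblock pair so the SYSMMU is halted while its TLB/FLPD caches are touched (the follow-up patch additionally switches SYSMMU v5 to a full TLB flush at this point). A sketch of the resulting body, with names as in drivers/iommu/exynos-iommu.c:

	spin_lock_irqsave(&data->lock, flags);
	if (data->active && data->version >= MAKE_MMU_VER(3, 3)) {
		clk_enable(data->clk_master);
		if (sysmmu_block(data)) {
			/* Hardware must be blocked while invalidating. */
			__sysmmu_tlb_invalidate_entry(data, iova, 1);
			sysmmu_unblock(data);
		}
		clk_disable(data->clk_master);
	}
	spin_unlock_irqrestore(&data->lock, flags);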
Fixes: 66a7ed84b345d ("iommu/exynos: Apply workaround of caching fault page table entries") CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index a7e0821c9967e4..32d43f1994e409 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -512,7 +512,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, spin_lock_irqsave(&data->lock, flags); if (data->active && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (sysmmu_block(data)) { + __sysmmu_tlb_invalidate_entry(data, iova, 1); + sysmmu_unblock(data); + } clk_disable(data->clk_master); } spin_unlock_irqrestore(&data->lock, flags); From cd37a296a9f890586665bb8974a8b17ee2f17d6d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 20 Mar 2017 10:17:57 +0100 Subject: [PATCH 1622/1911] iommu/exynos: Workaround FLPD cache flush issues for SYSMMU v5 For some unknown reasons, in some cases, FLPD cache invalidation doesn't work properly with SYSMMU v5 controllers found in Exynos5433 SoCs. This can be observed by a firmware crash during initialization phase of MFC video decoder available in the mentioned SoCs when IOMMU support is enabled. To workaround this issue perform a full TLB/FLPD invalidation in case of replacing any first level page descriptors in case of SYSMMU v5. Fixes: 740a01eee9ada ("iommu/exynos: Add support for v5 SYSMMU") CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Marek Szyprowski Tested-by: Andrzej Hajda Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 32d43f1994e409..c01bfcdb238316 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -513,7 +513,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, if (data->active && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); if (sysmmu_block(data)) { - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (data->version >= MAKE_MMU_VER(5, 0)) + __sysmmu_tlb_invalidate(data); + else + __sysmmu_tlb_invalidate_entry(data, iova, 1); sysmmu_unblock(data); } clk_disable(data->clk_master); From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:16 +0000 Subject: [PATCH 1623/1911] iommu: Disambiguate MSI region types The introduction of reserved regions has left a couple of rough edges which we could do with sorting out sooner rather than later. Since we are not yet addressing the potential dynamic aspect of software-managed reservations and presenting them at arbitrary fixed addresses, it is incongruous that we end up displaying hardware vs. software-managed MSI regions to userspace differently, especially since ARM-based systems may actually require one or the other, or even potentially both at once, (which iommu-dma currently has no hope of dealing with at all). Let's resolve the former user-visible inconsistency ASAP before the ABI has been baked into a kernel release, in a way that also lays the groundwork for the latter shortcoming to be addressed by follow-up patches. 
For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use IOMMU_RESV_MSI to describe the hardware type, and document everything a little bit. Since the x86 MSI remapping hardware falls squarely under this meaning of IOMMU_RESV_MSI, apply that type to their regions as well, so that we tell the same story to userspace across all platforms. Secondly, as the various region types require quite different handling, and it really makes little sense to ever try combining them, convert the bitfield-esque #defines to a plain enum in the process before anyone gets the wrong impression. Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region") Reviewed-by: Eric Auger CC: Alex Williamson CC: David Woodhouse CC: kvm@vger.kernel.org Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 5 +++-- drivers/vfio/vfio_iommu_type1.c | 7 +++---- include/linux/iommu.h | 18 +++++++++++++----- 7 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98940d1392cb0c..b17536d6e69bdb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3202,7 +3202,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, region = iommu_alloc_resv_region(MSI_RANGE_START, MSI_RANGE_END - MSI_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!region) return; list_add_tail(®ion->list, head); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5806a6acc94ecd..591bb96047c976 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1888,7 +1888,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index abf6496843a617..b493c99e17f74d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1608,7 +1608,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 91d60493b57cd9..d412a313a37232 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5249,7 +5249,7 @@ static void intel_iommu_get_resv_regions(struct device *device, reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!reg) return; list_add_tail(®->list, head); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8ea14f41a979fd..3b67144dead2e3 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -72,6 +72,7 @@ static const char * const iommu_group_resv_type_string[] = { [IOMMU_RESV_DIRECT] = "direct", [IOMMU_RESV_RESERVED] = "reserved", [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ @@ -1743,8 +1744,8 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) } struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, - size_t length, - int prot, int type) + 
size_t length, int prot, + enum iommu_resv_type type) { struct iommu_resv_region *region; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c26fa1f3ed8606..32d2633092a37e 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1182,8 +1182,7 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static bool vfio_iommu_has_resv_msi(struct iommu_group *group, - phys_addr_t *base) +static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) { struct list_head group_resv_regions; struct iommu_resv_region *region, *next; @@ -1192,7 +1191,7 @@ static bool vfio_iommu_has_resv_msi(struct iommu_group *group, INIT_LIST_HEAD(&group_resv_regions); iommu_get_group_resv_regions(group, &group_resv_regions); list_for_each_entry(region, &group_resv_regions, list) { - if (region->type & IOMMU_RESV_MSI) { + if (region->type == IOMMU_RESV_SW_MSI) { *base = region->start; ret = true; goto out; @@ -1283,7 +1282,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; - resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base); + resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0ff..2e4de0deee531a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,9 +125,16 @@ enum iommu_attr { }; /* These are the possible reserved region types */ -#define IOMMU_RESV_DIRECT (1 << 0) -#define IOMMU_RESV_RESERVED (1 << 1) -#define IOMMU_RESV_MSI (1 << 2) +enum iommu_resv_type { + /* Memory regions which must be mapped 1:1 at all times */ + IOMMU_RESV_DIRECT, + /* Arbitrary "never map this or give it to a device" address ranges */ + IOMMU_RESV_RESERVED, + /* Hardware MSI region (untranslated) */ + IOMMU_RESV_MSI, + /* Software-managed MSI translation window */ + IOMMU_RESV_SW_MSI, +}; /** * struct iommu_resv_region - descriptor for a reserved memory region @@ -142,7 +149,7 @@ struct iommu_resv_region { phys_addr_t start; size_t length; int prot; - int type; + enum iommu_resv_type type; }; #ifdef CONFIG_IOMMU_API @@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * -iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, + enum iommu_resv_type type); extern int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head); From afd0e5a876703accb95894f23317a13e2c49b523 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 22 Mar 2017 17:08:25 +0530 Subject: [PATCH 1624/1911] arm64: kaslr: Fix up the kernel image alignment If kernel image extends across alignment boundary, existing code increases the KASLR offset by size of kernel image. The offset is masked after resizing. There are cases, where after masking, we may still have kernel image extending across boundary. This eventually results in only 2MB block getting mapped while creating the page tables. This results in data aborts while accessing unmapped regions during second relocation (with kaslr offset) in __primary_switch. 
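For illustration, here is a small stand-alone program that reproduces the arithmetic of the worked example below, assuming the 4KB-granule constants quoted there (VA_BITS=39, SWAPPER_TABLE_SHIFT=30, SWAPPER_BLOCK_SIZE=2MB). It is only a user-space sketch of the check, not the kernel code; it shows the existing correction still leaving the image across the boundary after masking, while the rounded-up correction does not:

#include <stdio.h>
#include <stdint.h>

#define VA_BITS			39
#define SWAPPER_TABLE_SHIFT	30
#define SWAPPER_BLOCK_SIZE	(2ULL << 20)	/* 2MB section maps */

static uint64_t round_up(uint64_t x, uint64_t a)
{
	return (x + a - 1) & ~(a - 1);
}

static int crosses(uint64_t text, uint64_t end, uint64_t off)
{
	return ((text + off) >> SWAPPER_TABLE_SHIFT) !=
	       ((end + off) >> SWAPPER_TABLE_SHIFT);
}

int main(void)
{
	uint64_t text = 0xffffff8008080000ULL;
	uint64_t end  = 0xffffff800aa1b000ULL;
	uint64_t off  = 0x1f35600000ULL;
	uint64_t size = end - text;
	uint64_t mask = ((1ULL << (VA_BITS - 2)) - 1) &
			~(SWAPPER_BLOCK_SIZE - 1);	/* ~(SZ_2M - 1) */

	printf("initial: crosses=%d\n", crosses(text, end, off));
	printf("old fix: crosses=%d\n",
	       crosses(text, end, (off + size) & mask));
	printf("new fix: crosses=%d\n",
	       crosses(text, end,
		       (off + round_up(size, SWAPPER_BLOCK_SIZE)) & mask));
	return 0;
}

Running it prints crosses=1, crosses=1, crosses=0, matching the values in the example below.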
To fix this problem, round up the kernel image size, by swapper block size, before adding it for correction. For example consider below case, where kernel image still crosses 1GB alignment boundary, after masking the offset, which is fixed by rounding up kernel image size. SWAPPER_TABLE_SHIFT = 30 Swapper using section maps with section size 2MB. CONFIG_PGTABLE_LEVELS = 3 VA_BITS = 39 _text : 0xffffff8008080000 _end : 0xffffff800aa1b000 offset : 0x1f35600000 mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1) (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d offset after existing correction (before mask) = 0x1f37f9b000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d offset (after mask) = 0x1f37e00000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d new offset w/ rounding up = 0x1f38000000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: Reviewed-by: Ard Biesheuvel Signed-off-by: Neeraj Upadhyay Signed-off-by: Srinivas Ramana Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 769f24ef628c1e..d7e90d97f5c405 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -131,11 +131,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, increase the KASLR offset by the size of the kernel image. + * happens, increase the KASLR offset by the size of the kernel image + * rounded up by SWAPPER_BLOCK_SIZE. */ if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) - offset = (offset + (u64)(_end - _text)) & mask; + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) { + u64 kimg_sz = _end - _text; + offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) + & mask; + } if (IS_ENABLED(CONFIG_KASAN)) /* From f0c0cb99f74c03e2407ea553f6d46eb611e262b5 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 21 Mar 2017 16:51:19 -0400 Subject: [PATCH 1625/1911] arm64: dts: NS2: Add dma-coherent to relevant DT entries Cache related issues with DMA rings and performance issues related to caching are being caused by not properly setting the "dma-coherent" flag in the device tree entries. Adding it here to correct the issue. 
Signed-off-by: Jon Mason Fixes: fd5e5dd56 ("arm64: dts: Add PCIe0 and PCIe4 DT nodes for NS2") Fixes: dddc3c9d7 ("arm64: dts: NS2: add AMAC ethernet support") Fixes: e79249143 ("arm64: dts: Add Broadcom Northstar2 device tree entries for PDC driver") Fixes: ac9aae00f ("arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2") Fixes: efc877676 ("arm64: dts: Add SDHCI DT node for NS2") Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/ns2.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index 9f9e203c09c5ad..bcb03fc3266552 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -114,6 +114,7 @@ pcie0: pcie@20020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x20020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -144,6 +145,7 @@ pcie4: pcie@50020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x50020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -174,6 +176,7 @@ pcie8: pcie@60c00000 { compatible = "brcm,iproc-pcie-paxc"; reg = <0 0x60c00000 0 0x1000>; + dma-coherent; linux,pci-domain = <8>; bus-range = <0x0 0x1>; @@ -203,6 +206,7 @@ <0x61030000 0x100>; reg-names = "amac_base", "idm_base", "nicpm_base"; interrupts = ; + dma-coherent; phy-handle = <&gphy0>; phy-mode = "rgmii"; status = "disabled"; @@ -213,6 +217,7 @@ reg = <0x612c0000 0x445>; /* PDC FS0 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -222,6 +227,7 @@ reg = <0x612e0000 0x445>; /* PDC FS1 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -231,6 +237,7 @@ reg = <0x61300000 0x445>; /* PDC FS2 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -240,6 +247,7 @@ reg = <0x61320000 0x445>; /* PDC FS3 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -644,6 +652,7 @@ sata: ahci@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; + dma-coherent; reg-names = "ahci"; interrupts = ; #address-cells = <1>; @@ -667,6 +676,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66420000 0x100>; interrupts = ; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; @@ -676,6 +686,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66430000 0x100>; interrupts = ; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; From a05d4fd9176003e0c1f9c3d083f4dac19fd346ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Mar 2017 19:25:56 -0400 Subject: [PATCH 1626/1911] cgroup, net_cls: iterate the fds of only the tasks which are being migrated The net_cls controller controls the classid field of each socket which is associated with the cgroup. Because the classid is per-socket attribute, when a task migrates to another cgroup or the configured classid of the cgroup changes, the controller needs to walk all sockets and update the classid value, which was implemented by 3b13758f51de ("cgroups: Allow dynamically changing net_classid"). While the approach is not scalable, migrating tasks which have a lot of fds attached to them is rare and the cost is born by the ones initiating the operations. 
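As a purely illustrative reminder of what triggers the walk described above, the two user-visible operations are a classid configuration change and a task migration. The sketch below assumes a cgroup-v1 net_cls hierarchy mounted at /sys/fs/cgroup/net_cls with an existing child group 'test'; the paths and the group name are assumptions, not something mandated by this patch, and it needs root:

#include <stdio.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s", val);
	return fclose(f);
}

int main(void)
{
	char pid[16];

	/* classid config change: walks every fd of every task in 'test' */
	write_str("/sys/fs/cgroup/net_cls/test/net_cls.classid", "0x100001");

	/* migration: with this patch, only the fds of the migrated task
	 * (here: ourselves) are walked */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_str("/sys/fs/cgroup/net_cls/test/cgroup.procs", pid);

	return 0;
}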
However, for simplicity, both the migration and classid config change paths call update_classid() which scans all fds of all tasks in the target css. This is overkill for the migration path which only needs to cover a much smaller subset of tasks which are actually getting migrated in. On cgroup v1, this can lead to unexpected scalability issues when one tries to migrate a task or process into a net_cls cgroup which already contains a lot of fds. Even if the migration target doesn't have many to get scanned, update_classid() ends up scanning all fds in the target cgroup which can be extremely numerous. Unfortunately, on cgroup v2 which doesn't use net_cls, the problem is even worse. Before bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"), cgroup core would call the ->css_attach callback even for controllers which don't see actual migration to a different css. As net_cls is always disabled but still mounted on cgroup v2, whenever a process is migrated on the cgroup v2 hierarchy, net_cls sees identity migration from root to root and cgroup core used to call the ->css_attach callback for those. The net_cls ->css_attach ends up calling update_classid() on the root net_cls css, to which all processes on the system belong, as the controller isn't used. This makes any cgroup v2 migration O(total_number_of_fds_on_the_system) which is horrible and easily leads to noticeable stalls triggering RCU stall warnings and so on. The worst symptom is already fixed in upstream by bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"); however, backporting that commit is too invasive and we want to avoid other cases too. This patch updates net_cls's cgrp_attach() to iterate fds of only the processes which are actually getting migrated. This removes the surprising migration cost which is dependent on the total number of fds in the target cgroup. As this leaves write_classid() the only user of update_classid(), open-code the helper into write_classid(). Reported-by: David Goode Fixes: 3b13758f51de ("cgroups: Allow dynamically changing net_classid") Cc: stable@vger.kernel.org # v4.4+ Cc: Nina Schiff Cc: David S. Miller Signed-off-by: Tejun Heo Signed-off-by: David S.
Miller --- net/core/netclassid_cgroup.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6ae56037bb1336..029a61ac6cdd8a 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) return 0; } -static void update_classid(struct cgroup_subsys_state *css, void *v) +static void cgrp_attach(struct cgroup_taskset *tset) { - struct css_task_iter it; + struct cgroup_subsys_state *css; struct task_struct *p; - css_task_iter_start(css, &it); - while ((p = css_task_iter_next(&it))) { + cgroup_taskset_for_each(p, css, tset) { task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, v); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)css_cls_state(css)->classid); task_unlock(p); } - css_task_iter_end(&it); -} - -static void cgrp_attach(struct cgroup_taskset *tset) -{ - struct cgroup_subsys_state *css; - - cgroup_taskset_first(tset, &css); - update_classid(css, - (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { struct cgroup_cls_state *cs = css_cls_state(css); + struct css_task_iter it; + struct task_struct *p; cgroup_sk_alloc_disable(); cs->classid = (u32)value; - update_classid(css, (void *)(unsigned long)cs->classid); + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { + task_lock(p); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)cs->classid); + task_unlock(p); + } + css_task_iter_end(&it); + return 0; } From 210c4f70b4c630b27f0840c8043c138c955edc9e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 20 Mar 2017 16:13:44 +0800 Subject: [PATCH 1627/1911] r8152: set the RMS of RTL8153 according to the mtu Set the received maximum size (RMS) according to the mtu size. It is unnecessary to receive a packet which is more than the size we could transmit. Besides, this could let the rx buffer be used effectively. Signed-off-by: Hayes Wang Signed-off-by: David S. 
Miller --- drivers/net/usb/r8152.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index bb3eedd07fbe56..525c258170138e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2899,7 +2899,8 @@ static void r8153_first_init(struct r8152 *tp) rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS); + ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0); @@ -2951,7 +2952,8 @@ static void r8153_enter_oob(struct r8152 *tp) usleep_range(1000, 2000); } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS); + ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG); ocp_data &= ~TEREDO_WAKE_MASK; @@ -4201,8 +4203,14 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; - if (netif_running(dev) && netif_carrier_ok(dev)) - r8153_set_rx_early_size(tp); + if (netif_running(dev)) { + u32 rms = new_mtu + VLAN_ETH_HLEN + CRC_SIZE; + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms); + + if (netif_carrier_ok(dev)) + r8153_set_rx_early_size(tp); + } mutex_unlock(&tp->control); From b20cb60e2b865638459e6ec82ad3536d3734e555 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 20 Mar 2017 16:13:45 +0800 Subject: [PATCH 1628/1911] r8152: fix the rx early size of RTL8153 revert commit a59e6d815226 ("r8152: correct the rx early size") and fix the rx early size as (rx buffer size - rx packet size - rx desc size - alignment) / 4 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 525c258170138e..0b1b9188625d52 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "8" +#define NET_VERSION "9" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -501,6 +501,8 @@ enum rtl_register_content { #define RTL8153_RMS RTL8153_MAX_PACKET #define RTL8152_TX_TIMEOUT (5 * HZ) #define RTL8152_NAPI_WEIGHT 64 +#define rx_reserved_size(x) ((x) + VLAN_ETH_HLEN + CRC_SIZE + \ + sizeof(struct rx_desc) + RX_ALIGN) /* rtl8152 flags */ enum rtl8152_flags { @@ -2253,8 +2255,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { - u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; + u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } From be9ca0d33c850192198c22518eeb1f41401268e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 09:52:50 +0100 Subject: [PATCH 1629/1911] cpsw/netcp: work around reverse cpts dependency The dependency is reversed: cpsw and netcp call into cpts, but cpts depends on the other two in Kconfig. 
This can lead to cpts being a loadable module and its callers built-in: drivers/net/ethernet/ti/cpsw.o: In function `cpsw_remove': cpsw.c:(.text.cpsw_remove+0xd0): undefined reference to `cpts_release' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_rx_handler': cpsw.c:(.text.cpsw_rx_handler+0x2dc): undefined reference to `cpts_rx_timestamp' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_tx_handler': cpsw.c:(.text.cpsw_tx_handler+0x7c): undefined reference to `cpts_tx_timestamp' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_ndo_stop': As a workaround, I'm introducing another Kconfig symbol to control the compilation of cpts, while making the actual module controlled by a silent symbol that is =y when necessary. Fixes: 6246168b4a38 ("net: ethernet: ti: netcp: add support of cpts") Signed-off-by: Arnd Bergmann Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/Kconfig | 8 +++++++- drivers/net/ethernet/ti/Makefile | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 296c8efd0038c8..d923890a9fda89 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -74,7 +74,7 @@ config TI_CPSW will be called cpsw. config TI_CPTS - tristate "TI Common Platform Time Sync (CPTS) Support" + bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP imply PTP_1588_CLOCK ---help--- @@ -83,6 +83,12 @@ config TI_CPTS The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the driver offers a PTP Hardware Clock. +config TI_CPTS_MOD + tristate + depends on TI_CPTS + default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y + default m + config TI_KEYSTONE_NETCP tristate "TI Keystone NETCP Core Support" select TI_CPSW_ALE diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 1e7c10bf87132c..10e6b0ce51baf3 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o -obj-$(CONFIG_TI_CPTS) += cpts.o +obj-$(CONFIG_TI_CPTS_MOD) += cpts.o obj-$(CONFIG_TI_CPSW) += ti_cpsw.o ti_cpsw-y := cpsw.o From 07fef3623407444e51c12ea57cd91df38c1069e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 09:58:33 +0100 Subject: [PATCH 1630/1911] cpsw/netcp: cpts depends on posix_timers With posix timers having become optional, we get a build error with the cpts time sync option of the CPSW driver: drivers/net/ethernet/ti/cpts.c: In function 'cpts_find_ts': drivers/net/ethernet/ti/cpts.c:291:23: error: implicit declaration of function 'ptp_classify_raw';did you mean 'ptp_classifier_init'? [-Werror=implicit-function-declaration] This adds a hard dependency on PTP_CLOCK to avoid the problem, as building it without PTP support makes no sense anyway. Fixes: baa73d9e478f ("posix-timers: Make them configurable") Cc: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Nicolas Pitre Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index d923890a9fda89..9e631952b86f3d 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -76,7 +76,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK ---help--- This driver supports the Common Platform Time Sync unit of the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem. From 1511949c61ec63e4b646c34d602ac6990b38ce30 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Mar 2017 17:46:27 +0800 Subject: [PATCH 1631/1911] sctp: declare struct sctp_stream before using it sctp_stream_free uses struct sctp_stream as a param, but struct sctp_stream is defined after it's declaration. This patch is to declare struct sctp_stream before sctp_stream_free. Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4f645198e9bd7b..592decebac752f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -83,6 +83,7 @@ struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; +struct sctp_stream; #include From 581947787eaf1ad801959d00b42b9d0131aacb6a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Mar 2017 18:00:28 +0800 Subject: [PATCH 1632/1911] sctp: remove useless err from sctp_association_init This patch is to remove the unnecessary temporary variable 'err' from sctp_association_init. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2a6835b4562b61..0439a1a6836784 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct net *net = sock_net(sk); struct sctp_sock *sp; - int i; sctp_paramhdr_t *p; - int err; + int i; /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); @@ -264,8 +263,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); - err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); - if (err) + if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) goto fail_init; asoc->active_key_id = ep->active_key_id; From 557d054c01da0337ca81de9e9d9206d57245b57e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 21 Mar 2017 10:47:49 +0100 Subject: [PATCH 1633/1911] tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. 
CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/subscr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d089418..271cd66e4b3b66 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); From 1f30a86c58093046dc3e49c23d2618894e098f7a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:12 +0200 Subject: [PATCH 1634/1911] net/mlx5: Add missing entries for set/query rate limit commands The switch cases for the rate limit set and query commands were missing, which could get us wrong under fw error or driver reset flow, fix that. Fixes: 1466cc5b23d1 ('net/mlx5: Rate limit tables support') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index caa837e5e2b991..a380353a78c2d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case MLX5_CMD_OP_CONFIG_INT_MODERATION: @@ -497,6 +499,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); From d85cdccbb3fe9a632ec9d0f4e4526c8c84fc3523 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:13 +0200 Subject: [PATCH 1635/1911] net/mlx5e: Change the TC offload rule add/del code path to be per NIC or E-Switch Refactor the code to deal with add/del TC rules to have handler per NIC/E-switch offloading use case, and push the latter into the e-switch code. This provides better separation and is to be used in down-stream patch for applying a fix. Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode") Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 59 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 14 +++++ 3 files changed, 58 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 79481f4cf26432..2825b566545634 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -133,6 +133,23 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, return rule; } +static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_fc *counter = NULL; + + if (!IS_ERR(flow->rule)) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; + } +} + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -149,7 +166,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) { + struct mlx5e_tc_flow *flow); + +static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr); + + mlx5_eswitch_del_vlan_action(esw, flow->attr); + + if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ struct list_head *next = flow->encap.next; 
list_del(&flow->encap); @@ -173,25 +207,10 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_fc *counter = NULL; - - if (!IS_ERR(flow->rule)) { - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); - } - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - mlx5_eswitch_del_vlan_action(esw, flow->attr); - if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - mlx5e_detach_encap(priv, flow); - } - - if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + mlx5e_tc_del_fdb_flow(priv, flow); + else + mlx5e_tc_del_nic_flow(priv, flow); } static void parse_vxlan_attr(struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5b78883d565413..9227a83a97e3e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -271,6 +271,11 @@ struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4f5b0d47d5f382..bfabefe20ac02e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -97,6 +97,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_fc *counter = NULL; + + if (!IS_ERR(rule)) { + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + mlx5_fc_destroy(esw->dev, counter); + } +} + static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; From 375f51e2b5b7b9a42b3139aea519cbb1bfc5d6ef Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 21 Mar 2017 15:59:14 +0200 Subject: [PATCH 1636/1911] net/mlx5: E-Switch, Don't allow changing inline mode when flows are configured Changing the eswitch inline mode can potentially cause already configured flows not to match the policy. E.g. set policy L4, add some L4 rules, set policy to L2 --> bad! Hence we disallow it. Keep track of how many offloaded rules are now set and refuse inline mode changes if this isn't zero. Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode") Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9227a83a97e3e2..ad329b1680b455 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,7 @@ struct mlx5_esw_offload { struct mlx5_eswitch_rep *vport_reps; DECLARE_HASHTABLE(encap_tbl, 8); u8 inline_mode; + u64 num_flows; }; struct mlx5_eswitch { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bfabefe20ac02e..307ec6c5fd3b62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -93,6 +93,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec, &flow_act, dest, i); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); + else + esw->offloads.num_flows++; return rule; } @@ -108,6 +110,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, counter = mlx5_flow_rule_counter(rule); mlx5_del_flow_rules(rule); mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; } } @@ -922,6 +925,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) return -EOPNOTSUPP; + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set inline mode when flows are configured\n"); + return -EOPNOTSUPP; + } + err = esw_inline_mode_from_devlink(mode, &mlx5_mode); if (err) goto out; From 09c91ddf2cd33489c2c14edfef43ae38d412888e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:15 +0200 Subject: [PATCH 1637/1911] net/mlx5e: Use the proper UAPI values when offloading TC vlan actions Currently we use the non UAPI values and we miss erring on the modify action which is not supported, fix that. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reported-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2825b566545634..9c13abaf3885f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1131,14 +1131,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } if (is_tcf_vlan(a)) { - if (tcf_vlan_action(a) == VLAN_F_POP) { + if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) return -EOPNOTSUPP; attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; attr->vlan = tcf_vlan_push_vid(a); + } else { /* action is TCA_VLAN_ACT_MODIFY */ + return -EOPNOTSUPP; } continue; } From 1ad9a00ae0efc2e9337148d6c382fad3d27bf99a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 21 Mar 2017 15:59:16 +0200 Subject: [PATCH 1638/1911] net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps This was added to allow the TC offloading code to identify offloading encap/decap vxlan rules. 
The VF reps are effectively related to the same mlx5 PCI device as the PF. Since the kernel invokes the (say) delete ndo for each netdev, the FW erred on multiple vxlan dst port deletes when the port was deleted from the system. We fix that by keeping the registration to be carried out only by the PF. Since the PF serves as the uplink device, the VF reps will look up a port there and realize if they are ok to offload that. Tested: ip link add vxlan1 type vxlan id 44 dev ens5f0 dstport 9999 ip link set vxlan1 up ip link del dev vxlan1 Fixes: 4a25730eb202 ('net/mlx5e: Add ndo_udp_tunnel_add to VF representors') Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f6a6ded204f61c..dc52053128bc75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -928,10 +928,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti); -void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti); int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8ef64c4db2c21a..66c133757a5ee8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3100,8 +3100,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3114,8 +3114,8 @@ void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } -void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2c864574a9d5fa..f621373bd7a564 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -393,8 +393,6 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_rep_get_stats, - .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, - .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, }; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9c13abaf3885f4..fade7233dac525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -267,12 +267,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, f->mask); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); /* Full udp dst port must be given */ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) goto vxlan_match_offload_err; - if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && + if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); else { @@ -995,6 +998,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); struct ip_tunnel_key *key = &tun_info->key; struct mlx5_encap_entry *e; @@ -1015,7 +1020,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && + if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { From 5f40b4ed975c26016cf41953b7510fe90718e21c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 21 Mar 2017 15:59:17 +0200 Subject: [PATCH 1639/1911] net/mlx5: Increase number of max QPs in default profile With ConnectX-4 sharing SRQs from the same space as QPs, we hit a limit preventing some applications to allocate needed QPs amount. Double the size to 256K. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e2bd600d19de09..60154a175bd386 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 17, + .log_max_qp = 18, .mr_cache[0] = { .size = 500, .limit = 250 From d3a4e4da54c7adb420d5f48e89be913b14bdeff1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Mar 2017 15:59:18 +0200 Subject: [PATCH 1640/1911] net/mlx5e: Count GSO packets correctly TX packets statistics ('tx_packets' counter) used to count GSO packets as one, even though it contains multiple segments. This patch will increment the counter by the number of segments, and align the driver with the behavior of other drivers in the stack. Note that no information is lost in this patch due to 'tx_tso_packets' counter existence. 
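To make the accounting concrete, here is a rough user-space sketch of how a single GSO skb translates into on-the-wire packets and bytes; the numbers are invented for illustration and this is not the driver code:

#include <stdio.h>

int main(void)
{
	unsigned int skb_len  = 65226;	/* one GSO skb: headers + payload */
	unsigned int ihs      = 66;	/* inlined header size (ETH+IP+TCP) */
	unsigned int mss      = 1448;
	unsigned int gso_segs = (skb_len - ihs + mss - 1) / mss;

	/* what the wire sees, and what tx_packets/tx_bytes should report */
	unsigned int tx_packets = gso_segs;
	unsigned int tx_bytes   = skb_len + (gso_segs - 1) * ihs;

	printf("gso_segs=%u tx_packets=%u tx_bytes=%u\n",
	       gso_segs, tx_packets, tx_bytes);
	return 0;
}

Counting the skb as a single packet, as the driver did before this patch, under-reports tx_packets by gso_segs - 1 for every TSO send.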
Before, ethtool showed: $ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets" tx_packets: 61340 tx_tso_packets: 60954 tx_packets_phy: 2451115 Now, we will see the more logical statistics: $ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets" tx_packets: 2451115 tx_tso_packets: 60954 tx_packets_phy: 2451115 Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f193128bac4b8c..57f5e2d7ebd1a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -274,15 +274,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.tso_bytes += skb->len - ihs; } + sq->stats.packets += skb_shinfo(skb)->gso_segs; num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; } else { bf = sq->bf_budget && !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); + sq->stats.packets++; num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } + sq->stats.bytes += num_bytes; wi->num_bytes = num_bytes; ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; @@ -381,8 +384,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (bf) sq->bf_budget--; - sq->stats.packets++; - sq->stats.bytes += num_bytes; return NETDEV_TX_OK; dma_unmap_wqe_err: From 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Mar 2017 15:59:19 +0200 Subject: [PATCH 1641/1911] net/mlx5e: Count LRO packets correctly RX packets statistics ('rx_packets' counter) used to count LRO packets as one, even though it contains multiple segments. This patch will increment the counter by the number of segments, and align the driver with the behavior of other drivers in the stack. Note that no information is lost in this patch due to 'rx_lro_packets' counter existence. Before, ethtool showed: $ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets" rx_packets: 435277 rx_lro_packets: 35847 rx_packets_phy: 1935066 Now, we will see the more logical statistics: $ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets" rx_packets: 1935066 rx_lro_packets: 35847 rx_packets_phy: 1935066 Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3d371688fbbbf3..bafcb349a50c6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -601,6 +601,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + rq->stats.packets += lro_num_seg - 1; rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; } From ac23d3cac1f339febd95c403a245ae072dfd0e84 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 21 Mar 2017 11:44:25 -0400 Subject: [PATCH 1642/1911] fjes: Do not load fjes driver if system does not have extended socket device. The fjes driver is used only by FUJITSU servers and almost of all servers in the world never use it. But currently if ACPI PNP0C02 is defined in the ACPI table, the following message is always shown: "FUJITSU Extended Socket Network Device Driver - version 1.2 - Copyright (c) 2015 FUJITSU LIMITED" The message makes users confused because there is no reason that the message is shown in other vendor servers. To avoid the confusion, the patch adds a check that the server has a extended socket device or not. Signed-off-by: Yasuaki Ishimatsu CC: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 52 ++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index c4b3c4b77a9c1b..7b589649ab463b 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -45,6 +45,8 @@ MODULE_DESCRIPTION("FUJITSU Extended Socket Network Device Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02" + static int fjes_request_irq(struct fjes_adapter *); static void fjes_free_irq(struct fjes_adapter *); @@ -78,7 +80,7 @@ static void fjes_rx_irq(struct fjes_adapter *, int); static int fjes_poll(struct napi_struct *, int); static const struct acpi_device_id fjes_acpi_ids[] = { - {"PNP0C02", 0}, + {ACPI_MOTHERBOARD_RESOURCE_HID, 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids); @@ -115,18 +117,17 @@ static struct resource fjes_resource[] = { }, }; -static int fjes_acpi_add(struct acpi_device *device) +static bool is_extended_socket_device(struct acpi_device *device) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1]; - struct platform_device *plat_dev; union acpi_object *str; acpi_status status; int result; status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer); if (ACPI_FAILURE(status)) - return -ENODEV; + return false; str = buffer.pointer; result = utf16s_to_utf8s((wchar_t *)str->string.pointer, @@ -136,10 +137,21 @@ static int fjes_acpi_add(struct acpi_device *device) if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) { kfree(buffer.pointer); - return -ENODEV; + return false; } kfree(buffer.pointer); + return true; +} + +static int fjes_acpi_add(struct acpi_device *device) +{ + struct platform_device *plat_dev; + acpi_status status; + + if (!is_extended_socket_device(device)) + return -ENODEV; + status = 
acpi_walk_resources(device->handle, METHOD_NAME__CRS, fjes_get_acpi_resource, fjes_resource); if (ACPI_FAILURE(status)) @@ -1473,11 +1485,41 @@ static void fjes_watch_unshare_task(struct work_struct *work) } } +static acpi_status +acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, + void *context, void **return_value) +{ + struct acpi_device *device; + bool *found = context; + int result; + + result = acpi_bus_get_device(obj_handle, &device); + if (result) + return AE_OK; + + if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID)) + return AE_OK; + + if (!is_extended_socket_device(device)) + return AE_OK; + + *found = true; + return AE_CTRL_TERMINATE; +} + /* fjes_init_module - Driver Registration Routine */ static int __init fjes_init_module(void) { + bool found = false; int result; + acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + acpi_find_extended_socket_device, NULL, &found, + NULL); + + if (!found) + return -ENODEV; + pr_info("%s - version %s - %s\n", fjes_driver_string, fjes_driver_version, fjes_copyright); From 2b396d302650f1ebb770ed758ddcf5a64328ffd5 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 21 Mar 2017 11:46:35 -0400 Subject: [PATCH 1643/1911] fjes: Do not load fjes driver if extended socket device is not power on. The extended device socket cannot turn on/off while system is running. So when system boots up and the device is not power on, the fjes driver does not need be loaded. To check the status of the device, the patch adds ACPI _STA method check. Signed-off-by: Yasuaki Ishimatsu CC: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 7b589649ab463b..ae48c809bac9fe 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -144,6 +144,24 @@ static bool is_extended_socket_device(struct acpi_device *device) return true; } +static int acpi_check_extended_socket_status(struct acpi_device *device) +{ + unsigned long long sta; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (!((sta & ACPI_STA_DEVICE_PRESENT) && + (sta & ACPI_STA_DEVICE_ENABLED) && + (sta & ACPI_STA_DEVICE_UI) && + (sta & ACPI_STA_DEVICE_FUNCTIONING))) + return -ENODEV; + + return 0; +} + static int fjes_acpi_add(struct acpi_device *device) { struct platform_device *plat_dev; @@ -152,6 +170,9 @@ static int fjes_acpi_add(struct acpi_device *device) if (!is_extended_socket_device(device)) return -ENODEV; + if (acpi_check_extended_socket_status(device)) + return -ENODEV; + status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, fjes_get_acpi_resource, fjes_resource); if (ACPI_FAILURE(status)) @@ -1503,6 +1524,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, if (!is_extended_socket_device(device)) return AE_OK; + if (acpi_check_extended_socket_status(device)) + return AE_OK; + *found = true; return AE_CTRL_TERMINATE; } From d515684d78148884d5fc425ba904c50f03844020 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 21 Mar 2017 17:14:27 +0100 Subject: [PATCH 1644/1911] ipv6: make sure to initialize sockc.tsflags before first use In the case udp_sk(sk)->pending is AF_INET6, udpv6_sendmsg() would jump to do_append_data, skipping the initialization of sockc.tsflags. Fix the problem by moving sockc.tsflags initialization earlier. 
The bug was detected with KMSAN. Fixes: c14ac9451c34 ("sock: enable timestamping using control messages") Signed-off-by: Alexander Potapenko Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4e4c401e3bc690..e28082f0a307eb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1035,6 +1035,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + sockc.tsflags = sk->sk_tsflags; /* destination address check */ if (sin6) { @@ -1159,7 +1160,6 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = sk->sk_mark; fl6.flowi6_uid = sk->sk_uid; - sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; From 31739eae738ccbe8b9d627c3f2251017ca03f4d2 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 21 Mar 2017 14:01:06 -0700 Subject: [PATCH 1645/1911] net: bcmgenet: remove bcmgenet_internal_phy_setup() Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset") removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and bcmgenet_internal_phy_setup() functions. In so doing it broke the reset of the internal PHY devices used by the GENETv1-GENETv3 which required this reset before the UniMAC was enabled. It also broke the internal GPHY devices used by the GENETv4 because the config_init that installed the AFE workaround was no longer occurring after the reset of the GPHY performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup(). In addition the code in bcmgenet_internal_phy_setup() related to the "enable APD" comment goes with the bcmgenet_mii_reset() so it should have also been removed. Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the bcmgenet_power_up() function, but failed to remove it from the bcmgenet_internal_phy_setup() function. Had it done so, the bcmgenet_internal_phy_setup() function would have been empty and could have been removed at that time. Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") was submitted to correct the functional problems introduced by commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It was included in v4.4 and made available on 4.3-stable. Unfortunately, it didn't fully revert the commit because this bcmgenet_mii_reset() doesn't apply the soft reset to the internal GPHY used by GENETv4 like the previous one did. This prevents the restoration of the AFE work- arounds for internal GPHY devices after the bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup(). This commit takes the alternate approach of removing the unnecessary bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3 so that when bcmgenet_mii_reset() was restored it should have only gone into bcmgenet_power_up(). This will avoid the problems while also removing the redundancy (and hopefully some of the confusion). Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e87607621e62a0..2f9281936f0e43 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable) udelay(60); } -static void bcmgenet_internal_phy_setup(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 reg; - - /* Power up PHY */ - bcmgenet_phy_power_set(dev, true); - /* enable APD */ - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg |= EXT_PWR_DN_EN_LD; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - bcmgenet_mii_reset(dev); -} - static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { u32 reg; @@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_device *dev) if (priv->internal_phy) { phy_name = "internal PHY"; - bcmgenet_internal_phy_setup(dev); } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { phy_name = "MoCA"; bcmgenet_moca_phy_setup(priv); From dd1ef79120e1600cb48320cf80a612ee6510110c Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 21 Mar 2017 15:07:48 -0700 Subject: [PATCH 1646/1911] enic: update enic maintainers update enic maintainers Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 078c38217daabf..c45c02bc6082f3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3216,7 +3216,6 @@ F: drivers/platform/chrome/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti -M: Sujith Sankar M: Govindarajulu Varadarajan <_govind@gmx.com> M: Neel Patel S: Supported From 8c290e60fa2a51806159522331c9ed41252a8fb3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2017 19:05:04 -0700 Subject: [PATCH 1647/1911] bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- kernel/bpf/hashtab.c | 144 ++++++++++++------------ tools/testing/selftests/bpf/test_maps.c | 29 ++++- 2 files changed, 97 insertions(+), 76 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index afe5bab376c981..361a69dfe5434d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -30,18 +30,12 @@ struct bpf_htab { struct pcpu_freelist freelist; struct bpf_lru lru; }; - void __percpu *extra_elems; + struct htab_elem *__percpu *extra_elems; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ }; -enum extra_elem_state { - HTAB_NOT_AN_EXTRA_ELEM = 0, - HTAB_EXTRA_ELEM_FREE, - HTAB_EXTRA_ELEM_USED -}; - /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { @@ -56,7 +50,6 @@ struct htab_elem { }; union { struct rcu_head rcu; - enum extra_elem_state state; struct bpf_lru_node lru_node; }; u32 hash; @@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab) htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool htab_is_prealloc(const struct bpf_htab *htab) +{ + return !(htab->map.map_flags & BPF_F_NO_PREALLOC); +} + static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, void __percpu *pptr) { @@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, static int prealloc_init(struct bpf_htab *htab) { + u32 num_entries = htab->map.max_entries; int err = -ENOMEM, i; - htab->elems = bpf_map_area_alloc(htab->elem_size * - htab->map.max_entries); + if (!htab_is_percpu(htab) && !htab_is_lru(htab)) + num_entries += num_possible_cpus(); + + htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries); if (!htab->elems) return -ENOMEM; if (!htab_is_percpu(htab)) goto skip_percpu_elems; - for (i = 0; i < htab->map.max_entries; i++) { + for (i = 0; i < num_entries; i++) { u32 size = round_up(htab->map.value_size, 8); void __percpu *pptr; @@ -166,11 +167,11 @@ static int prealloc_init(struct bpf_htab *htab) if (htab_is_lru(htab)) bpf_lru_populate(&htab->lru, htab->elems, offsetof(struct htab_elem, lru_node), - htab->elem_size, htab->map.max_entries); + htab->elem_size, num_entries); else pcpu_freelist_populate(&htab->freelist, htab->elems + offsetof(struct htab_elem, fnode), - htab->elem_size, htab->map.max_entries); + htab->elem_size, num_entries); return 0; @@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab) static int alloc_extra_elems(struct bpf_htab *htab) { - void __percpu *pptr; + struct htab_elem *__percpu *pptr, *l_new; + struct pcpu_freelist_node *l; int cpu; - pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN); + pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8, + GFP_USER | __GFP_NOWARN); if (!pptr) return -ENOMEM; for_each_possible_cpu(cpu) { - ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state = - HTAB_EXTRA_ELEM_FREE; + l = pcpu_freelist_pop(&htab->freelist); + /* pop will succeed, since prealloc_init() + * preallocated extra num_possible_cpus elements + */ + l_new = container_of(l, struct htab_elem, fnode); + *per_cpu_ptr(pptr, cpu) = l_new; } htab->extra_elems = pptr; return 0; @@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->buckets[i].lock); } - if (!percpu && !lru) { - /* lru itself can remove the least used element, so - * there is no need for an extra elem during map_update. 
- */ - err = alloc_extra_elems(htab); - if (err) - goto free_buckets; - } - if (prealloc) { err = prealloc_init(htab); if (err) - goto free_extra_elems; + goto free_buckets; + + if (!percpu && !lru) { + /* lru itself can remove the least used element, so + * there is no need for an extra elem during map_update. + */ + err = alloc_extra_elems(htab); + if (err) + goto free_prealloc; + } } return &htab->map; -free_extra_elems: - free_percpu(htab->extra_elems); +free_prealloc: + prealloc_destroy(htab); free_buckets: bpf_map_area_free(htab->buckets); free_htab: @@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { - if (l->state == HTAB_EXTRA_ELEM_USED) { - l->state = HTAB_EXTRA_ELEM_FREE; - return; - } - - if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { + if (htab_is_prealloc(htab)) { pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); @@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, - bool old_elem_exists) + struct htab_elem *old_elem) { u32 size = htab->map.value_size; - bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); - struct htab_elem *l_new; + bool prealloc = htab_is_prealloc(htab); + struct htab_elem *l_new, **pl_new; void __percpu *pptr; - int err = 0; if (prealloc) { - struct pcpu_freelist_node *l; + if (old_elem) { + /* if we're updating the existing element, + * use per-cpu extra elems to avoid freelist_pop/push + */ + pl_new = this_cpu_ptr(htab->extra_elems); + l_new = *pl_new; + *pl_new = old_elem; + } else { + struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); - if (!l) - err = -E2BIG; - else + l = pcpu_freelist_pop(&htab->freelist); + if (!l) + return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); - } else { - if (atomic_inc_return(&htab->count) > htab->map.max_entries) { - atomic_dec(&htab->count); - err = -E2BIG; - } else { - l_new = kmalloc(htab->elem_size, - GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return ERR_PTR(-ENOMEM); } - } - - if (err) { - if (!old_elem_exists) - return ERR_PTR(err); - - /* if we're updating the existing element and the hash table - * is full, use per-cpu extra elems - */ - l_new = this_cpu_ptr(htab->extra_elems); - if (l_new->state != HTAB_EXTRA_ELEM_FREE) - return ERR_PTR(-E2BIG); - l_new->state = HTAB_EXTRA_ELEM_USED; } else { - l_new->state = HTAB_NOT_AN_EXTRA_ELEM; + if (atomic_inc_return(&htab->count) > htab->map.max_entries) + if (!old_elem) { + /* when map is full and update() is replacing + * old element, it's ok to allocate, since + * old element will be freed immediately. 
+ * Otherwise return an error + */ + atomic_dec(&htab->count); + return ERR_PTR(-E2BIG); + } + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return ERR_PTR(-ENOMEM); } memcpy(l_new->key, key, key_size); @@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, goto err; l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, - !!l_old); + l_old); if (IS_ERR(l_new)) { /* all pre-allocated elements are in use or memory exhausted */ ret = PTR_ERR(l_new); @@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { hlist_nulls_del_rcu(&l_old->hash_node); - free_htab_elem(htab, l_old); + if (!htab_is_prealloc(htab)) + free_htab_elem(htab, l_old); } ret = 0; err: @@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, value, onallcpus); } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, true, onallcpus, false); + hash, true, onallcpus, NULL); if (IS_ERR(l_new)) { ret = PTR_ERR(l_new); goto err; @@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { hlist_nulls_del_rcu(&l->hash_node); - if (l->state != HTAB_EXTRA_ELEM_USED) - htab_elem_free(htab, l); + htab_elem_free(htab, l); } } } @@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map) * not have executed. Wait for them. */ rcu_barrier(); - if (htab->map.map_flags & BPF_F_NO_PREALLOC) + if (!htab_is_prealloc(htab)) delete_all_elements(htab); else prealloc_destroy(htab); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index cada17ac00b8e6..a0aa2009b0e0a8 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data) assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0); key = 2; assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); - key = 1; - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); + key = 3; + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); /* Check that key = 0 doesn't exist. 
*/ key = 0; @@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data) close(fd); } +static void test_hashmap_sizes(int task, void *data) +{ + int fd, i, j; + + for (i = 1; i <= 512; i <<= 1) + for (j = 1; j <= 1 << 18; j <<= 1) { + fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, + 2, map_flags); + if (fd < 0) { + printf("Failed to create hashmap key=%d value=%d '%s'\n", + i, j, strerror(errno)); + exit(1); + } + close(fd); + usleep(10); /* give kernel time to destroy */ + } +} + static void test_hashmap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); @@ -317,7 +336,10 @@ static void test_arraymap_percpu(int task, void *data) static void test_arraymap_percpu_many_keys(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); - unsigned int nr_keys = 20000; + /* nr_keys is not too large otherwise the test stresses percpu + * allocator more than anything else + */ + unsigned int nr_keys = 2000; long values[nr_cpus]; int key, fd, i; @@ -419,6 +441,7 @@ static void test_map_stress(void) { run_parallel(100, test_hashmap, NULL); run_parallel(100, test_hashmap_percpu, NULL); + run_parallel(100, test_hashmap_sizes, NULL); run_parallel(100, test_arraymap, NULL); run_parallel(100, test_arraymap_percpu, NULL); From c64c0b3cac4c5b8cb093727d2c19743ea3965c0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Mar 2017 19:22:28 -0700 Subject: [PATCH 1648/1911] ipv4: provide stronger user input validation in nl_fib_input() Alexander reported a KMSAN splat caused by reads of uninitialized field (tb_id_in) from user provided struct fib_result_nl It turns out nl_fib_input() sanity tests on user input is a bit wrong : User can pretend nlh->nlmsg_len is big enough, but provide at sendmsg() time a too small buffer. Reported-by: Alexander Potapenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 42bfd08109dd78..8f2133ffc2ff1b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + if (skb->len < nlmsg_total_size(sizeof(*frn)) || + skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; From a97e50cc4cb67e1e7bff56f6b41cda62ca832336 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Mar 2017 13:08:08 +0100 Subject: [PATCH 1649/1911] socket, bpf: fix sk_filter use after free in sk_clone_lock In sk_clone_lock(), we create a new socket and inherit most of the parent's members via sock_copy() which memcpy()'s various sections. Now, in case the parent socket had a BPF socket filter attached, then newsk->sk_filter points to the same instance as the original sk->sk_filter. sk_filter_charge() is then called on the newsk->sk_filter to take a reference and should that fail due to hitting max optmem, we bail out and release the newsk instance. The issue is that commit 278571baca2a ("net: filter: simplify socket charging") wrongly combined the dismantle path with the failure path of xfrm_sk_clone_policy(). This means, even when charging failed, we call sk_free_unlock_clone() on the newsk, which then still points to the same sk_filter as the original sk. 
Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually where it tests for present sk_filter and calls sk_filter_uncharge() on it, which potentially lets sk_omem_alloc wrap around and releases the eBPF prog and sk_filter structure from the (still intact) parent. Fix it by making sure that when sk_filter_charge() failed, we reset newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(), so that we don't mess with the parents sk_filter. Only if xfrm_sk_clone_policy() fails, we did reach the point where either the parent's filter was NULL and as a result newsk's as well or where we previously had a successful sk_filter_charge(), thus for that case, we do need sk_filter_uncharge() to release the prior taken reference on sk_filter. Fixes: 278571baca2a ("net: filter: simplify socket charging") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/sock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index acb0d413749968..2c4f574168fbdc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1544,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. + */ + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); sk_free_unlock_clone(newsk); newsk = NULL; goto out; From 1d2a6a5e4bf2921531071fcff8538623dce74efa Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 22 Mar 2017 16:08:33 +0100 Subject: [PATCH 1650/1911] genetlink: fix counting regression on ctrl_dumpfamily() Commit 2ae0f17df1cd ("genetlink: use idr to track families") replaced if (++n < fams_to_skip) continue; into: if (n++ < fams_to_skip) continue; This subtle change cause that on retry ctrl_dumpfamily() call we omit one family that failed to do ctrl_fill_info() on previous call, because cb->args[0] = n number counts also family that failed to do ctrl_fill_info(). Patch fixes the problem and avoid confusion in the future just decrease n counter when ctrl_fill_info() fail. User visible problem caused by this bug is failure to get access to some genetlink family i.e. nl80211. However problem is reproducible only if number of registered genetlink families is big enough to cause second call of ctrl_dumpfamily(). Cc: Xose Vazquez Perez Cc: Larry Finger Cc: Johannes Berg Fixes: 2ae0f17df1cd ("genetlink: use idr to track families") Signed-off-by: Stanislaw Gruszka Acked-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fb6e10fdb21743..92e0981f74040d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) + skb, CTRL_CMD_NEWFAMILY) < 0) { + n--; break; + } } cb->args[0] = n; From 15bb7745e94a665caf42bfaabf0ce062845b533b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Mar 2017 08:10:21 -0700 Subject: [PATCH 1651/1911] tcp: initialize icsk_ack.lrcvtime at session start time icsk_ack.lrcvtime has a 0 value at socket creation time. 
tcpi_last_data_recv can have bogus value if no payload is ever received. This patch initializes icsk_ack.lrcvtime for active sessions in tcp_finish_connect(), and for passive sessions in tcp_create_openreq_child() Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_minisocks.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 39c393cc0fd3c1..c43119726a62e4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5541,6 +5541,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5759,7 +5760,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7e16243cdb58c8..65c0f3d13eca47 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -460,6 +460,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_time_stamp; newtp->packets_out = 0; newtp->retrans_out = 0; From ec4fbd64751de18729eaa816ec69e4b504b5a7a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Mar 2017 08:57:15 -0700 Subject: [PATCH 1652/1911] inet: frag: release spinlock before calling icmp_send() Dmitry reported a lockdep splat [1] (false positive) that we can fix by releasing the spinlock before calling icmp_send() from ip_expire() This is a false positive because sending an ICMP message can not possibly re-enter the IP frag engine. [1] [ INFO: possible circular locking dependency detected ] 4.10.0+ #29 Not tainted ------------------------------------------------------- modprobe/12392 is trying to acquire lock: (_xmit_ETHER#2){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (_xmit_ETHER#2){+.-...}, at: [] __netif_tx_lock include/linux/netdevice.h:3486 [inline] (_xmit_ETHER#2){+.-...}, at: [] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 but task is already holding lock: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&(&q->lock)->rlock){+.-...}: validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] ip_defrag+0x3a2/0x4130 net/ipv4/ip_fragment.c:669 ip_check_defrag+0x4e3/0x8b0 net/ipv4/ip_fragment.c:713 packet_rcv_fanout+0x282/0x800 net/packet/af_packet.c:1459 deliver_skb net/core/dev.c:1834 [inline] dev_queue_xmit_nit+0x294/0xa90 net/core/dev.c:1890 xmit_one net/core/dev.c:2903 [inline] dev_hard_start_xmit+0x16b/0xab0 net/core/dev.c:2923 sch_direct_xmit+0x31f/0x6d0 net/sched/sch_generic.c:182 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_resolve_output+0x6b9/0xb10 net/core/neighbour.c:1308 neigh_output include/net/neighbour.h:478 [inline] ip_finish_output2+0x8b8/0x15a0 net/ipv4/ip_output.c:228 ip_do_fragment+0x1d93/0x2720 net/ipv4/ip_output.c:672 ip_fragment.constprop.54+0x145/0x200 net/ipv4/ip_output.c:545 ip_finish_output+0x82d/0xe10 net/ipv4/ip_output.c:314 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 raw_sendmsg+0x26de/0x3a00 net/ipv4/raw.c:655 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x4a3/0x9f0 net/socket.c:1985 __sys_sendmmsg+0x25c/0x750 net/socket.c:2075 SYSC_sendmmsg net/socket.c:2106 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2101 do_syscall_64+0x2e8/0x930 arch/x86/entry/common.c:281 return_from_SYSCALL_64+0x0/0x7a -> #0 (_xmit_ETHER#2){+.-...}: check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 
kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 __read_once_size include/linux/compiler.h:254 [inline] atomic_read arch/x86/include/asm/atomic.h:26 [inline] rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] __rcu_is_watching kernel/rcu/tree.c:1133 [inline] rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); *** DEADLOCK *** 10 locks held by modprobe/12392: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x2b8/0xb60 arch/x86/mm/fault.c:1336 #1: (rcu_read_lock){......}, at: [] filemap_map_pages+0x1e6/0x1570 mm/filemap.c:2324 #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] pte_alloc_one_map mm/memory.c:2944 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] alloc_set_pte+0x13b8/0x1b90 mm/memory.c:3072 #3: (((&q->timer))){+.-...}, at: [] lockdep_copy_map include/linux/lockdep.h:175 [inline] #3: (((&q->timer))){+.-...}, at: [] call_timer_fn+0x1c2/0x820 kernel/time/timer.c:1258 #4: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 #5: (rcu_read_lock){......}, at: [] ip_expire+0x1b3/0x6c0 net/ipv4/ip_fragment.c:216 #6: (slock-AF_INET){+.-...}, at: [] spin_trylock include/linux/spinlock.h:309 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_xmit_lock net/ipv4/icmp.c:219 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_send+0x803/0x1c80 net/ipv4/icmp.c:681 #7: (rcu_read_lock_bh){......}, at: [] ip_finish_output2+0x2c1/0x15a0 net/ipv4/ip_output.c:198 #8: (rcu_read_lock_bh){......}, at: [] __dev_queue_xmit+0x23e/0x1e60 net/core/dev.c:3324 #9: (dev->qdisc_running_key ?: &qdisc_running_key){+.....}, at: [] dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 stack backtrace: CPU: 0 PID: 12392 Comm: modprobe Not tainted 4.10.0+ #29 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:52 print_circular_bug+0x307/0x3b0 kernel/locking/lockdep.c:1204 check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock 
include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 RIP: 0010:__read_once_size include/linux/compiler.h:254 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] RIP: 0010:__rcu_is_watching kernel/rcu/tree.c:1133 [inline] RIP: 0010:rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 RSP: 0000:ffff8801c391f120 EFLAGS: 00000a03 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: ffff8801c391f148 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 000055edd4374000 RDI: ffff8801dbe1ae0c RBP: ffff8801c391f1a0 R08: 0000000000000002 R09: 0000000000000000 R10: dffffc0000000000 R11: 0000000000000002 R12: 1ffff10038723e25 R13: ffff8801dbe1ae00 R14: ffff8801c391f680 R15: dffffc0000000000 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 RIP: 0033:0x7f83172f2786 RSP: 002b:00007fffe859ae80 EFLAGS: 00010293 RAX: 000055edd4373040 RBX: 00007f83175111c8 RCX: 000055edd4373238 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f8317510970 RBP: 00007fffe859afd0 R08: 0000000000000009 R09: 0000000000000000 R10: 0000000000000064 R11: 0000000000000000 R12: 000055edd4373040 R13: 0000000000000000 R14: 00007fffe859afe8 R15: 0000000000000000 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/ipv4/ip_fragment.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbe7f72db9c157..b3cdeec85f1f2c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg) qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); + rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *head = qp->q.fragments; + struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg) if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; - rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) - goto out_rcu_unlock; + goto out; + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) - goto out_rcu_unlock; + goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out_rcu_unlock; + goto out; + + clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; + } } out: spin_unlock(&qp->q.lock); +out_rcu_unlock: + rcu_read_unlock(); ipq_put(qp); } From 6e9e6cc8f4e4f2cd67931510c9f39abf3d9e0d3b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 15:31:10 -0700 Subject: [PATCH 1653/1911] Bluetooth: btqcomsmd: fix compile-test dependency compile-testing fails when QCOM_SMD is a loadable module: drivers/bluetooth/built-in.o: In function `btqcomsmd_send': btqca.c:(.text+0xa8): undefined reference to `qcom_smd_send' drivers/bluetooth/built-in.o: In function `btqcomsmd_probe': btqca.c:(.text+0x3ec): undefined reference to `qcom_wcnss_open_channel' btqca.c:(.text+0x46c): undefined reference to `qcom_smd_set_drvdata' This clarifies the dependency to allow compile-testing only when SMD is completely disabled, otherwise the dependency on QCOM_SMD will make sure we can link against it. Fixes: e27ee2b16bad ("Bluetooth: btqcomsmd: Allow driver to build if COMPILE_TEST is enabled") Signed-off-by: Arnd Bergmann [bjorn: Restructure and clarify dependency to QCOM_WCNSS_CTRL] Signed-off-by: Bjorn Andersson Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- drivers/bluetooth/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index c2c14a12713b56..08e054507d0bcd 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -344,7 +344,8 @@ config BT_WILINK config BT_QCOMSMD tristate "Qualcomm SMD based HCI support" - depends on (QCOM_SMD && QCOM_WCNSS_CTRL) || COMPILE_TEST + depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on QCOM_WCNSS_CTRL || (COMPILE_TEST && QCOM_WCNSS_CTRL=n) select BT_QCA help Qualcomm SMD based HCI driver. 
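As a brief aside on the Kconfig idiom used in the BT_QCOMSMD fix above: the "FOO || (COMPILE_TEST && FOO=n)" pattern keeps compile-test coverage while preventing a built-in driver from being linked against a modular dependency. The sketch below is illustrative only; FOO_DRIVER is a hypothetical symbol, and only QCOM_SMD is taken from the patch itself.

config FOO_DRIVER
	tristate "Hypothetical driver that calls into QCOM_SMD"
	# Either QCOM_SMD is enabled (y or m, so its exported symbols can
	# be linked against), or it is completely disabled and the driver
	# is offered for compile testing only. This rules out the broken
	# combination QCOM_SMD=m with FOO_DRIVER=y, which would otherwise
	# produce undefined references such as qcom_smd_send.
	depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)

In other words, when QCOM_SMD=m the dependency limits FOO_DRIVER to m or n, so the SMD symbols are always reachable at link time, and compile testing is only allowed when the real dependency is entirely absent.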
From c04ca616eed02b9abe7afd311382c3ed5eef5c40 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Mar 2017 12:10:02 +0300 Subject: [PATCH 1654/1911] sfc: cleanup a condition in efx_udp_tunnel_del() Presumably if there is an "add" function, there is also a "del" function. But it causes a static checker warning because it looks like a common cut and paste bug. Signed-off-by: Dan Carpenter Acked-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 334bcc6df6b2ba..50d28261b6b9ea 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2404,7 +2404,7 @@ static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *t tnl.type = (u16)efx_tunnel_type; tnl.port = ti->port; - if (efx->type->udp_tnl_add_port) + if (efx->type->udp_tnl_del_port) (void)efx->type->udp_tnl_del_port(efx, tnl); } From f43feef4e6acde10857fcbfdede790d6b3f2c71d Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 22 Mar 2017 17:25:27 -0500 Subject: [PATCH 1655/1911] amd-xgbe: Fix the ECC-related bit position definitions The ECC bit positions that describe whether the ECC interrupt is for Tx, Rx or descriptor memory and whether the it is a single correctable or double detected error were defined in incorrectly (reversed order). Fix the bit position definitions for these settings so that the proper ECC handling is performed. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 86f1626816ffa6..127adbeefb105c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -984,29 +984,29 @@ #define XP_ECC_CNT1_DESC_DED_WIDTH 8 #define XP_ECC_CNT1_DESC_SEC_INDEX 0 #define XP_ECC_CNT1_DESC_SEC_WIDTH 8 -#define XP_ECC_IER_DESC_DED_INDEX 0 +#define XP_ECC_IER_DESC_DED_INDEX 5 #define XP_ECC_IER_DESC_DED_WIDTH 1 -#define XP_ECC_IER_DESC_SEC_INDEX 1 +#define XP_ECC_IER_DESC_SEC_INDEX 4 #define XP_ECC_IER_DESC_SEC_WIDTH 1 -#define XP_ECC_IER_RX_DED_INDEX 2 +#define XP_ECC_IER_RX_DED_INDEX 3 #define XP_ECC_IER_RX_DED_WIDTH 1 -#define XP_ECC_IER_RX_SEC_INDEX 3 +#define XP_ECC_IER_RX_SEC_INDEX 2 #define XP_ECC_IER_RX_SEC_WIDTH 1 -#define XP_ECC_IER_TX_DED_INDEX 4 +#define XP_ECC_IER_TX_DED_INDEX 1 #define XP_ECC_IER_TX_DED_WIDTH 1 -#define XP_ECC_IER_TX_SEC_INDEX 5 +#define XP_ECC_IER_TX_SEC_INDEX 0 #define XP_ECC_IER_TX_SEC_WIDTH 1 -#define XP_ECC_ISR_DESC_DED_INDEX 0 +#define XP_ECC_ISR_DESC_DED_INDEX 5 #define XP_ECC_ISR_DESC_DED_WIDTH 1 -#define XP_ECC_ISR_DESC_SEC_INDEX 1 +#define XP_ECC_ISR_DESC_SEC_INDEX 4 #define XP_ECC_ISR_DESC_SEC_WIDTH 1 -#define XP_ECC_ISR_RX_DED_INDEX 2 +#define XP_ECC_ISR_RX_DED_INDEX 3 #define XP_ECC_ISR_RX_DED_WIDTH 1 -#define XP_ECC_ISR_RX_SEC_INDEX 3 +#define XP_ECC_ISR_RX_SEC_INDEX 2 #define XP_ECC_ISR_RX_SEC_WIDTH 1 -#define XP_ECC_ISR_TX_DED_INDEX 4 +#define XP_ECC_ISR_TX_DED_INDEX 1 #define XP_ECC_ISR_TX_DED_WIDTH 1 -#define XP_ECC_ISR_TX_SEC_INDEX 5 +#define XP_ECC_ISR_TX_SEC_INDEX 0 #define XP_ECC_ISR_TX_SEC_WIDTH 1 #define XP_I2C_MUTEX_BUSY_INDEX 31 #define XP_I2C_MUTEX_BUSY_WIDTH 1 From 68c386590375b2aea5a3154f17882a30170707bf Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 
02:20:39 +0300 Subject: [PATCH 1656/1911] net:ethernet:aquantia: Fix for RX checksum offload. Since AQC-100/107/108 chips supports hardware checksums for RX we should indicate this via NETIF_F_RXCSUM flag. v1->v2: 'Signed-off-by' tag added. Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h | 1 + .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h index 1093ea18823a32..0592a0330cf0d6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h @@ -137,6 +137,7 @@ static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = { .tx_rings = HW_ATL_A0_TX_RINGS, .rx_rings = HW_ATL_A0_RX_RINGS, .hw_features = NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_TSO, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index 8bdee3ddd5a0bd..f3957e9303405c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -188,6 +188,7 @@ static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = { .tx_rings = HW_ATL_B0_TX_RINGS, .rx_rings = HW_ATL_B0_RX_RINGS, .hw_features = NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_TSO | From 71fdb70eb48784c1f28cdf2e67c4c587dd7f2594 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 13:46:21 +0100 Subject: [PATCH 1657/1911] sched/clock: Fix clear_sched_clock_stable() preempt wobbly Paul reported a problems with clear_sched_clock_stable(). Since we run all of __clear_sched_clock_stable() from workqueue context, there's a preempt problem. Solve it by only running the static_key_disable() from workqueue. Reported-by: Paul E. 
McKenney Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170313124621.GA3328@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/clock.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index a08795e216283f..fec0f58c8deeac 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -141,7 +141,14 @@ static void __set_sched_clock_stable(void) tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } -static void __clear_sched_clock_stable(struct work_struct *work) +static void __sched_clock_work(struct work_struct *work) +{ + static_branch_disable(&__sched_clock_stable); +} + +static DECLARE_WORK(sched_clock_work, __sched_clock_work); + +static void __clear_sched_clock_stable(void) { struct sched_clock_data *scd = this_scd(); @@ -160,11 +167,11 @@ static void __clear_sched_clock_stable(struct work_struct *work) scd->tick_gtod, gtod_offset, scd->tick_raw, raw_offset); - static_branch_disable(&__sched_clock_stable); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); -} -static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); + if (sched_clock_stable()) + schedule_work(&sched_clock_work); +} void clear_sched_clock_stable(void) { @@ -173,7 +180,7 @@ void clear_sched_clock_stable(void) smp_mb(); /* matches sched_clock_init_late() */ if (sched_clock_running == 2) - schedule_work(&sched_clock_work); + __clear_sched_clock_stable(); } void sched_clock_init_late(void) From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: [PATCH 1658/1911] sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- include/linux/sched/clock.h | 13 +++++++------ kernel/sched/clock.c | 22 +++++++++++----------- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db21a..580b60f5ac83ce 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. 
@@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d0662..27e9f9d769b892 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881aa2..714dfba6a1e713 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c67912078e..34fe92ce1ebd7c 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index fec0f58c8deeac..24a3e01bf8cbc7 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early = 1; /* - * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset */ -static __read_mostly u64 raw_offset; -static __read_mostly u64 gtod_offset; +__read_mostly u64 __sched_clock_offset; +static __read_mostly u64 __gtod_offset; struct sched_clock_data { u64 tick_raw; @@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void) /* * Attempt to make the (initial) unstable->stable transition continuous. 
*/ - raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); + __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); static_branch_enable(&__sched_clock_stable); tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void) * * Still do what we can. */ - gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod); printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -238,7 +238,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd) * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + gtod_offset + delta; + clock = scd->tick_gtod + __gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock() + raw_offset; + return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) return 0ull; From e2ebfb2142acefecc2496e71360f50d25726040b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 Mar 2017 19:50:29 +0200 Subject: [PATCH 1659/1911] mmc: sdhci: Do not disable interrupts while waiting for clock Disabling interrupts for even a millisecond can cause problems for some devices. That can happen when sdhci changes clock frequency because it waits for the clock to become stable under a spin lock. The spin lock is not necessary here. Anything that is racing with changes to the I/O state is already broken. The mmc core already provides synchronization via "claiming" the host. Although the spin lock probably should be removed from the code paths that lead to this point, such a patch would touch too much code to be suitable for stable trees. Consequently, for this patch, just drop the spin lock while waiting. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Tested-by: Ludovic Desroches --- drivers/mmc/host/sdhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6fdd7a70f229b8..9c1a099afbbeca 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1362,7 +1362,9 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk) return; } timeout--; - mdelay(1); + spin_unlock_irq(&host->lock); + usleep_range(900, 1100); + spin_lock_irq(&host->lock); } clk |= SDHCI_CLOCK_CARD_EN; From 027fb89e61054b4aedd962adb3e2003dec78a716 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 Mar 2017 19:50:30 +0200 Subject: [PATCH 1660/1911] mmc: sdhci-pci: Do not disable interrupts in sdhci_intel_set_power Disabling interrupts for even a millisecond can cause problems for some devices. That can happen when Intel host controllers wait for the present state to propagate. The spin lock is not necessary here. Anything that is racing with changes to the I/O state is already broken. The mmc core already provides synchronization via "claiming" the host. 
Although the spin lock probably should be removed from the code paths that lead to this point, such a patch would touch too much code to be suitable for stable trees. Consequently, for this patch, just drop the spin lock while waiting. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Ulf Hansson Tested-by: Ludovic Desroches --- drivers/mmc/host/sdhci-pci-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 982b3e34942614..86560d590786f3 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -451,6 +451,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, if (mode == MMC_POWER_OFF) return; + spin_unlock_irq(&host->lock); + /* * Bus power might not enable after D3 -> D0 transition due to the * present state not yet having propagated. Retry for up to 2ms. @@ -463,6 +465,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, reg |= SDHCI_POWER_ON; sdhci_writeb(host, reg, SDHCI_POWER_CONTROL); } + + spin_lock_irq(&host->lock); } static const struct sdhci_ops sdhci_intel_byt_ops = { From 3f307834e695f59dac4337a40316bdecfb9d0508 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 23 Mar 2017 10:00:25 +0800 Subject: [PATCH 1661/1911] ALSA: hda - Adding a group of pin definition to fix headset problem A new Dell laptop needs to apply ALC269_FIXUP_DELL1_MIC_NO_PRESENCE to fix the headset problem, and the pin definiton of this machine is not in the pin quirk table yet, now adding it to the table. Signed-off-by: Hui Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8d6b3703d0a291..7f989898cbd9aa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6102,6 +6102,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC295_STANDARD_PINS, {0x17, 0x21014040}, {0x18, 0x21a19050}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC295_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170110}), From 6e790555b6cae4da9c578376ff500861337fcfc1 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 2 Mar 2017 17:46:36 +0000 Subject: [PATCH 1662/1911] pinctrl: ti: The IODelay driver is a DRA7xxx feature so depend on that SoC As the IODelay driver is a hardware feature of the DRA7xxx SoC depend on that SoC and compile test. 
Signed-off-by: Peter Robinson Acked-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/ti/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/ti/Kconfig b/drivers/pinctrl/ti/Kconfig index 815a88673d3819..542077069391b9 100644 --- a/drivers/pinctrl/ti/Kconfig +++ b/drivers/pinctrl/ti/Kconfig @@ -1,6 +1,6 @@ config PINCTRL_TI_IODELAY tristate "TI IODelay Module pinconf driver" - depends on OF + depends on OF && (SOC_DRA7XX || COMPILE_TEST) select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF From 08a7f260c88f8d52be52ff2113119b3464b69874 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 7 Mar 2017 14:31:00 +0100 Subject: [PATCH 1663/1911] pinctrl: meson-gxbb: Fix typo in i2c ao groups Signed-off-by: Neil Armstrong Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index 7671424d46cbe0..31a3a98d067caa 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -667,11 +667,11 @@ static const char * const uart_ao_b_groups[] = { }; static const char * const i2c_ao_groups[] = { - "i2c_sdk_ao", "i2c_sda_ao", + "i2c_sck_ao", "i2c_sda_ao", }; static const char * const i2c_slave_ao_groups[] = { - "i2c_slave_sdk_ao", "i2c_slave_sda_ao", + "i2c_slave_sck_ao", "i2c_slave_sda_ao", }; static const char * const remote_input_ao_groups[] = { From 59f34e80c4910bc69ca643a2bb93fcce51366d83 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 23 Feb 2017 11:12:40 +0100 Subject: [PATCH 1664/1911] pinctrl: samsung: Fix memory mapping code Some pinctrls share memory regions, and devm_ioremap_resource does not allow to share resources, in opposition to devm_ioremap. This patch restores back usage of devm_ioremap function, but with proper error handling and logging. Fixes: baafaca ("pinctrl: samsung: Fix return value check in samsung_pinctrl_get_soc_data()") Signed-off-by: Andrzej Hajda Tested-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/samsung/pinctrl-samsung.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index f9ddba7decc185..d7aa22cff480ed 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -988,9 +988,16 @@ samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, for (i = 0; i < ctrl->nr_ext_resources + 1; i++) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); - virt_base[i] = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(virt_base[i])) - return ERR_CAST(virt_base[i]); + if (!res) { + dev_err(&pdev->dev, "failed to get mem%d resource\n", i); + return ERR_PTR(-EINVAL); + } + virt_base[i] = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!virt_base[i]) { + dev_err(&pdev->dev, "failed to ioremap %pR\n", res); + return ERR_PTR(-EIO); + } } bank = d->pin_banks; From a6566710adaa4a7dd5e0d99820ff9c9c30ee5951 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 14 Mar 2017 08:23:26 -0700 Subject: [PATCH 1665/1911] pinctrl: qcom: Don't clear status bit on irq_unmask Clearing the status bit on irq_unmask will discard any pending interrupt that did arrive after the irq_ack, i.e. while the IRQ handler function was executing. 
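To make the race concrete, here is a minimal sketch of the ack/unmask split this change preserves. The register and function names are generic placeholders, not pinctrl-msm's actual layout; the point is only that ack is the one place allowed to clear the latched status bit, while unmask merely re-enables delivery.

#include <linux/io.h>
#include <linux/bitops.h>

/*
 * Sketch only: an edge that fires between irq_ack() and irq_unmask()
 * is latched in the status register. If unmask cleared that bit again,
 * the interrupt taken while the handler ran would be silently dropped.
 */
static void example_irq_ack(void __iomem *status_reg, unsigned int bit)
{
	/* clear only the edge we are about to handle */
	writel(readl(status_reg) & ~BIT(bit), status_reg);
}

static void example_irq_unmask(void __iomem *cfg_reg, unsigned int enable_bit)
{
	/* re-enable delivery; leave the status register untouched */
	writel(readl(cfg_reg) | BIT(enable_bit), cfg_reg);
}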
Fixes: f365be092572 ("pinctrl: Add Qualcomm TLMM driver") Cc: stable@vger.kernel.org Cc: Stephen Boyd Reported-by: Timur Tabi Signed-off-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index c978be5eb9ebe3..273badd925611a 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -609,10 +609,6 @@ static void msm_gpio_irq_unmask(struct irq_data *d) raw_spin_lock_irqsave(&pctrl->lock, flags); - val = readl(pctrl->regs + g->intr_status_reg); - val &= ~BIT(g->intr_status_bit); - writel(val, pctrl->regs + g->intr_status_reg); - val = readl(pctrl->regs + g->intr_cfg_reg); val |= BIT(g->intr_enable_bit); writel(val, pctrl->regs + g->intr_cfg_reg); From e855fa9a65c40788b5069abb0d094537daa22e05 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 16 Mar 2017 18:26:02 +0100 Subject: [PATCH 1666/1911] pinctrl: st: add irq_request/release_resources callbacks When using GPIO as IRQ source, the GPIO must be configured in INPUT. Callbacks dedicated for this was missing in pinctrl-st driver. This fix the following kernel error when trying to lock a gpio as IRQ: [ 7.521095] gpio gpiochip7: (PIO11): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 7.526018] gpio gpiochip7: (PIO11): unable to lock HW IRQ 6 for IRQ [ 7.529405] genirq: Failed to request resources for 0-0053 (irq 81) on irqchip GPIO Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 676efcc032d261..3ae8066bc1279c 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct irq_data *d) writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK); } +static int st_gpio_irq_request_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + st_gpio_direction_input(gc, d->hwirq); + + return gpiochip_lock_as_irq(gc, d->hwirq); +} + +static void st_gpio_irq_release_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + gpiochip_unlock_as_irq(gc, d->hwirq); +} + static int st_gpio_irq_set_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template = { }; static struct irq_chip st_gpio_irqchip = { - .name = "GPIO", - .irq_disable = st_gpio_irq_mask, - .irq_mask = st_gpio_irq_mask, - .irq_unmask = st_gpio_irq_unmask, - .irq_set_type = st_gpio_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .name = "GPIO", + .irq_request_resources = st_gpio_irq_request_resources, + .irq_release_resources = st_gpio_irq_release_resources, + .irq_disable = st_gpio_irq_mask, + .irq_mask = st_gpio_irq_mask, + .irq_unmask = st_gpio_irq_unmask, + .irq_set_type = st_gpio_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int st_gpiolib_register_bank(struct st_pinctrl *info, From d7402de48efae57bbb0072e53d3800c30de57ea5 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Thu, 16 Mar 2017 21:36:07 +0100 Subject: [PATCH 1667/1911] pinctrl: qcom: ipq4019: add missing pingroups for pins > 70 This patch adds the missing PINGROUP for GPIO70-99. 
This fixes a crash that happens in pinctrl-msm, if any of the GPIO70-99 are accessed. Fixes: 5303f7827fcd41d ("pinctrl: qcom: ipq4019: set ngpios to correct value") Signed-off-by: Christian Lamparter Acked-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-ipq4019.c | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index b68ae424cee247..743d1f458205fa 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -405,6 +405,36 @@ static const struct msm_pingroup ipq4019_groups[] = { PINGROUP(67, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), PINGROUP(68, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), PINGROUP(69, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(70, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(71, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(72, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(73, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(74, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(75, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(76, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(77, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(78, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(79, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(80, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(81, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(82, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(83, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(84, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(85, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(86, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(87, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(88, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(89, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(90, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(91, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(92, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(93, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(94, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(95, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(96, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(97, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(98, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(99, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), }; static const struct msm_pinctrl_soc_data ipq4019_pinctrl = { From 633ee407b9d15a75ac9740ba9d3338815e1fcb95 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 21 Mar 2017 13:44:28 +0100 Subject: [PATCH 1668/1911] libceph: force GFP_NOIO for socket allocations sock_alloc_inode() allocates socket+inode and socket_wq with GFP_KERNEL, which is not allowed on the writeback path: Workqueue: ceph-msgr con_work [libceph] ffff8810871cb018 0000000000000046 0000000000000000 ffff881085d40000 0000000000012b00 
ffff881025cad428 ffff8810871cbfd8 0000000000012b00 ffff880102fc1000 ffff881085d40000 ffff8810871cb038 ffff8810871cb148 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x1bd/0x200 [] ? ttwu_do_wakeup+0x2c/0x120 [] ? ttwu_do_activate.constprop.135+0x66/0x70 [] wait_for_completion+0xbf/0x180 [] ? try_to_wake_up+0x390/0x390 [] flush_work+0x165/0x250 [] ? worker_detach_from_pool+0xd0/0xd0 [] xlog_cil_force_lsn+0x81/0x200 [xfs] [] ? __slab_free+0xee/0x234 [] _xfs_log_force_lsn+0x4d/0x2c0 [xfs] [] ? lookup_page_cgroup_used+0xe/0x30 [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_log_force_lsn+0x3f/0xf0 [xfs] [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_iunpin_wait+0xc6/0x1a0 [xfs] [] ? wake_atomic_t_function+0x40/0x40 [] xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_reclaim_inodes_ag+0x257/0x3d0 [xfs] [] xfs_reclaim_inodes_nr+0x33/0x40 [xfs] [] xfs_fs_free_cached_objects+0x15/0x20 [xfs] [] super_cache_scan+0x178/0x180 [] shrink_slab_node+0x14e/0x340 [] ? mem_cgroup_iter+0x16b/0x450 [] shrink_slab+0x100/0x140 [] do_try_to_free_pages+0x335/0x490 [] try_to_free_pages+0xb9/0x1f0 [] ? __alloc_pages_direct_compact+0x69/0x1be [] __alloc_pages_nodemask+0x69a/0xb40 [] alloc_pages_current+0x9e/0x110 [] new_slab+0x2c5/0x390 [] __slab_alloc+0x33b/0x459 [] ? sock_alloc_inode+0x2d/0xd0 [] ? inet_sendmsg+0x71/0xc0 [] ? sock_alloc_inode+0x2d/0xd0 [] kmem_cache_alloc+0x1a2/0x1b0 [] sock_alloc_inode+0x2d/0xd0 [] alloc_inode+0x26/0xa0 [] new_inode_pseudo+0x1a/0x70 [] sock_alloc+0x1e/0x80 [] __sock_create+0x95/0x220 [] sock_create_kern+0x24/0x30 [] con_work+0xef9/0x2050 [libceph] [] ? rbd_img_request_submit+0x4c/0x60 [rbd] [] process_one_work+0x159/0x4f0 [] worker_thread+0x11b/0x530 [] ? create_worker+0x1d0/0x1d0 [] kthread+0xc9/0xe0 [] ? flush_kthread_worker+0x90/0x90 [] ret_from_fork+0x58/0x90 [] ? flush_kthread_worker+0x90/0x90 Use memalloc_noio_{save,restore}() to temporarily force GFP_NOIO here. Cc: stable@vger.kernel.org # 3.10+, needs backporting Link: http://tracker.ceph.com/issues/19309 Reported-by: Sergey Jerusalimov Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/messenger.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 38dcf1eb427de5..f76bb333261384 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; + unsigned int noio_flag; int ret; BUG_ON(con->sock); + + /* sock_create_kern() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + memalloc_noio_restore(noio_flag); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; From 1c5bf78114d38c2761721108a2d925bccb496027 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Mar 2017 18:25:05 +0100 Subject: [PATCH 1669/1911] EDAC: Select DEBUG_FS The debugfs.c functionality relies on DEBUG_FS so select it. Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index be3eac6ad54d89..4773f286723414 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -43,6 +43,7 @@ config EDAC_LEGACY_SYSFS config EDAC_DEBUG bool "Debugging" + select DEBUG_FS help This turns on debugging information for the entire EDAC subsystem. 
You do so by inserting edac_module with "edac_debug_level=x." Valid From cd1be315ac9766fe83e4fb788d5d9fe032a9b877 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Mar 2017 18:26:34 +0100 Subject: [PATCH 1670/1911] EDAC, pnd2_edac: Fix !EDAC_DEBUG build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide debugfs function stubs when EDAC_DEBUG is not enabled so that we don't fail the build: drivers/edac/pnd2_edac.c: In function ‘pnd2_init’: drivers/edac/pnd2_edac.c:1521:2: error: implicit declaration of function ‘setup_pnd2_debug’ [-Werror=implicit-function-declaration] setup_pnd2_debug(); ^ drivers/edac/pnd2_edac.c: In function ‘pnd2_exit’: drivers/edac/pnd2_edac.c:1529:2: error: implicit declaration of function ‘teardown_pnd2_debug’ [-Werror=implicit-function-declaration] teardown_pnd2_debug(); ^ Signed-off-by: Borislav Petkov --- drivers/edac/pnd2_edac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 14d39f05226efd..ec2e349d728de6 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1435,7 +1435,11 @@ static void teardown_pnd2_debug(void) { debugfs_remove_recursive(pnd2_test); } -#endif +#else +static void setup_pnd2_debug(void) {} +static void teardown_pnd2_debug(void) {} +#endif /* CONFIG_EDAC_DEBUG */ + static int pnd2_probe(void) { From 1c2593cc8fd5960f8861de1be67135851f884836 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Tue, 21 Mar 2017 15:43:37 -0700 Subject: [PATCH 1671/1911] xen/acpi: Replace hard coded "ACPI0007" Replace hard coded "ACPI0007" with ACPI_PROCESSOR_DEVICE_HID Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Ankur Arora Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 4ce10bcca18b1f..fac0d7b0edf763 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -408,7 +408,7 @@ static int check_acpi_ids(struct acpi_processor *pr_backup) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, read_acpi_id, NULL, NULL, NULL); - acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL); upload: if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) { From 12ffed96d4369f086261ba2ee734fa8c932d7f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 23 Mar 2017 17:53:26 +0900 Subject: [PATCH 1672/1911] drm/fb-helper: Allow var->x/yres(_virtual) < fb->width/height again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise this can also prevent modesets e.g. for switching VTs, when multiple monitors with different native resolutions are connected. The depths must match though, so keep the != test for that. Also update the DRM_DEBUG output to be slightly more accurate, this doesn't only affect requests from userspace. 
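As a concrete illustration of the scenario above (example resolutions only, not taken from the bug report): with a 1920x1080 and a 1280x1024 monitor connected, the fbdev buffer is allocated at 1920x1080, and the 1280x1024 mode programmed during a VT switch must not be refused just because it is smaller than that buffer. The relaxed check therefore only requires the requested mode to fit within the framebuffer, while the colour depth still has to match exactly; in sketch form:

	if (var->bits_per_pixel != fb->format->cpp[0] * 8 ||	/* depth must match */
	    var->xres > fb->width || var->yres > fb->height ||	/* mode must merely fit */
	    var->xres_virtual > fb->width ||
	    var->yres_virtual > fb->height)
		return -EINVAL;	/* after the DRM_DEBUG message, as in the hunk below */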
Bugzilla: https://bugs.freedesktop.org/99841 Fixes: 865afb11949e ("drm/fb-helper: reject any changes to the fbdev") Signed-off-by: Michel Dänzer Reviewed-by: Daniel Stone Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170323085326.20185-1-michel@daenzer.net --- drivers/gpu/drm/drm_fb_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f6d4d9700734e6..324a688b3f3013 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1260,9 +1260,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, * to KMS, hence fail if different settings are requested. */ if (var->bits_per_pixel != fb->format->cpp[0] * 8 || - var->xres != fb->width || var->yres != fb->height || - var->xres_virtual != fb->width || var->yres_virtual != fb->height) { - DRM_DEBUG("fb userspace requested width/height/bpp different than current fb " + var->xres > fb->width || var->yres > fb->height || + var->xres_virtual > fb->width || var->yres_virtual > fb->height) { + DRM_DEBUG("fb requested width/height/bpp can't fit in current fb " "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n", var->xres, var->yres, var->bits_per_pixel, var->xres_virtual, var->yres_virtual, From eb94588dabec82e012281608949a860f64752914 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Mon, 20 Mar 2017 16:42:48 +0100 Subject: [PATCH 1673/1911] scsi: hpsa: fix volume offline state In a previous patch a hpsa_scsi_dev_t.volume_offline update line has been removed, so let us put it back.. Fixes: 85b29008d8 (hpsa: update check for logical volume status) Signed-off-by: Tomas Henzl Acked-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0d0be7754a6531..9d659aaace15d0 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3885,6 +3885,7 @@ static int hpsa_update_device_info(struct ctlr_info *h, if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); + this_device->volume_offline = volume_offline; if (volume_offline == HPSA_LV_FAILED) { rc = HPSA_LV_FAILED; dev_err(&h->pdev->dev, From a71e3cdcfce4880a4578915e110e3eaed1659765 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Thu, 23 Mar 2017 08:47:18 -0400 Subject: [PATCH 1674/1911] scsi: lpfc: Fix PT2PT PRLI reject lpfc cannot establish connection with targets that send PRLI in P2P configurations. If lpfc rejects a PRLI that is sent from a target the target will not resend and will reject the PRLI send from the initiator. [mkp: applied by hand] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_els.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d9c61d030034da..a5ca37e45fb682 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7968,7 +7968,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, did, vport->port_state, ndlp->nlp_flag); phba->fc_stat.elsRcvPRLI++; - if (vport->port_state < LPFC_DISC_AUTH) { + if ((vport->port_state < LPFC_DISC_AUTH) && + (vport->fc_flag & FC_FABRIC)) { rjt_err = LSRJT_UNABLE_TPC; rjt_exp = LSEXP_NOTHING_MORE; break; From 223b78ea212852726dfa86ef267a9c8a323d5cc2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 15:53:45 +0100 Subject: [PATCH 1675/1911] scsi: lpfc: fix building without debugfs support On a randconfig build without CONFIG_SCSI_LPFC_DEBUG_FS, I ran into multiple compile failures: drivers/scsi/lpfc/lpfc_debugfs.h: In function 'lpfc_debug_dump_wq': drivers/scsi/lpfc/lpfc_debugfs.h:405:15: error: 'DUMP_FCP' undeclared (first use in this function); did you mean 'DUMP_VAR'? drivers/scsi/lpfc/lpfc_debugfs.h:405:15: note: each undeclared identifier is reported only once for each function it appears in drivers/scsi/lpfc/lpfc_debugfs.h:408:22: error: 'DUMP_NVME' undeclared (first use in this function); did you mean 'DUMP_NONE'? drivers/scsi/lpfc/lpfc_nvmet.c: In function 'lpfc_nvmet_xmt_ls_rsp_cmp': drivers/scsi/lpfc/lpfc_nvmet.c:109:2: error: implicit declaration of function 'lpfc_nvmeio_data'; did you mean 'lpfc_mem_free'? [-Werror=implicit-function-declaration] drivers/scsi/lpfc/lpfc_nvmet.c: In function 'lpfc_nvmet_xmt_fcp_op': drivers/scsi/lpfc/lpfc_nvmet.c:523:10: error: unused variable 'id' [-Werror=unused-variable] They are all trivial to fix, so I'm doing it in a combined patch here. Fixes: 1d9d5a9879ad ("scsi: lpfc: refactor debugfs queue dump routines") Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Fixes: 2b65e18202fd ("scsi: lpfc: NVME Target: Add debugfs support") Signed-off-by: Arnd Bergmann Acked-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.h | 22 ++++++++++++++-------- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index c05f56c3023f1e..7b7d314af0e087 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -44,14 +44,6 @@ /* hbqinfo output buffer size */ #define LPFC_HBQINFO_SIZE 8192 -enum { - DUMP_FCP, - DUMP_NVME, - DUMP_MBX, - DUMP_ELS, - DUMP_NVMELS, -}; - /* nvmestat output buffer size */ #define LPFC_NVMESTAT_SIZE 8192 #define LPFC_NVMEKTIME_SIZE 8192 @@ -283,8 +275,22 @@ struct lpfc_idiag { struct lpfc_idiag_offset offset; void *ptr_private; }; + +#else + +#define lpfc_nvmeio_data(phba, fmt, arg...) 
\ + no_printk(fmt, ##arg) + #endif +enum { + DUMP_FCP, + DUMP_NVME, + DUMP_MBX, + DUMP_ELS, + DUMP_NVMELS, +}; + /* Mask for discovery_trace */ #define LPFC_DISC_TRC_ELS_CMD 0x1 /* Trace ELS commands */ #define LPFC_DISC_TRC_ELS_RSP 0x2 /* Trace ELS response */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 7ca868f394da62..acba1b67e505e9 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -520,7 +520,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; unsigned long iflags; - int rc, id; + int rc; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { @@ -530,7 +530,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, ctxp->ts_nvme_data = ktime_get_ns(); } if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { - id = smp_processor_id(); + int id = smp_processor_id(); ctxp->cpu = id; if (id < LPFC_CHECK_CPU_CNT) phba->cpucheck_xmt_io[id]++; From 1914f0cd203c941bba72f9452c8290324f1ef3dc Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Tue, 21 Mar 2017 15:43:38 -0700 Subject: [PATCH 1676/1911] xen/acpi: upload PM state from init-domain to Xen This was broken in commit cd979883b9ed ("xen/acpi-processor: fix enabling interrupts on syscore_resume"). do_suspend (from xen/manage.c) and thus xen_resume_notifier never get called on the initial-domain at resume (it is if running as guest.) The rationale for the breaking change was that upload_pm_data() potentially does blocking work in syscore_resume(). This patch addresses the original issue by scheduling upload_pm_data() to execute in workqueue context. Cc: Stanislaw Gruszka Cc: stable@vger.kernel.org Based-on-patch-by: Konrad Wilk Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Stanislaw Gruszka Signed-off-by: Ankur Arora Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 34 ++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index fac0d7b0edf763..23e391d3ec015d 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -27,10 +27,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -466,15 +466,33 @@ static int xen_upload_processor_pm_data(void) return rc; } -static int xen_acpi_processor_resume(struct notifier_block *nb, - unsigned long action, void *data) +static void xen_acpi_processor_resume_worker(struct work_struct *dummy) { + int rc; + bitmap_zero(acpi_ids_done, nr_acpi_bits); - return xen_upload_processor_pm_data(); + + rc = xen_upload_processor_pm_data(); + if (rc != 0) + pr_info("ACPI data upload failed, error = %d\n", rc); +} + +static void xen_acpi_processor_resume(void) +{ + static DECLARE_WORK(wq, xen_acpi_processor_resume_worker); + + /* + * xen_upload_processor_pm_data() calls non-atomic code. + * However, the context for xen_acpi_processor_resume is syscore + * with only the boot CPU online and in an atomic context. + * + * So defer the upload for some point safer. 
+ */ + schedule_work(&wq); } -struct notifier_block xen_acpi_processor_resume_nb = { - .notifier_call = xen_acpi_processor_resume, +static struct syscore_ops xap_syscore_ops = { + .resume = xen_acpi_processor_resume, }; static int __init xen_acpi_processor_init(void) @@ -527,7 +545,7 @@ static int __init xen_acpi_processor_init(void) if (rc) goto err_unregister; - xen_resume_notifier_register(&xen_acpi_processor_resume_nb); + register_syscore_ops(&xap_syscore_ops); return 0; err_unregister: @@ -544,7 +562,7 @@ static void __exit xen_acpi_processor_exit(void) { int i; - xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb); + unregister_syscore_ops(&xap_syscore_ops); kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); From 950712eb8ef03e4a62ac5b3067efde7ec2f8a91e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:18 +0800 Subject: [PATCH 1677/1911] KVM: x86: check existance before destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly used for split irqchip mode. In that case, these two things are not inited at all, so no need to release. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/i8259.c | 3 +++ arch/x86/kvm/ioapic.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 73ea24d4f119c8..047b17a2626961 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -657,6 +657,9 @@ void kvm_pic_destroy(struct kvm *kvm) { struct kvm_pic *vpic = kvm->arch.vpic; + if (!vpic) + return; + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d27c..289270a6aecbb4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -635,6 +635,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + if (!ioapic) + return; + cancel_delayed_work_sync(&ioapic->eoi_inject); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm->arch.vioapic = NULL; From c761159cf81b8641290f7559a8d8e30f6ab92669 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:19 +0800 Subject: [PATCH 1678/1911] KVM: x86: use pic/ioapic destructor when destroy vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have specific destructors for pic/ioapic, we'd better use them when destroying the VM as well. 
Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1faf620a6fdc20..d30ff491ecc762 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8153,8 +8153,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); kvm_iommu_unmap_guest(kvm); - kfree(kvm->arch.vpic); - kfree(kvm->arch.vioapic); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); From fb6c8198431311027c3434d4e94ab8bc040f7aea Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 16 Mar 2017 13:53:59 -0700 Subject: [PATCH 1679/1911] kvm: vmx: Flush TLB when the APIC-access address changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for Use of the INVEPT Instruction: If EPT was in use on a logical processor at one time with EPTP X, it is recommended that software use the INVEPT instruction with the "single-context" INVEPT type and with EPTP X in the INVEPT descriptor before a VM entry on the same logical processor that enables EPT with EPTP X and either (a) the "virtualize APIC accesses" VM-execution control was changed from 0 to 1; or (b) the value of the APIC-access address was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 when L1 doesn't enable EPT in vmcs12. Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66436530a93af..e2f608283a5ab8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4024,6 +4024,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -8548,6 +8554,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -8573,8 +8580,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -10256,6 +10265,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* @@ -11055,6 +11067,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + 
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ From 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:55 -0700 Subject: [PATCH 1680/1911] KVM: x86: correct async page present tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After async pf setup successfully, there is a broadcast wakeup w/ special token 0xffffffff which tells vCPU that it should wake up all processes waiting for APFs though there is no real process waiting at the moment. The async page present tracepoint print prematurely and fails to catch the special token setup. This patch fixes it by moving the async page present tracepoint after the special token setup. Before patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0 After patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0 Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d30ff491ecc762..64697fe475c3cb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8566,11 +8566,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { From 63cb6d5f004ca44f9b8e562b6dd191f717a4960e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:53 -0700 Subject: [PATCH 1681/1911] KVM: nVMX: Fix nested VPID vmx exec control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running kvm-unit-tests/vmx.flat on L0 w/ vpid disabled. Test suite: VPID Unhandled exception 6 #UD at ip 00000000004051a6 error_code=0000 rflags=00010047 cs=00000008 rax=0000000000000000 rcx=0000000000000001 rdx=0000000000000047 rbx=0000000000402f79 rbp=0000000000456240 rsi=0000000000000001 rdi=0000000000000000 r8=000000000000000a r9=00000000000003f8 r10=0000000080010011 r11=0000000000000000 r12=0000000000000003 r13=0000000000000708 r14=0000000000000000 r15=0000000000000000 cr0=0000000080010031 cr2=0000000000000000 cr3=0000000007fff000 cr4=0000000000002020 cr8=0000000000000000 STACK: @4051a6 40523e 400f7f 402059 40028f We should hide and forbid VPID in L1 if it is disabled on L0. However, nested VPID enable bit is set unconditionally during setup nested vmx exec controls though VPID is not exposed through nested VMX capablity. This patch fixes it by don't set nested VPID enable bit if it is disabled on L0. 
Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Fixes: 5c614b3583e (KVM: nVMX: nested VPID emulation) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2f608283a5ab8..5a82766e4b074a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2753,7 +2753,6 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | @@ -2781,10 +2780,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * though it is treated as global context. The alternative is * not failing the single-context invvpid, and it is worse. */ - if (enable_vpid) + if (enable_vpid) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VPID; vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; - else + } else vmx->nested.nested_vmx_vpid_caps = 0; if (enable_unrestricted_guest) From 08d839c4b134b8328ec42f2157a9ca4b93227c03 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 23 Mar 2017 05:30:08 -0700 Subject: [PATCH 1682/1911] KVM: VMX: Fix enable VPID conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running L2 on L1, and disable VPID on L0 if w/o commit "KVM: nVMX: Fix nested VPID vmx exec control", the L2 crash as below: KVM: entry failed, hardware error 0x7 EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Reference SDM 30.3 INVVPID: Protected Mode Exceptions - #UD - If not in VMX operation. - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0). - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0). So we should check both VPID enable bit in vmx exec control and INVVPID support bit in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable VPID if either INVVPID or single-context/all-context invalidation is not exposed in vmx capability MSRs. 
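Mapping the SDM bit numbers quoted above onto the capability helpers used in the hunk below (a sketch of the resulting predicate, not a literal copy of the surrounding function): IA32_VMX_PROCBASED_CTLS2[37] roughly corresponds to cpu_has_vmx_vpid(), and IA32_VMX_EPT_VPID_CAP[32] to cpu_has_vmx_invvpid().

	/* keep VPID only if the enable bit, INVVPID itself and at least
	 * one usable invalidation type are all reported by the MSRs */
	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
	    !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
		enable_vpid = 0;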
Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5a82766e4b074a..cd1ba62878f3bc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1239,6 +1239,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -6524,8 +6529,10 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) From 90db10434b163e46da413d34db8d0e77404cc645 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 18:24:19 +0100 Subject: [PATCH 1683/1911] KVM: kvm_io_bus_unregister_dev() should never fail No caller currently checks the return value of kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on freeing their device. A stale reference will remain in the io_bus, getting at least used again, when the iobus gets teared down on kvm_destroy_vm() - leading to use after free errors. There is nothing the callers could do, except retrying over and over again. So let's simply remove the bus altogether, print an error and make sure no one can access this broken bus again (returning -ENOMEM on any attempt to access it). 
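To make the use-after-free chain explicit, here is the caller pattern described above, reduced to a sketch. The device type and destroy function are hypothetical; only kvm_io_bus_unregister_dev(), KVM_MMIO_BUS and kvm_io_bus_destroy() are real.

	struct example_dev {
		struct kvm_io_device io_dev;
		/* ... driver state ... */
	};

	static void example_destroy(struct kvm *kvm, struct example_dev *dev)
	{
		/* before this patch the unregister could fail (e.g. -ENOMEM),
		 * leaving &dev->io_dev registered on the bus ... */
		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->io_dev);
		/* ... while the caller freed it anyway, so kvm_io_bus_destroy()
		 * at VM teardown later touched freed memory */
		kfree(dev);
	}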
Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") Cc: stable@vger.kernel.org # 3.4+ Reported-by: Dmitry Vyukov Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- virt/kvm/eventfd.c | 3 ++- virt/kvm/kvm_main.c | 42 ++++++++++++++++++++++++---------------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da94..d0250744507a28 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a29786dd952210..4d28a9ddbee010 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7445566fadc1fd..ef1aa7f1ed7a56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -728,7 +728,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - kvm_io_bus_destroy(kvm->buses[i]); + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -3476,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3493,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3543,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3555,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -3574,45 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. 
*/ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - /* - * It's possible the bus being released before hand. If so, - * we're done here. - */ if (!bus) - return 0; + return; - r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -3625,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + goto out_unlock; dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); if (dev_idx < 0) From a2125d02443e9a4e68bcfd9f8004fa23239e8329 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 16:03:11 +0100 Subject: [PATCH 1684/1911] hwmon: (asus_atk0110) fix uninitialized data access The latest gcc-7 snapshot adds a warning to point out that when atk_read_value_old or atk_read_value_new fails, we copy uninitialized data into sensor->cached_value: drivers/hwmon/asus_atk0110.c: In function 'atk_input_show': drivers/hwmon/asus_atk0110.c:651:26: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized] Adding an error check avoids this. All versions of the driver are affected. Fixes: 2c03d07ad54d ("hwmon: Add Asus ATK0110 support") Signed-off-by: Arnd Bergmann Reviewed-by: Luca Tettamanti Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_atk0110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index cccef87963e050..975c43d446f859 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value) else err = atk_read_value_new(sensor, value); + if (err) + return err; + sensor->is_valid = true; sensor->last_updated = jiffies; sensor->cached_value = *value; From e88162f9dad426f3c83e23150b7f28b7d9486df8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 21 Mar 2017 21:13:09 +0200 Subject: [PATCH 1685/1911] Revert "i2c: mux: pca954x: Add ACPI support for pca954x" In ACPI world any ID should be carefully chosen and registered officially. The commit bbf9d262a147 seems did a wrong assumption because PCA is the registered PNP ID for "PHILIPS BU ADD ON CARD". I'm pretty sure this prefix has nothing to do with the driver in question. Moreover, newer ACPI specification has a support of _DSD method and special device IDs to allow drivers be enumerated via compatible string. The slight change to support this kind of enumeration will be added in sequential patch against pca954x.c. 
Revert the commit bbf9d262a147 for good. Cc: Tin Huynh Signed-off-by: Andy Shevchenko Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca954x.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index dfc1c0e37c4022..333a3918b65681 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -35,7 +35,6 @@ * warranty of any kind, whether express or implied. */ -#include #include #include #include @@ -141,21 +140,6 @@ static const struct i2c_device_id pca954x_id[] = { }; MODULE_DEVICE_TABLE(i2c, pca954x_id); -#ifdef CONFIG_ACPI -static const struct acpi_device_id pca954x_acpi_ids[] = { - { .id = "PCA9540", .driver_data = pca_9540 }, - { .id = "PCA9542", .driver_data = pca_9542 }, - { .id = "PCA9543", .driver_data = pca_9543 }, - { .id = "PCA9544", .driver_data = pca_9544 }, - { .id = "PCA9545", .driver_data = pca_9545 }, - { .id = "PCA9546", .driver_data = pca_9545 }, - { .id = "PCA9547", .driver_data = pca_9547 }, - { .id = "PCA9548", .driver_data = pca_9548 }, - { } -}; -MODULE_DEVICE_TABLE(acpi, pca954x_acpi_ids); -#endif - #ifdef CONFIG_OF static const struct of_device_id pca954x_of_match[] = { { .compatible = "nxp,pca9540", .data = &chips[pca_9540] }, @@ -393,17 +377,8 @@ static int pca954x_probe(struct i2c_client *client, match = of_match_device(of_match_ptr(pca954x_of_match), &client->dev); if (match) data->chip = of_device_get_match_data(&client->dev); - else if (id) + else data->chip = &chips[id->driver_data]; - else { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(ACPI_PTR(pca954x_acpi_ids), - &client->dev); - if (!acpi_id) - return -ENODEV; - data->chip = &chips[acpi_id->driver_data]; - } data->last_chan = 0; /* force the first selection */ @@ -492,7 +467,6 @@ static struct i2c_driver pca954x_driver = { .name = "pca954x", .pm = &pca954x_pm, .of_match_table = of_match_ptr(pca954x_of_match), - .acpi_match_table = ACPI_PTR(pca954x_acpi_ids), }, .probe = pca954x_probe, .remove = pca954x_remove, From 81caa91b72fd6a0b8dfc5eb10942c34f7efd2bc5 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 23 Mar 2017 17:10:10 -0500 Subject: [PATCH 1686/1911] PCI: thunder-pem: Use Cavium assigned hardware ID for ThunderX host controller "CAV" is the only PNP/ACPI hardware ID vendor prefix assigned to Cavium so fix this as it should be from day one. 
Fixes: 44f22bd91e88 ("PCI: Add MCFG quirks for Cavium ThunderX pass2.x host controller") Tested-by: Robert Richter Signed-off-by: Tomasz Nowicki Signed-off-by: Bjorn Helgaas Acked-by: Robert Richter CC: stable@vger.kernel.org # v4.10+ --- drivers/pci/host/pci-thunder-pem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 52b5bdccf5f0c2..5b88faeebaaabe 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -346,7 +346,7 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) if (!res_pem) return -ENOMEM; - ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem); + ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem); if (ret) { dev_err(dev, "can't get rc base address\n"); return ret; From 9abb27c7594a62bbf6385e20b7f5a90b4eceae2f Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 23 Mar 2017 17:10:16 -0500 Subject: [PATCH 1687/1911] PCI: thunder-pem: Add legacy firmware support for Cavium ThunderX host controller During early days of PCI quirks support, ThunderX firmware did not provide PNP0c02 node with PCI configuration space and PEM-specific register ranges. This means that for legacy FW we are not reserving these resources and cannot gather PEM-specific resources for further PEM initialization. To support already deployed legacy FW, calculate PEM-specific ranges and provide resources reservation as fallback scenario into PEM driver when we could not gather PEM reg base from ACPI tables. Tested-by: Robert Richter Signed-off-by: Tomasz Nowicki Signed-off-by: Vadim Lomovtsev Signed-off-by: Bjorn Helgaas Acked-by: Robert Richter CC: stable@vger.kernel.org # v4.10+ --- drivers/pci/host/pci-thunder-pem.c | 56 ++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 5b88faeebaaabe..b89c373555c553 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -14,6 +14,7 @@ * Copyright (C) 2015 - 2016 Cavium, Inc. */ +#include #include #include #include @@ -334,6 +335,50 @@ static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg, #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) +#define PEM_RES_BASE 0x87e0c0000000UL +#define PEM_NODE_MASK GENMASK(45, 44) +#define PEM_INDX_MASK GENMASK(26, 24) +#define PEM_MIN_DOM_IN_NODE 4 +#define PEM_MAX_DOM_IN_NODE 10 + +static void thunder_pem_reserve_range(struct device *dev, int seg, + struct resource *r) +{ + resource_size_t start = r->start, end = r->end; + struct resource *res; + const char *regionid; + + regionid = kasprintf(GFP_KERNEL, "PEM RC:%d", seg); + if (!regionid) + return; + + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else + kfree(regionid); + + dev_info(dev, "%pR %s reserved\n", r, + res ? 
"has been" : "could not be"); +} + +static void thunder_pem_legacy_fw(struct acpi_pci_root *root, + struct resource *res_pem) +{ + int node = acpi_get_node(root->device->handle); + int index; + + if (node == NUMA_NO_NODE) + node = 0; + + index = root->segment - PEM_MIN_DOM_IN_NODE; + index -= node * PEM_MAX_DOM_IN_NODE; + res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) | + FIELD_PREP(PEM_INDX_MASK, index); + res_pem->end = res_pem->start + SZ_16M - 1; + res_pem->flags = IORESOURCE_MEM; +} + static int thunder_pem_acpi_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -347,9 +392,16 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) return -ENOMEM; ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem); + + /* + * If we fail to gather resources it means that we run with old + * FW where we need to calculate PEM-specific resources manually. + */ if (ret) { - dev_err(dev, "can't get rc base address\n"); - return ret; + thunder_pem_legacy_fw(root, res_pem); + /* Reserve PEM-specific resources and PCI configuration space */ + thunder_pem_reserve_range(dev, root->segment, res_pem); + thunder_pem_reserve_range(dev, root->segment, &cfg->res); } return thunder_pem_init(dev, cfg, res_pem); From f35ec35e5e726e7137deaee0dec24f8cbf07a48f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 Mar 2017 22:40:30 -0700 Subject: [PATCH 1688/1911] net: phy: Export mdiobus_register_board_info() We can build modular code that uses mdiobus_register_board_info() which would lead to linking failure since this symbol is not expoerted. Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-boardinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 6b988f77da08fc..61941e29daae85 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -84,3 +84,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info, return 0; } +EXPORT_SYMBOL(mdiobus_register_board_info); From 48481c8fa16410ffa45939b13b6c53c2ca609e5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Mar 2017 12:39:21 -0700 Subject: [PATCH 1689/1911] net: neigh: guard against NULL solicit() method Dmitry posted a nice reproducer of a bug triggering in neigh_probe() when dereferencing a NULL neigh->ops->solicit method. This can happen for arp_direct_ops/ndisc_direct_ops and similar, which can be used for NUD_NOARP neighbours (created when dev->header_ops is NULL). Admin can then force changing nud_state to some other state that would fire neigh timer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7c12caa20c88a..4526cbd7e28a1f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } From 854fbd6e5f60fe99e8e3a569865409fca378f143 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Mar 2017 15:46:16 -0700 Subject: [PATCH 1690/1911] lib/syscall: Clear return values when no stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit: aa1f1a639621 ("lib/syscall: Pin the task stack in collect_syscall()") ... added logic to handle a process stack not existing, but left sp and pc uninitialized, which can be later reported via /proc/$pid/syscall for zombie processes, potentially exposing kernel memory to userspace. Zombie /proc/$pid/syscall before: -1 0xffffffff9a060100 0xffff92f42d6ad900 Zombie /proc/$pid/syscall after: -1 0x0 0x0 Reported-by: Robert Święcki Signed-off-by: Kees Cook Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.9+ Fixes: aa1f1a639621 ("lib/syscall: Pin the task stack in collect_syscall()") Link: http://lkml.kernel.org/r/20170323224616.GA92694@beast Signed-off-by: Ingo Molnar --- lib/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/syscall.c b/lib/syscall.c index 17d5ff5fa6a388..2c6cd1b5c3ea86 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -12,6 +12,7 @@ static int collect_syscall(struct task_struct *target, long *callno, if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ + *sp = *pc = 0; *callno = -1; return 0; } From a46f60d76004965e5669dbf3fc21ef3bc3632eb4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 Mar 2017 12:59:52 +0800 Subject: [PATCH 1691/1911] x86/mm/KASLR: Exclude EFI region from KASLR VA space randomization Currently KASLR is enabled on three regions: the direct mapping of physical memory, vmalloc and vmemmap. However, the EFI region is also mistakenly included in VA space randomization because of the misuse of the EFI_VA_START macro and the assumption that EFI_VA_START < EFI_VA_END. (This breaks kexec and possibly other things that rely on stable addresses.) The EFI region is reserved for the EFI runtime services virtual mapping and should not be included in the KASLR ranges. In Documentation/x86/x86_64/mm.txt, we can see: ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space EFI uses the space from -4G to -64G, thus EFI_VA_START > EFI_VA_END; here EFI_VA_START = -4G and EFI_VA_END = -64G. Changing EFI_VA_START to EFI_VA_END in mm/kaslr.c fixes this problem. Signed-off-by: Baoquan He Reviewed-by: Bhupesh Sharma Acked-by: Dave Young Acked-by: Thomas Garnier Cc: #4.8+ Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1490331592-31860-1-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 887e5718271682..aed206475aa7c0 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; #if defined(CONFIG_X86_ESPFIX64) static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; #elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_START; +static const unsigned long vaddr_end = EFI_VA_END; #else static const unsigned long vaddr_end = __START_KERNEL_map; #endif @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) */ BUILD_BUG_ON(vaddr_start >= vaddr_end); BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_START); + vaddr_end >= EFI_VA_END); BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); From dbe4d69d252e9e65c6c46826980b77b11a142065 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Thu, 23 Mar 2017 10:00:36 +0100 Subject: [PATCH 1692/1911] i2c: mux: pca954x: Add missing pca9546 definition to chip_desc The spec for the pca9546 was missing. This chip is the same as the pca9545 except that it lacks interrupt lines. While the i2c_device_id table mapped the pca9546 to the pca9545 definition the compatible table did not. Signed-off-by: Mike Looijmans Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca954x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 333a3918b65681..ad31d21da3165f 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -116,6 +116,10 @@ static const struct chip_desc chips[] = { .has_irq = 1, .muxtype = pca954x_isswi, }, + [pca_9546] = { + .nchans = 4, + .muxtype = pca954x_isswi, + }, [pca_9547] = { .nchans = 8, .enable = 0x8, @@ -133,7 +137,7 @@ static const struct i2c_device_id pca954x_id[] = { { "pca9543", pca_9543 }, { "pca9544", pca_9544 }, { "pca9545", pca_9545 }, - { "pca9546", pca_9545 }, + { "pca9546", pca_9546 }, { "pca9547", pca_9547 }, { "pca9548", pca_9548 }, { } From 74d1cf4897f919837efc4e34d800b996936eb38e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 17 Mar 2017 13:58:39 +0100 Subject: [PATCH 1693/1911] dt-bindings: rng: clocks property on omap_rng not always mandatory Commit 52060836f79 ("dt-bindings: omap-rng: Document SafeXcel IP-76 device variant") update the omap_rng Device Tree binding to add support for the IP-76 variation of the IP. As part of this change, a "clocks" property was added, but is indicated as "Required", without indicated it's actually only required for some compatible strings. This commit fixes that, by explicitly stating that the clocks property is only required with the inside-secure,safexcel-eip76 compatible string. 
Fixes: 52060836f79 ("dt-bindings: omap-rng: Document SafeXcel IP-76 device variant") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/omap_rng.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt index 471477299ece16..9cf7876ab43444 100644 --- a/Documentation/devicetree/bindings/rng/omap_rng.txt +++ b/Documentation/devicetree/bindings/rng/omap_rng.txt @@ -12,7 +12,8 @@ Required properties: - reg : Offset and length of the register set for the module - interrupts : the interrupt number for the RNG module. Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76" -- clocks: the trng clock source +- clocks: the trng clock source. Only mandatory for the + "inside-secure,safexcel-eip76" compatible. Example: /* AM335x */ From de5540d088fe97ad583cc7d396586437b32149a5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 23 Mar 2017 12:24:43 +0100 Subject: [PATCH 1694/1911] padata: avoid race in reordering Under extremely heavy use of padata, crashes occur, and with list debugging turned on, this happens instead: [87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33 __list_add+0xae/0x130 [87487.301868] list_add corruption. prev->next should be next (ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00). [87487.339011] [] dump_stack+0x68/0xa3 [87487.342198] [] ? console_unlock+0x281/0x6d0 [87487.345364] [] __warn+0xff/0x140 [87487.348513] [] warn_slowpath_fmt+0x4a/0x50 [87487.351659] [] __list_add+0xae/0x130 [87487.354772] [] ? _raw_spin_lock+0x64/0x70 [87487.357915] [] padata_reorder+0x1e6/0x420 [87487.361084] [] padata_do_serial+0xa5/0x120 padata_reorder calls list_add_tail with the list to which it is adding locked, which seems correct: spin_lock(&squeue->serial.lock); list_add_tail(&padata->list, &squeue->serial.list); spin_unlock(&squeue->serial.lock); This therefore leaves only one place where such an inconsistency could occur: if padata->list is added at the same time on two different threads. This padata pointer comes from the function call to padata_get_next(pd), which has in it the following block: next_queue = per_cpu_ptr(pd->pqueue, cpu); padata = NULL; reorder = &next_queue->reorder; if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); spin_unlock(&reorder->lock); pd->processed++; goto out; } out: return padata; I strongly suspect that the problem here is that two threads can race on the reorder list. Even though the deletion is locked, the call to list_entry is not locked, which means it's feasible that two threads pick up the same padata object and subsequently call list_add_tail on it at the same time. The fix is thus to hoist that lock outside of that block. Signed-off-by: Jason A.
Donenfeld Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 05316c9f32da9d..3202aa17492c80 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) reorder = &next_queue->reorder; + spin_lock(&reorder->lock); if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); - spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); - spin_unlock(&reorder->lock); pd->processed++; + spin_unlock(&reorder->lock); goto out; } + spin_unlock(&reorder->lock); if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); From efc989fce8703914bac091dcc4b8ff7a72ccf987 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 23 Mar 2017 12:53:30 -0500 Subject: [PATCH 1695/1911] crypto: ccp - Make some CCP DMA channels private The CCP registers its queues as channels capable of handling general DMA operations. The NTB driver will use DMA if directed, but as public channels can be reserved for use in asynchronous operations some channels should be held back as private. Since the public/private determination is handled at a device level, reserve the "other" (secondary) CCP channels as private. Add a module parameter that allows for override, to be applied to all channels on all devices. CC: # 4.10.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 1 + drivers/crypto/ccp/ccp-dev.h | 5 ++++ drivers/crypto/ccp/ccp-dmaengine.c | 41 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 41cc853f8569cf..fc08b4ed69d936 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -1015,6 +1015,7 @@ const struct ccp_vdata ccpv5a = { const struct ccp_vdata ccpv5b = { .version = CCP_VERSION(5, 0), + .dma_chan_attr = DMA_PRIVATE, .setup = ccp5other_config, .perform = &ccp5_actions, .bar = 2, diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 2b5c01fade05a5..aa36f3f8186056 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -179,6 +179,10 @@ /* ------------------------ General CCP Defines ------------------------ */ +#define CCP_DMA_DFLT 0x0 +#define CCP_DMA_PRIV 0x1 +#define CCP_DMA_PUB 0x2 + #define CCP_DMAPOOL_MAX_SIZE 64 #define CCP_DMAPOOL_ALIGN BIT(5) @@ -636,6 +640,7 @@ struct ccp_actions { /* Structure to hold CCP version-specific values */ struct ccp_vdata { const unsigned int version; + const unsigned int dma_chan_attr; void (*setup)(struct ccp_device *); const struct ccp_actions *perform; const unsigned int bar; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 8d0eeb46d4a274..e00be01fbf5a03 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -25,6 +26,37 @@ (mask == 0) ? 64 : fls64(mask); \ }) +/* The CCP as a DMA provider can be configured for public or private + * channels. Default is specified in the vdata for the device (PCI ID). 
+ * This module parameter will override for all channels on all devices: + * dma_chan_attr = 0x2 to force all channels public + * = 0x1 to force all channels private + * = 0x0 to defer to the vdata setting + * = any other value: warning, revert to 0x0 + */ +static unsigned int dma_chan_attr = CCP_DMA_DFLT; +module_param(dma_chan_attr, uint, 0444); +MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public"); + +unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp) +{ + switch (dma_chan_attr) { + case CCP_DMA_DFLT: + return ccp->vdata->dma_chan_attr; + + case CCP_DMA_PRIV: + return DMA_PRIVATE; + + case CCP_DMA_PUB: + return 0; + + default: + dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n", + dma_chan_attr); + return ccp->vdata->dma_chan_attr; + } +} + static void ccp_free_cmd_resources(struct ccp_device *ccp, struct list_head *list) { @@ -675,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp) dma_cap_set(DMA_SG, dma_dev->cap_mask); dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + /* The DMA channels for this device can be set to public or private, + * and overridden by the module parameter dma_chan_attr. + * Default: according to the value in vdata (dma_chan_attr=0) + * dma_chan_attr=0x1: all channels private (override vdata) + * dma_chan_attr=0x2: all channels public (override vdata) + */ + if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE) + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + INIT_LIST_HEAD(&dma_dev->channels); for (i = 0; i < ccp->cmd_q_count; i++) { chan = ccp->ccp_dma_chan + i; From 9df0eb180c2074451f25556eb566d89c7057c2ac Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 23 Mar 2017 13:39:46 -0700 Subject: [PATCH 1696/1911] crypto: xts,lrw - fix out-of-bounds write after kmalloc failure In the generic XTS and LRW algorithms, for input data > 128 bytes, a temporary buffer is allocated to hold the values to be XOR'ed with the data before and after encryption or decryption. If the allocation fails, the fixed-size buffer embedded in the request buffer is meant to be used as a fallback --- resulting in more calls to the ECB algorithm, but still producing the correct result. However, we weren't correctly limiting subreq->cryptlen in this case, resulting in pre_crypt() overrunning the embedded buffer. Fix this by setting subreq->cryptlen correctly. Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Fixes: 700cb3f5fe75 ("crypto: lrw - Convert to skcipher") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Dmitry Vyukov Signed-off-by: Eric Biggers Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- crypto/lrw.c | 7 +++++-- crypto/xts.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index ecd8474018e3bd..3ea095adafd9af 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) subreq->cryptlen = LRW_BUFFER_SIZE; if (req->cryptlen > LRW_BUFFER_SIZE) { - subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); - rctx->ext = kmalloc(subreq->cryptlen, gfp); + unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE); + + rctx->ext = kmalloc(n, gfp); + if (rctx->ext) + subreq->cryptlen = n; } rctx->src = req->src; diff --git a/crypto/xts.c b/crypto/xts.c index baeb34dd8582eb..c976bfac29da52 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) subreq->cryptlen = XTS_BUFFER_SIZE; if (req->cryptlen > XTS_BUFFER_SIZE) { - subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); - rctx->ext = kmalloc(subreq->cryptlen, gfp); + unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE); + + rctx->ext = kmalloc(n, gfp); + if (rctx->ext) + subreq->cryptlen = n; } rctx->src = req->src; From 9257821c5a1dc57ef3a37f7cbcebaf548395c964 Mon Sep 17 00:00:00 2001 From: Peter Stein Date: Fri, 17 Feb 2017 00:00:50 -0800 Subject: [PATCH 1697/1911] HID: xinmo: fix for out of range for THT 2P arcade controller. There is a new clone of the XIN MO arcade controller which has same issue with out of range like the original. This fix will solve the issue where 2 directions on the joystick are not recognized by the new THT 2P arcade controller with device ID 0x75e1. In details the new device ID is added the hid-id list and the hid-xinmo source code. 
Signed-off-by: Peter Stein Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + drivers/hid/hid-xinmo.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3ceb4a2af381f0..63ec1993eaaa90 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2112,6 +2112,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) }, { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0e2e7c571d2261..4e2648c86c8c56 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1082,6 +1082,7 @@ #define USB_VENDOR_ID_XIN_MO 0x16c0 #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE 0x05e1 +#define USB_DEVICE_ID_THT_2P_ARCADE 0x75e1 #define USB_VENDOR_ID_XIROKU 0x1477 #define USB_DEVICE_ID_XIROKU_SPX 0x1006 diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c index 7df5227a7e61d6..9ad7731d2e10da 100644 --- a/drivers/hid/hid-xinmo.c +++ b/drivers/hid/hid-xinmo.c @@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id xinmo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { } }; From a95600294157ca7527ee7c70249fb53e09d8c566 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 26 Jan 2017 14:43:32 +0200 Subject: [PATCH 1698/1911] iwlwifi: mvm: fix accessing fw_id_to_mac_id Access should be by rcu_dereference. Issue was found by sparse. Fixes: 65e254821cee ("iwlwifi: mvm: use firmware station PM notification for AP_LINK_PS") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6927caecd48e51..486dcceed17a4f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2401,7 +2401,7 @@ void iwl_mvm_sta_pm_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) return; rcu_read_lock(); - sta = mvm->fw_id_to_mac_id[notif->sta_id]; + sta = rcu_dereference(mvm->fw_id_to_mac_id[notif->sta_id]); if (WARN_ON(IS_ERR_OR_NULL(sta))) { rcu_read_unlock(); return; From 251fe09f13bfb54c1ede66ee8bf8ddd0061c4f7c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Mar 2017 13:40:00 +0300 Subject: [PATCH 1699/1911] iwlwifi: mvm: writing zero bytes to debugfs causes a crash This is a static analysis fix. The warning is: drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c:912 iwl_mvm_fw_dbg_collect() warn: integer overflows 'sizeof(*desc) + len' I guess this code is supposed to take a NUL character, but if we write zero bytes then it tries to write -1 characters and crashes. 
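As an aside that is not part of the patch itself: the crash is plain unsigned arithmetic. "count" has an unsigned size type, so a zero-byte write turns "count - 1" into a huge length rather than -1. A minimal standalone C sketch of that arithmetic, and of the guard the fix adds before using count - 1:

#include <stdio.h>
#include <stddef.h>

/* Model of a write handler that strips a trailing NUL from a user buffer. */
static size_t payload_len(size_t count)
{
        if (count == 0)         /* the guard added by the fix */
                return 0;
        return count - 1;       /* drop the trailing NUL character */
}

int main(void)
{
        size_t count = 0;       /* a zero-byte write from user space */

        /* Without the guard, count - 1 wraps around to SIZE_MAX. */
        printf("unguarded length: %zu\n", count - 1);
        printf("guarded length:   %zu\n", payload_len(count));
        return 0;
}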
Fixes: c91b865cb14d ("iwlwifi: mvm: support description for user triggered fw dbg collection") Signed-off-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index a260cd5032005b..077bfd8f4c0cd8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1056,6 +1056,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm, if (ret) return ret; + if (count == 0) + return 0; iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf, (count - 1), NULL); From 4d339989acd730f17bc814b5ddb9c54e405766b6 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 21 Mar 2017 17:13:16 +0200 Subject: [PATCH 1700/1911] iwlwifi: mvm: support ibss in dqa mode Allow working IBSS also when working in DQA mode. This is done by setting it to treat the queues the same as a BSS AP treats the queues. Fixes: 7948b87308a4 ("iwlwifi: mvm: enable dynamic queue allocation mode") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 9 ++++++--- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 99132ea16ede08..c5734e1a02d27e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -216,7 +216,8 @@ u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif) qmask |= BIT(vif->hw_queue[ac]); } - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) qmask |= BIT(vif->cab_queue); return qmask; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index b51a2853cc804a..9d28db7f56aa24 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1806,7 +1806,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_get_wd_timeout(mvm, vif, false, false); int queue; - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; else if (vif->type == NL80211_IFTYPE_P2P_DEVICE) queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; @@ -1837,7 +1838,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * enabled-cab_queue to the mask) */ if (iwl_mvm_is_dqa_supported(mvm) && - vif->type == NL80211_IFTYPE_AP) { + (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC)) { struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = mvmvif->bcast_sta.sta_id, @@ -1862,7 +1864,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue, IWL_MAX_TID_COUNT, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 3f37075f4cde3c..1ba0a6f5550366 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ 
b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -506,6 +506,7 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, switch (info->control.vif->type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: /* * Handle legacy hostapd as well, where station may be added * only after assoc. Take care of the case where we send a @@ -517,7 +518,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, if (info->hw_queue == info->control.vif->cab_queue) return info->hw_queue; - WARN_ONCE(1, "fc=0x%02x", le16_to_cpu(fc)); + WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC, + "fc=0x%02x", le16_to_cpu(fc)); return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; case NL80211_IFTYPE_P2P_DEVICE: if (ieee80211_is_mgmt(fc)) @@ -584,7 +586,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) iwl_mvm_vif_from_mac80211(info.control.vif); if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || - info.control.vif->type == NL80211_IFTYPE_AP) { + info.control.vif->type == NL80211_IFTYPE_AP || + info.control.vif->type == NL80211_IFTYPE_ADHOC) { sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); From 2d7d54002e396c180db0c800c1046f0a3c471597 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Mar 2017 17:07:57 +0100 Subject: [PATCH 1701/1911] ALSA: seq: Fix race during FIFO resize When a new event is queued while processing to resize the FIFO in snd_seq_fifo_clear(), it may lead to a use-after-free, as the old pool that is being queued gets removed. For avoiding this race, we need to close the pool to be deleted and sync its usage before actually deleting it. The issue was spotted by syzkaller. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_fifo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 33980d1c803796..01c4cfe30c9fef 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -267,6 +267,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) /* NOTE: overflow flag is not cleared */ spin_unlock_irqrestore(&f->lock, flags); + /* close the old pool and wait until all users are gone */ + snd_seq_pool_mark_closing(oldpool); + snd_use_lock_sync(&f->use_lock); + /* release cells in old pool */ for (cell = oldhead; cell; cell = next) { next = cell->next; From 3c9d3f1bc2defd418b5933bbc928096c9c686d3b Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Thu, 23 Mar 2017 19:39:54 +0100 Subject: [PATCH 1702/1911] ASoC: STI: Fix reader substream pointer set reader->substream is used in IRQ handler for error case but is never set. Set value to pcm substream on DAI startup and clean it on dai shutdown. 
Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/sti/uniperif_reader.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 5992c6ab3833ef..93a8df6ed880ea 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -349,6 +349,8 @@ static int uni_reader_startup(struct snd_pcm_substream *substream, struct uniperif *reader = priv->dai_data.uni; int ret; + reader->substream = substream; + if (!UNIPERIF_TYPE_IS_TDM(reader)) return 0; @@ -378,6 +380,7 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream, /* Stop the reader */ uni_reader_stop(reader); } + reader->substream = NULL; } static const struct snd_soc_dai_ops uni_reader_dai_ops = { From 971edb0a0087b65bd3726d23b2dffff405d48f72 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 23 Mar 2017 15:05:26 +0100 Subject: [PATCH 1703/1911] ASoC: simple-card: fix simple_dai clk lookup The clock needs to be stored in the simple_dai structure, so it can be enabled later on. This has been broken during the conversion to use devm_* functions for the clk lookup. Fixes: e984fd61e860 (ASoC: simple-card: use devm_get_clk_from_child()) Signed-off-by: Lucas Stach Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 4924575d2e95d3..343b291fc3725f 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -115,6 +115,7 @@ int asoc_simple_card_parse_clk(struct device *dev, clk = devm_get_clk_from_child(dev, node, NULL); if (!IS_ERR(clk)) { simple_dai->sysclk = clk_get_rate(clk); + simple_dai->clk = clk; } else if (!of_property_read_u32(node, "system-clock-frequency", &val)) { simple_dai->sysclk = val; } else { From 49d52e8108a21749dc2114b924c907db43358984 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 22 Mar 2017 15:27:01 -0500 Subject: [PATCH 1704/1911] net: phy: handle state correctly in phy_stop_machine If the PHY is halted on stop, then do not set the state to PHY_UP. This ensures the phy will be restarted later in phy_start when the machine is started again. Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.") Signed-off-by: Nathan Sullivan Signed-off-by: Brad Mouring Acked-by: Xander Huff Acked-by: Kyle Roeschley Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1be69d8bc90948..a2bfc82e95d70b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -681,7 +681,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (phydev->state > PHY_UP) + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } From 2f25abe6bac573928a990ccbdac75873add8127e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 23 Mar 2017 19:14:19 +0800 Subject: [PATCH 1705/1911] r8152: prevent the driver from transmitting packets with carrier off The linking status may be changed when autosuspend. 
And, after autoresume, the driver may try to transmit packets when the device is carrier off, because the interrupt transfer doesn't update the linking status, yet. And, if the device is in ALDPS mode, the device would stop working. The another similar case is 1. unplug the cable. 2. interrupt transfer queue a work_queue for linking change. 3. device enters the ALDPS mode. 4. a tx occurs before the work_queue is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0b1b9188625d52..c34df33c6d723e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *urb) } } else { if (netif_carrier_ok(tp->netdev)) { + netif_stop_queue(tp->netdev); set_bit(RTL8152_LINK_CHG, &tp->flags); schedule_delayed_work(&tp->schedule, 0); } @@ -3169,6 +3170,9 @@ static void set_carrier(struct r8152 *tp) napi_enable(&tp->napi); netif_wake_queue(netdev); netif_info(tp, link, netdev, "carrier on\n"); + } else if (netif_queue_stopped(netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) { + netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { @@ -3702,8 +3706,18 @@ static int rtl8152_resume(struct usb_interface *intf) tp->rtl_ops.autosuspend_en(tp, false); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); - if (netif_carrier_ok(tp->netdev)) - rtl_start_rx(tp); + + if (netif_carrier_ok(tp->netdev)) { + if (rtl8152_get_speed(tp) & LINK_STATUS) { + rtl_start_rx(tp); + } else { + netif_carrier_off(tp->netdev); + tp->rtl_ops.disable(tp); + netif_info(tp, link, tp->netdev, + "linking down\n"); + } + } + napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); From 1adbddef118ae8bf6409c13208645d28c29731c1 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:41 +0300 Subject: [PATCH 1706/1911] net:ethernet:aquantia: Remove adapter re-opening when MTU changed. Closing/opening the adapter is not needed at all. The new MTU settings take effect immediately. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index d05fbfdce5e52e..5d6c40d86775dd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -100,11 +100,6 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) goto err_exit; ndev->mtu = new_mtu; - if (netif_running(ndev)) { - aq_ndev_close(ndev); - aq_ndev_open(ndev); - } - err_exit: return err; } From ea0504f554c8f989eab58549300d15582d36f039 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:42 +0300 Subject: [PATCH 1707/1911] net:ethernet:aquantia: Fix packet type detection (TCP/UDP) for IPv6. In order for the checksum offloads to work correctly we need to set the packet type bit (TCP/UDP) in the TX context buffer. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Tested-by: David Arcari Signed-off-by: David S. 
Miller --- .../net/ethernet/aquantia/atlantic/aq_nic.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ee78444bfb8851..db2b51da298826 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -510,10 +510,22 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, if (skb->ip_summed == CHECKSUM_PARTIAL) { dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ? 1U : 0U; - dx_buff->is_tcp_cso = - (ip_hdr(skb)->protocol == IPPROTO_TCP) ? 1U : 0U; - dx_buff->is_udp_cso = - (ip_hdr(skb)->protocol == IPPROTO_UDP) ? 1U : 0U; + + if (ip_hdr(skb)->version == 4) { + dx_buff->is_tcp_cso = + (ip_hdr(skb)->protocol == IPPROTO_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ip_hdr(skb)->protocol == IPPROTO_UDP) ? + 1U : 0U; + } else if (ip_hdr(skb)->version == 6) { + dx_buff->is_tcp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ? + 1U : 0U; + } } for (; nr_frags--; ++frag_count) { From 9f0dd8c322e89f137c44b437d6fbecc9dea12204 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:43 +0300 Subject: [PATCH 1708/1911] net:ethernet:aquantia: Missing spinlock initialization. Fix for missing initialization aq_ring header.lock spinlock. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0358e6072d45ab..3a8a4aa13687ff 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -101,6 +101,7 @@ int aq_ring_init(struct aq_ring_s *self) self->hw_head = 0; self->sw_head = 0; self->sw_tail = 0; + spin_lock_init(&self->header.lock); return 0; } From 386aff88e32ec3f82e3f032217bad0c8c8846349 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:44 +0300 Subject: [PATCH 1709/1911] net:ethernet:aquantia: Fix for LSO with IPv6. Fix Context Command bit: L3 type = "0" for IPv4, "1" for IPv6. Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index db2b51da298826..cdb02991f249c6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -487,6 +487,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->mss = skb_shinfo(skb)->gso_size; dx_buff->is_txc = 1U; + dx_buff->is_ipv6 = + (ip_hdr(skb)->version == 6) ? 
1U : 0U; + dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; ++ret; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 2572546450685d..eecd6d1c4d731a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -58,7 +58,8 @@ struct __packed aq_ring_buff_s { u8 len_l2; u8 len_l3; u8 len_l4; - u8 rsvd2; + u8 is_ipv6:1; + u8 rsvd2:7; u32 len_pkt; }; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a2b746a2dd50b8..a536875a7d0dce 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -433,6 +433,9 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index cab2931dab9ac3..69488c9b03e21d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -471,6 +471,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; From 5d73bb863c2ef3aa1a28b5885c85ede7307df8ea Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:45 +0300 Subject: [PATCH 1710/1911] net:ethernet:aquantia: Reset is_gso flag when EOP reached. We need to reset is_gso flag when EOP reached (entire LSO packet processed). Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a536875a7d0dce..4ee15ff06a448b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -461,6 +461,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_A0_TXD_CTL_EOP; txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_WB; + is_gso = false; } } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 69488c9b03e21d..42150708191dbf 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -499,6 +499,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_B0_TXD_CTL_EOP; txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB; + is_gso = false; } } From 7d969d2e8890f546c8cec634b3aa5f57d4eef883 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:08 +0100 Subject: [PATCH 1711/1911] s390/qeth: size calculation outbound buffers Depending on the device type, hard_start_xmit() builds different output buffer formats. 
For instance with HiperSockets, on both L2 and L3 we strip the ETH header from the skb - L3 doesn't need it, and L2 carries it in the buffer's header element. For this, we pass data_offset = ETH_HLEN all the way down to __qeth_fill_buffer(), where skb->data is then adjusted accordingly. But the initial size calculation still considers the *full* skb length (including the ETH header). So qeth_get_elements_no() can erroneously reject a skb as too big, even though it would actually fit into an output buffer once the ETH header has been trimmed off later. Fix this by passing an additional offset to qeth_get_elements_no(), that indicates where in the skb the on-wire data actually begins. Since the current code uses data_offset=-1 for some special handling on OSA, we need to clamp data_offset to 0... On HiperSockets this helps when sending ~MTU-size skbs with weird page alignment. No change for OSA or AF_IUCV. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 5 +++-- drivers/s390/net/qeth_l2_main.c | 5 +++-- drivers/s390/net/qeth_l3_main.c | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e7addea8741b79..d9561e39c3b237 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -961,7 +961,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, + int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 315d8a2db7c066..9a5f99ccb122ba 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3837,6 +3837,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * @card: qeth card structure, to check max. elems. * @skb: SKB address * @extra_elems: extra elems needed, to check against max. + * @data_offset: range starts at skb->data + data_offset * * Returns the number of pages, and thus QDIO buffer elements, needed to cover * skb data, including linear part and fragments. Checks if the result plus @@ -3844,10 +3845,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * Note: extra_elems is not included in the returned result. 
*/ int qeth_get_elements_no(struct qeth_card *card, - struct sk_buff *skb, int extra_elems) + struct sk_buff *skb, int extra_elems, int data_offset) { int elements = qeth_get_elements_for_range( - (addr_t)skb->data, + (addr_t)skb->data + data_offset, (addr_t)skb->data + skb_headlen(skb)) + qeth_get_elements_for_frags(skb); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index bea48330761899..af4e6a639fecf2 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -849,7 +849,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * chaining we can not send long frag lists */ if ((card->info.type != QETH_CARD_TYPE_IQD) && - !qeth_get_elements_no(card, new_skb, 0)) { + !qeth_get_elements_no(card, new_skb, 0, 0)) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -894,7 +894,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, new_skb, elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 06d0addcc058dc..72aa9537a7251c 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2867,7 +2867,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((card->info.type != QETH_CARD_TYPE_IQD) && ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) || - (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) { + (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -2909,7 +2909,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements = use_tso ? qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) : - qeth_get_elements_no(card, new_skb, hdr_elements); + qeth_get_elements_no(card, new_skb, hdr_elements, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); From acd9776b5c45ef02d1a210969a6fcc058afb76e3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:09 +0100 Subject: [PATCH 1712/1911] s390/qeth: no ETH header for outbound AF_IUCV With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr() fills this ETH header... and then immediately moves it to the skb's headroom, where it disappears and is never seen again. But it's still possible for us to return NETDEV_TX_BUSY after the skb has been modified. Since we didn't get a private copy of the skb, the next time the skb is delivered to hard_start_xmit() it no longer has the expected layout (we moved the ETH header to the headroom, so skb->data now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr() does another round of rebuilding, the resulting qeth header ends up all wrong. On transmission, the buffer is then rejected by the HiperSockets device with SBALF15 = x'04'. When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it tears down the offending socket. 
As the ETH header for AF_IUCV serves no purpose, just align the code to what we do for IP traffic on L3 HiperSockets: keep the ETH header at skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer(). When mapping the payload into the SBAL elements, the ETH header is then stripped off. This avoids the skb manipulations in qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit() after NETDEV_TX_BUSY is now processed properly. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 72aa9537a7251c..653f0fb76573ab 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2609,17 +2609,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card, char daddr[16]; struct af_iucv_trans_hdr *iucv_hdr; - skb_pull(skb, 14); - card->dev->header_ops->create(skb, card->dev, 0, - card->dev->dev_addr, card->dev->dev_addr, - card->dev->addr_len); - skb_pull(skb, 14); - iucv_hdr = (struct af_iucv_trans_hdr *)skb->data; memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; - hdr->hdr.l3.length = skb->len; + hdr->hdr.l3.length = skb->len - ETH_HLEN; hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; + + iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN); memset(daddr, 0, sizeof(daddr)); daddr[0] = 0xfe; daddr[1] = 0x80; @@ -2823,10 +2819,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((card->info.type == QETH_CARD_TYPE_IQD) && !skb_is_nonlinear(skb)) { new_skb = skb; - if (new_skb->protocol == ETH_P_AF_IUCV) - data_offset = 0; - else - data_offset = ETH_HLEN; + data_offset = ETH_HLEN; hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); if (!hdr) goto tx_drop; From 90b14dc731b1b4aac8ba01850b530bf7ba26462f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 23 Mar 2017 14:55:10 +0100 Subject: [PATCH 1713/1911] MAINTAINERS: add Julian Wiedmann Add Julian Wiedmann as additional maintainer for drivers/s390/net and net/iucv. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c45c02bc6082f3..e48678d1540124 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,6 +10808,7 @@ F: drivers/s390/block/dasd* F: block/partitions/ibm.c S390 NETWORK DRIVERS +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ @@ -10838,6 +10839,7 @@ S: Supported F: drivers/s390/scsi/zfcp_* S390 IUCV NETWORK LAYER +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ From a5af83925363eb85d467933e3d6ec5a87001eb7c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 17:07:26 +0100 Subject: [PATCH 1714/1911] bna: avoid writing uninitialized data into hw registers The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable writing undefined values into the hardware registers: drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry': arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized] arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized] The two functions look like they should do the same thing, but only one of them initializes the time stamp and clscode field. The fact that we only get a warning for one of the two functions seems to be arbitrary, based on the inlining decisions in the compiler. To address this, I'm making both functions do the same thing: - set the clscode from the ioc structure in both - set the time stamp from ktime_get_real_seconds (which also avoids the signed-integer overflow in 2038 and extends the well-defined behavior until 2106). - zero-fill the reserved field Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9e59663a6eadb0..0f6811860ad51d 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1930,13 +1930,13 @@ static void bfa_ioc_send_enable(struct bfa_ioc *ioc) { struct bfi_ioc_ctrl_req enable_req; - struct timeval tv; bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, bfa_ioc_portid(ioc)); enable_req.clscode = htons(ioc->clscode); - do_gettimeofday(&tv); - enable_req.tv_sec = ntohl(tv.tv_sec); + enable_req.rsvd = htons(0); + /* overflow in 2106 */ + enable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); } @@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc) bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, bfa_ioc_portid(ioc)); + disable_req.clscode = htons(ioc->clscode); + disable_req.rsvd = htons(0); + /* overflow in 2106 */ + disable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); } From a80db69e47d764bbcaf2fec54b1f308925e7c490 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 23 Mar 2017 11:03:31 -0700 Subject: [PATCH 1715/1911] kcm: return immediately after copy_from_user() failure There is no reason to continue after a copy_from_user() failure. 
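As an aside that is not taken from the kcm patch itself, the rule it applies can be sketched as below; all names are hypothetical. Once copy_from_user() reports a fault, the local structure may be partially filled or uninitialized, so the handler must return -EFAULT immediately instead of recording the error and continuing to use the data:

#include <linux/errno.h>
#include <linux/uaccess.h>

struct demo_req {                      /* hypothetical request layout */
        int fd;
        int flags;
};

static int demo_handle_ioctl(void __user *arg)
{
        struct demo_req req;

        if (copy_from_user(&req, arg, sizeof(req)))
                return -EFAULT;        /* bail out: req may be garbage */

        /* reached only when the whole request was copied successfully */
        return 0;
}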
Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe29..31762f76cdb5f2 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); From 871a8623d3b40221ad1103aff715dfee0aa4dacf Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 17 Mar 2017 18:30:07 -0500 Subject: [PATCH 1716/1911] i40iw: Receive netdev events post INET_NOTIFIER state Netdev notification events are de-registered only when all client iwdev instances are removed. If a single client is closed and re-opened, netdev events could arrive even before the Control Queue-Pair (CQP) is created, causing a NULL pointer dereference crash in i40iw_get_cqp_request. Fix this by allowing netdev event notification only after we have reached the INET_NOTIFIER state with respect to device initialization. Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0f5d43d1f5fc30..70c3e9e795082b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; upper_dev = netdev_master_upper_dev_get(netdev); if (netdev != event_netdev) @@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; if (netdev != event_netdev) return NOTIFY_DONE; @@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); if (neigh->nud_state & NUD_VALID) { From ae9955aeb8e47c4f60a02add47acf9850ca0ead7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Mar 2017 20:34:45 +0300 Subject: [PATCH 1717/1911] ARC: vdk: Fix support of UIO MotherBoard section has its "ranges" set to 0xE000_0000-0xF000_0000. But UIO node maps 4 different areas in different memory locations and all outside MB's ranges. That obviously breaks UIO mappings in runtime. 
Cc: Ruud Derwig Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi index f0df59b23e21e4..459fc656b759ae 100644 --- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -112,13 +112,19 @@ interrupts = <7>; bus-width = <4>; }; + }; - /* Embedded Vision subsystem UIO mappings; only relevant for EV VDK */ - uio_ev: uio@0xD0000000 { - compatible = "generic-uio"; - reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>; - reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; - interrupts = <23>; - }; + /* + * Embedded Vision subsystem UIO mappings; only relevant for EV VDK + * + * This node is intentionally put outside of MB above becase + * it maps areas outside of MB's 0xEz-0xFz. + */ + uio_ev: uio@0xD0000000 { + compatible = "generic-uio"; + reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>; + reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; + interrupt-parent = <&mb_intc>; + interrupts = <23>; }; }; From 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:00:52 +0200 Subject: [PATCH 1718/1911] IB/core: Protect against self-requeue of a cq work item We need to make sure that the cq work item does not run when we are destroying the cq. Unlike flush_work, cancel_work_sync protects against self-requeue of the work item (which we can do in ib_cq_poll_work). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche -- Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index e95510117a6dd7..2746d2eb3d52c4 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: - flush_work(&cq->work); + cancel_work_sync(&cq->work); break; default: WARN_ON_ONCE(1); From 9f47a48e6eb97d793db85373e3ef4c55d876d334 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 23 Mar 2017 20:47:15 -0700 Subject: [PATCH 1719/1911] Revert "e1000e: driver trying to free already-free irq" This reverts commit 7e54d9d063fa239c95c21548c5267f0ef419ff56. After additional regression testing, several users are experiencing kernel panics during shutdown on e1000e devices. Reverting this change resolves the issue. Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2175cced402f7f..e9af89ad039c6f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6274,8 +6274,8 @@ static int e1000e_pm_freeze(struct device *dev) /* Quiesce the device without resetting the hardware */ e1000e_down(adapter, false); e1000_free_irq(adapter); - e1000e_reset_interrupt_capability(adapter); } + e1000e_reset_interrupt_capability(adapter); /* Allow time for pending master requests to run */ e1000e_disable_pcie_master(&adapter->hw); From cb8864559631754ac93d5734b165ccd0cad4728c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Mar 2017 11:34:20 -0700 Subject: [PATCH 1720/1911] infiniband: Fix alignment of mmap cookies to support VIPT caching When vmalloc_user is used to create memory that is supposed to be mmap'd to user space, it is necessary for the mmap cookie (eg the offset) to be aligned to SHMLBA. This creates a situation where all virtual mappings of the same physical page share the same virtual cache index and guarantees VIPT coherence. Otherwise the cache is non-coherent and the kernel will not see writes by userspace when reading the shared page (or vice-versa). Reported-by: Josh Beavers Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_mmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index e202b8142759f5..6b712eecbd37d9 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, spin_lock_irq(&rdi->mmap_offset_lock); if (rdi->mmap_offset == 0) - rdi->mmap_offset = PAGE_SIZE; + rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->offset = rdi->mmap_offset; - rdi->mmap_offset += size; + rdi->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_irq(&rdi->mmap_offset_lock); INIT_LIST_HEAD(&ip->pending_mmaps); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index c572a4c09359c9..bd812e00988ed3 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, spin_lock_bh(&rxe->mmap_offset_lock); if (rxe->mmap_offset == 0) - rxe->mmap_offset = PAGE_SIZE; + rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->info.offset = rxe->mmap_offset; - rxe->mmap_offset += size; + rxe->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_bh(&rxe->mmap_offset_lock); From 93efe9817e651607d83e5f100076ae62d0ce0b93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Mar 2017 12:04:19 -0600 Subject: [PATCH 1721/1911] blk-mq: include errors in did_work calculation Currently we return true in blk_mq_dispatch_rq_list() if we queued IO successfully, but we really want to return whether or not the we made progress. Progress includes if we got an error return. If we don't, this can lead to a hang in blk_mq_sched_dispatch_requests() when a driver is draining IO by returning BLK_MQ_QUEUE_ERROR instead of manually ending the IO in error and return BLK_MQ_QUEUE_OK. 
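To make the failure mode concrete, here is a condensed sketch of the accounting this change introduces; it is illustrative only, and queue_one_request() merely stands in for the driver's ->queue_rq() call:

    /* Condensed sketch of the dispatch loop: an error completion still
     * consumed a request, so it must count as progress; otherwise a driver
     * draining I/O with BLK_MQ_RQ_QUEUE_ERROR makes the scheduler think
     * nothing happened and re-run the dispatch path forever.
     */
    static bool dispatch_made_progress(struct blk_mq_hw_ctx *hctx,
                                       struct list_head *list)
    {
            int errors = 0, queued = 0;

            while (!list_empty(list)) {
                    struct request *rq = list_first_entry(list, struct request,
                                                          queuelist);
                    int ret;

                    list_del_init(&rq->queuelist);
                    ret = queue_one_request(hctx, rq); /* stand-in for ->queue_rq() */

                    if (ret == BLK_MQ_RQ_QUEUE_OK) {
                            queued++;
                    } else if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
                            errors++;          /* consumed, just unsuccessfully */
                            rq->errors = -EIO;
                            blk_mq_end_request(rq, rq->errors);
                    } else {                   /* BLK_MQ_RQ_QUEUE_BUSY */
                            list_add(&rq->queuelist, list); /* put it back, stop */
                            break;
                    }
            }

            return (queued + errors) != 0;     /* was: queued != 0 */
    }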
Tested-by: Josef Bacik Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 08a49c69738bb5..6b6e7bc041dbf3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -969,7 +969,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; - int queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; /* * Start off with dptr being NULL, so we start the first request @@ -980,7 +980,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) /* * Now process all the entries, sending them to the driver. */ - queued = 0; + errors = queued = 0; while (!list_empty(list)) { struct blk_mq_queue_data bd; @@ -1037,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) default: pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: + errors++; rq->errors = -EIO; blk_mq_end_request(rq, rq->errors); break; @@ -1088,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return queued != 0; + return (queued + errors) != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) From 9dd5d3ab49f74e1a3fab92c432a7600bd9081ccc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:26 -0400 Subject: [PATCH 1722/1911] nbd: handle ERESTARTSYS properly We can submit IO in a processes context, which means there can be pending signals. This isn't a fatal error for NBD, but it does require some finesse. If the signal happens before we transmit anything then we are ok, just requeue the request and carry on. However if we've done a partial transmit we can't allow anything else to be transmitted on this socket until we transmit the remaining part of the request. Deal with this by keeping track of how much we've sent for the current request, and if we get an ERESTARTSYS during any part of our transmission save the state of that request and requeue the IO. If anybody tries to submit a request that isn't our pending request then requeue that request until we are able to service the one that is pending. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 115 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 26 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7e4287bc19e529..3d1fc37a83b1d2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex); struct nbd_sock { struct socket *sock; struct mutex tx_lock; + struct request *pending; + int sent; }; #define NBD_TIMEDOUT 0 @@ -202,7 +204,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, * Send or receive packet. 
*/ static int sock_xmit(struct nbd_device *nbd, int index, int send, - struct iov_iter *iter, int msg_flags) + struct iov_iter *iter, int msg_flags, int *sent) { struct socket *sock = nbd->socks[index]->sock; int result; @@ -237,6 +239,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, result = -EPIPE; /* short read */ break; } + if (sent) + *sent += result; } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); @@ -248,6 +252,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); + struct nbd_sock *nsock = nbd->socks[index]; int result; struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; @@ -256,6 +261,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); + int sent = nsock->sent, skip = 0; iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); @@ -283,6 +289,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return -EIO; } + /* We did a partial send previously, and we at least sent the whole + * request struct, so just go and send the rest of the pages in the + * request. + */ + if (sent) { + if (sent >= sizeof(request)) { + skip = sent - sizeof(request); + goto send_pages; + } + iov_iter_advance(&from, sent); + } request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -294,15 +311,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, - (type == NBD_CMD_WRITE) ? MSG_MORE : 0); + (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* If we havne't sent anything we can just return BUSY, + * however if we have sent something we need to make + * sure we only allow this req to be sent until we are + * completely done. + */ + if (sent) { + nsock->pending = req; + nsock->sent = sent; + } + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - +send_pages: if (type != NBD_CMD_WRITE) - return 0; + goto out; bio = req->bio; while (bio) { @@ -318,8 +347,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) cmd, bvec.bv_len); iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, bvec.bv_len); - result = sock_xmit(nbd, index, 1, &from, flags); + if (skip) { + if (skip >= iov_iter_count(&from)) { + skip -= iov_iter_count(&from); + continue; + } + iov_iter_advance(&from, skip); + skip = 0; + } + result = sock_xmit(nbd, index, 1, &from, flags, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* We've already sent the header, we + * have no choice but to set pending and + * return BUSY. 
+ */ + nsock->pending = req; + nsock->sent = sent; + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); @@ -336,6 +382,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } bio = next; } +out: + nsock->pending = NULL; + nsock->sent = 0; return 0; } @@ -353,7 +402,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) reply.magic = 0; iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -395,7 +444,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) rq_for_each_segment(bvec, req, iter) { iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, bvec.bv_len); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -482,22 +531,23 @@ static void nbd_clear_que(struct nbd_device *nbd) } -static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) +static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; struct nbd_sock *nsock; + int ret; if (index >= nbd->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); - goto error_out; + return -EINVAL; } if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; + return -EINVAL; } req->errors = 0; @@ -508,29 +558,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) mutex_unlock(&nsock->tx_lock); dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; + return -EINVAL; } - if (nbd_send_cmd(nbd, cmd, index) != 0) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Request send failed\n"); - req->errors++; - nbd_end_request(cmd); + /* Handle the case that we have a pending request that was partially + * transmitted that _has_ to be serviced first. We need to call requeue + * here so that it gets put _after_ the request that is already on the + * dispatch list. + */ + if (unlikely(nsock->pending && nsock->pending != req)) { + blk_mq_requeue_request(req, true); + ret = 0; + goto out; } - + ret = nbd_send_cmd(nbd, cmd, index); +out: mutex_unlock(&nsock->tx_lock); - - return; - -error_out: - req->errors++; - nbd_end_request(cmd); + return ret; } static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + int ret; /* * Since we look at the bio's to send the request over the network we @@ -543,10 +594,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, */ init_completion(&cmd->send_complete); blk_mq_start_request(bd->rq); - nbd_handle_cmd(cmd, hctx->queue_num); + + /* We can be called directly from the user space process, which means we + * could possibly have signals pending so our sendmsg will fail. In + * this case we need to return that we are busy, otherwise error out as + * appropriate. 
+ */ + ret = nbd_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) + ret = BLK_MQ_RQ_QUEUE_ERROR; + if (!ret) + ret = BLK_MQ_RQ_QUEUE_OK; complete(&cmd->send_complete); - return BLK_MQ_RQ_QUEUE_OK; + return ret; } static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, @@ -581,6 +642,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, mutex_init(&nsock->tx_lock); nsock->sock = sock; + nsock->pending = NULL; + nsock->sent = 0; socks[nbd->num_connections++] = nsock; if (max_part) @@ -634,7 +697,7 @@ static void send_disconnects(struct nbd_device *nbd) for (i = 0; i < nbd->num_connections; i++) { iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); - ret = sock_xmit(nbd, i, 1, &from, 0); + ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); From c103b4dac8f69ca55196afcd57c4cdd6d3ab88eb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:27 -0400 Subject: [PATCH 1723/1911] nbd: set rq->errors to actual error code We've been relying on the block layer to assume rq->errors being set translates into -EIO. I noticed in testing that sometimes this isn't true, and really there's not much of a reason to have a counter instead of just using -EIO. So set it properly so we don't leak random numbers to unsuspecting victims. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3d1fc37a83b1d2..dbc22f4bed3da5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -192,7 +192,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - req->errors++; + req->errors = -EIO; mutex_lock(&nbd->config_lock); sock_shutdown(nbd); @@ -432,7 +432,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors++; + req->errors = -EIO; return cmd; } @@ -448,7 +448,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); - req->errors++; + req->errors = -EIO; return cmd; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", @@ -518,7 +518,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors++; + req->errors = -EIO; nbd_end_request(cmd); } From f8586855031a1d6b243f013c3082631346fddfad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:28 -0400 Subject: [PATCH 1724/1911] nbd: set queue timeout properly We can't just set the timeout on the tagset, we have to set it on the queue as it would have been setup already at this point. 
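The reason both values need updating: the tag set's timeout is only consulted when the request queue is created, so by the time this ioctl runs it no longer affects the live queue. A minimal sketch of the handling (the helper name is made up for illustration; the two assignments mirror the hunk below):

    /* NBD_SET_TIMEOUT, arg in seconds: update both the template used for
     * future queues and the queue that already exists.
     */
    static void nbd_set_timeout_sketch(struct nbd_device *nbd, unsigned long arg)
    {
            if (!arg)
                    return;
            nbd->tag_set.timeout = arg * HZ;                  /* future queues */
            blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); /* the live queue */
    }

From user space this path is reached with something like ioctl(nbd_fd, NBD_SET_TIMEOUT, 30) for a 30 second timeout.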
Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index dbc22f4bed3da5..b0003dab90b9ca 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -844,7 +844,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_size_set(nbd, bdev, nbd->blksize, arg); return 0; case NBD_SET_TIMEOUT: - nbd->tag_set.timeout = arg * HZ; + if (arg) { + nbd->tag_set.timeout = arg * HZ; + blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); + } return 0; case NBD_SET_FLAGS: From abbbdf12497d36b001e0865bc5bc6cc363f3a5e1 Mon Sep 17 00:00:00 2001 From: Ratna Manoj Bolla Date: Fri, 24 Mar 2017 14:08:29 -0400 Subject: [PATCH 1725/1911] nbd: replace kill_bdev() with __invalidate_device() When a filesystem is mounted on a nbd device and on a disconnect, because of kill_bdev(), and resetting bdev size to zero, buffer_head mappings are getting destroyed under mounted filesystem. After a bdev size reset(i.e bdev->bd_inode->i_size = 0) on a disconnect, followed by a sys_umount(), generic_shutdown_super()->... ->__sync_blockdev()->... -blkdev_writepages()->... ->do_invalidatepage()->... -discard_buffer() is discarding superblock buffer_head assumed to be in mapped state by ext4_commit_super(). [mlin: ported to 4.11-rc2] Signed-off-by: Ratna Manoj Bolla Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b0003dab90b9ca..d8a23561b4cb4b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -126,7 +126,8 @@ static const char *nbdcmd_to_ascii(int cmd) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) { - bd_set_size(bdev, 0); + if (bdev->bd_openers <= 1) + bd_set_size(bdev, 0); set_capacity(nbd->disk, 0); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -665,6 +666,8 @@ static void nbd_reset(struct nbd_device *nbd) static void nbd_bdev_reset(struct block_device *bdev) { + if (bdev->bd_openers > 1) + return; set_device_ro(bdev, false); bdev->bd_inode->i_size = 0; if (max_part > 0) { @@ -728,7 +731,8 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) { sock_shutdown(nbd); nbd_clear_que(nbd); - kill_bdev(bdev); + + __invalidate_device(bdev, true); nbd_bdev_reset(bdev); /* * We want to give the run thread a chance to wait for everybody From 95f255211396958c718aef8c45e3923b5211ea7b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 09:38:03 -0700 Subject: [PATCH 1726/1911] net: Do not allow negative values for busy_read and busy_poll sysctl interfaces This change basically codifies what I think was already the limitations on the busy_poll and busy_read sysctl interfaces. We weren't checking the lower bounds and as such could input negative values. The behavior when that was used was dependent on the architecture. In order to prevent any issues with that I am just disabling support for values less than 0 since this way we don't have to worry about any odd behaviors. By limiting the sysctl values this way it also makes it consistent with how we handle the SO_BUSY_POLL socket option since the value appears to be reported as a signed integer value and negative values are rejected. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/sysctl_net_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea0b..7f9cc400eca08c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED From 6332dee83d8eab80d6d502cc51135b998fe6df79 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:56 -0800 Subject: [PATCH 1727/1911] RDMA/vmw_pvrdma: Cleanup unused variables Removed the unused nreq and redundant index variables. Moved hardcoded async and cq ring pages number to macro. Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 ++ .../infiniband/hw/vmw_pvrdma/pvrdma_main.c | 4 +-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 33 ++++++++----------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 3cd96c1b95029d..dbf61c37835c7e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -69,6 +69,8 @@ */ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 +#define PVRDMA_NUM_RING_PAGES 4 + struct pvrdma_dev; struct pvrdma_page_dir { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 100bea5c42ffb7..e77476ca70ac8e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -858,7 +858,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->resp_slot_dma = (u64)slot_dma; /* Async event ring */ - dev->dsr->async_ring_pages.num_pages = 4; + dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->async_pdir, dev->dsr->async_ring_pages.num_pages, true); if (ret) @@ -867,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; /* CQ notification ring */ - dev->dsr->cq_ring_pages.num_pages = 4; + dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, dev->dsr->cq_ring_pages.num_pages, true); if (ret) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index dbbfd35e7da7ad..3ffbb2d4217025 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -554,13 +554,13 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, return ret; } -static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->sq.offset + n * qp->sq.wqe_size); } -static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->rq.offset + n * qp->rq.wqe_size); @@ -598,9 
+598,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; struct pvrdma_sq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int i, index; - int nreq; - int ret; + int i, ret; /* * In states lower than RTS, we can fail immediately. In other states, @@ -613,9 +611,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(!pvrdma_idx_ring_has_space( qp->sq.ring, qp->sq.wqe_cnt, &tail))) { @@ -680,7 +677,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index); + wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail); memset(wqe_hdr, 0, sizeof(*wqe_hdr)); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; @@ -771,12 +768,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->sq.wqe_cnt)) - index = 0; /* Update shared sq ring */ pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); + + wr = wr->next; } ret = 0; @@ -806,7 +802,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_rq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int index, nreq; int ret = 0; int i; @@ -821,9 +816,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(wr->num_sge > qp->rq.max_sg || wr->num_sge < 0)) { @@ -843,7 +837,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto out; } - wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index); + wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; wqe_hdr->total_len = 0; @@ -859,12 +853,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->rq.wqe_cnt)) - index = 0; /* Update shared rq ring */ pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); + + wr = wr->next; } spin_unlock_irqrestore(&qp->rq.lock, flags); From e51c2fb0331cb3440d7dc83ee78019ee8c7bb366 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:57 -0800 Subject: [PATCH 1728/1911] RDMA/vmw_pvrdma: Dont hardcode QP header page Moved the header page count to a macro. 
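For reference, the layout the macro describes, as a sketch; PVRDMA_QP_NUM_HEADER_PAGES is 1 in the hunk below, and DIV_ROUND_UP only abbreviates the open-coded rounding:

    /* Send-queue backing store: header page(s) first, then the WQE area
     * rounded up to whole pages; the WQEs start right after the header,
     * i.e. at PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE.
     */
    #define PVRDMA_QP_NUM_HEADER_PAGES 1

    static u32 pvrdma_sq_npages(u32 wqe_cnt, u32 wqe_size)
    {
            return PVRDMA_QP_NUM_HEADER_PAGES +
                   DIV_ROUND_UP(wqe_cnt * wqe_size, PAGE_SIZE);
    }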
Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 1 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index dbf61c37835c7e..9fbe22d3467b22 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -70,6 +70,7 @@ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 #define PVRDMA_NUM_RING_PAGES 4 +#define PVRDMA_QP_NUM_HEADER_PAGES 1 struct pvrdma_dev; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 3ffbb2d4217025..30062aad3af1a2 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap, sizeof(struct pvrdma_sge) * qp->sq.max_sg); /* Note: one extra page for the header. */ - qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size + - PAGE_SIZE - 1) / PAGE_SIZE; + qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES + + (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) / + PAGE_SIZE; return 0; } @@ -288,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, qp->npages = qp->npages_send + qp->npages_recv; /* Skip header page. */ - qp->sq.offset = PAGE_SIZE; + qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE; /* Recv queue pages are after send pages. */ qp->rq.offset = qp->npages_send * PAGE_SIZE; @@ -341,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; - cmd->send_chunks = qp->npages_send - 1; + cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; cmd->pdir_dma = qp->pdir.dir_dma; dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n", From b172679b0d3b93a6197b2ff892794e7178e3c448 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Wed, 22 Feb 2017 17:22:58 -0800 Subject: [PATCH 1729/1911] RDMA/vmw_pvrdma: Activate device on ethernet link up Restore device state when ethernet link changes to active. Acked-by: George Zhang Acked-by: Jorgen Hansen Acked-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index e69d6f3cae32b5..09078ccfaec719 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -132,7 +132,7 @@ enum pvrdma_pci_resource { enum pvrdma_device_ctl { PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ - PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */ + PVRDMA_DEVICE_CTL_UNQUIESCE, /* Unquiesce device. */ PVRDMA_DEVICE_CTL_RESET, /* Reset device. 
*/ }; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e77476ca70ac8e..34ebc7615411d9 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -56,7 +56,7 @@ #include "pvrdma.h" #define DRV_NAME "vmw_pvrdma" -#define DRV_VERSION "1.0.0.0-k" +#define DRV_VERSION "1.0.1.0-k" static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); @@ -660,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); break; case NETDEV_UP: - pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + pvrdma_write_reg(dev, PVRDMA_REG_CTL, + PVRDMA_DEVICE_CTL_UNQUIESCE); + + mb(); + + if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) + dev_err(&dev->pdev->dev, + "failed to activate device during link up\n"); + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", From ded260235308f340b979258a4c736e06ba12c747 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Mar 2017 08:21:52 +0300 Subject: [PATCH 1730/1911] IB/rxe: double free on error "goto err;" has it's own kfree_skb() call so it's a double free. We only need to free on the "goto exit;" path. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index dbfde0dc6ff7e7..9f95f50b290904 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -729,11 +729,11 @@ int rxe_requester(void *arg) ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; - kfree_skb(skb); rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { + kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } From 004d18ea99b0f3b7b3d5790e4dc5ca7d5238706d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Feb 2017 13:40:16 +0300 Subject: [PATCH 1731/1911] RDMA/ocrdma: fix a type issue in ocrdma_put_pd_num() We want to return zero on success or negative error codes. The type should be int and not u8. Signed-off-by: Dan Carpenter Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc9fb144e57b8e..c52edeafd616a3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -372,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id, return 0; } -static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, +static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, bool dpp_pool) { int status; From a1c5dd13228a1f9e5087375f9702422dfc2adbf1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 28 Feb 2017 21:42:53 +0200 Subject: [PATCH 1732/1911] IB/rxe: Update documentation link All Soft-RoCE (rxe) is handled now in rdma-core user space library, so the documentation. The patch below updates the documentation link to that new location. 
Reported-by: Josh Beavers Signed-off-by: Leon Romanovsky Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 7d1ac27ed2516d..6332dedc11e8a3 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -22,4 +22,4 @@ config RDMA_RXE To configure and work with soft-RoCE driver please use the following wiki page under "configure Soft-RoCE (RXE)" section: - https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home + https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md From 0957c29f78af7d890c4ac506eda8f76bfc5a137a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Mar 2017 22:56:53 +0000 Subject: [PATCH 1733/1911] IB/core: Restore I/O MMU, s390 and powerpc support Avoid that the following error message is reported on the console while loading an RDMA driver with I/O MMU support enabled: DMAR: Allocating domain for mlx5_0 failed Ensure that DMA mapping operations that use to_pci_dev() to access to struct pci_dev see the correct PCI device. E.g. the s390 and powerpc DMA mapping operations use to_pci_dev() even with I/O MMU support disabled. This patch preserves the following changes of the DMA mapping updates patch series: - Introduction of dma_virt_ops. - Removal of ib_device.dma_ops. - Removal of struct ib_dma_mapping_ops. - Removal of an if-statement from each ib_dma_*() operation. - IB HW drivers no longer set dma_device directly. Reported-by: Sebastian Ott Reported-by: Parav Pandit Fixes: commit 99db9494035f ("IB/core: Remove ib_device.dma_device") Signed-off-by: Bart Van Assche Reviewed-by: parav@mellanox.com Tested-by: parav@mellanox.com Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 26 ++++++++++++++++++++------ include/rdma/ib_verbs.h | 30 +++++++++++++++++------------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 593d2ce6ec7cec..addf869045cc9e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device, struct device *parent = device->dev.parent; WARN_ON_ONCE(!parent); - if (!device->dev.dma_ops) - device->dev.dma_ops = parent->dma_ops; - if (!device->dev.dma_mask) - device->dev.dma_mask = parent->dma_mask; - if (!device->dev.coherent_dma_mask) - device->dev.coherent_dma_mask = parent->coherent_dma_mask; + WARN_ON_ONCE(device->dma_device); + if (device->dev.dma_ops) { + /* + * The caller provided custom DMA operations. Copy the + * DMA-related fields that are used by e.g. dma_alloc_coherent() + * into device->dev. + */ + device->dma_device = &device->dev; + if (!device->dev.dma_mask) + device->dev.dma_mask = parent->dma_mask; + if (!device->dev.coherent_dma_mask) + device->dev.coherent_dma_mask = + parent->coherent_dma_mask; + } else { + /* + * The caller did not provide custom DMA operations. Use the + * DMA mapping operations of the parent device. + */ + device->dma_device = parent; + } mutex_lock(&device_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f1813c1368795..99e4423eb2b80b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1863,6 +1863,9 @@ struct ib_port_immutable { }; struct ib_device { + /* Do not access @dma_device directly from ULP nor from HW drivers. 
*/ + struct device *dma_device; + char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; @@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { - return dma_mapping_error(&dev->dev, dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** @@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { - return dma_map_single(&dev->dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** @@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(&dev->dev, addr, size, direction); + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { - return dma_map_page(&dev->dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); } /** @@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(&dev->dev, addr, size, direction); + dma_unmap_page(dev->dma_device, addr, size, direction); } /** @@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(&dev->dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); } /** @@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(&dev->dev, sg, nents, direction); + dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, @@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry @@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(&dev->dev, addr, size, dir); + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(&dev->dev, addr, size, dir); + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** @@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, dma_addr_t *dma_handle, gfp_t flag) { - return dma_alloc_coherent(&dev->dev, size, dma_handle, flag); + return dma_alloc_coherent(dev->dma_device, size, dma_handle, 
flag); } /** @@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle); + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** From 812755d69efcf7c12fded57983d456647a0bbadd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 24 Feb 2017 03:28:13 +0300 Subject: [PATCH 1734/1911] uapi: fix rdma/mlx5-abi.h userspace compilation errors Consistently use types from linux/types.h to fix the following rdma/mlx5-abi.h userspace compilation errors: /usr/include/rdma/mlx5-abi.h:69:25: error: 'u64' undeclared here (not in a function) MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, /usr/include/rdma/mlx5-abi.h:69:29: error: expected ',' or '}' before numeric constant MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, Include to fix the following rdma/mlx5-abi.h userspace compilation error: /usr/include/rdma/mlx5-abi.h:286:12: error: 'ETH_ALEN' undeclared here (not in a function) __u8 dmac[ETH_ALEN]; Signed-off-by: Dmitry V. Levin Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index da7cd62bace746..0b3d30837a9f64 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -34,6 +34,7 @@ #define MLX5_ABI_USER_H #include +#include /* For ETH_ALEN. */ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req { }; enum mlx5_lib_caps { - MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, + MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; struct mlx5_ib_alloc_ucontext_req_v2 { From 9fcd67d1772c43d2f23e8fca56acc7219e991676 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Fri, 24 Feb 2017 15:38:26 +0100 Subject: [PATCH 1735/1911] IB/rxe: increment msn only when completing a request According to C9-147, MSN should only be incremented when the last packet of a multi packet request has been received. "Logically, the requester associates a sequential Send Sequence Number (SSN) with each WQE posted to the send queue. The SSN bears a one- to-one relationship to the MSN returned by the responder in each re- sponse packet. Therefore, when the requester receives a response, it in- terprets the MSN as representing the SSN of the most recent request completed by the responder to determine which send WQE(s) can be completed." Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: David Marchand Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index d404a8aba7afca..c9dd385ce62e2c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -813,18 +813,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) WARN_ON_ONCE(1); } - /* We successfully processed this new request. */ - qp->resp.msn++; - /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) + if (pkt->mask & RXE_COMP_MASK) { + /* We successfully processed this new request. 
*/ + qp->resp.msn++; return RESPST_COMPLETE; - else if (qp_type(qp) == IB_QPT_RC) + } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; From fedd9e1f75828992ace5833379116f38c66ad7bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 16 Mar 2017 18:57:00 +0200 Subject: [PATCH 1736/1911] IB/cq: Don't process more than the given budget The caller might not want this overhead. Reviewed-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 2746d2eb3d52c4..f2ae75fa3128b9 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget) { int i, n, completed = 0; - while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) { + /* + * budget might be (-1) if the caller does not + * want to bound this call, thus we need unsigned + * minimum here. + */ + while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH, + budget - completed), cq->wc)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &cq->wc[i]; From b7363e67b23e04c23c2a99437feefac7292a88bc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:03:17 +0200 Subject: [PATCH 1737/1911] IB/device: Convert ib-comp-wq to be CPU-bound This workqueue is used by our storage target mode ULPs via the new CQ API. Recent observations when working with very high-end flash storage devices reveal that UNBOUND workqueue threads can migrate between cpu cores and even numa nodes (although some numa locality is accounted for). While this attribute can be useful in some workloads, it does not fit in very nicely with the normal run-to-completion model we usually use in our target-mode ULPs and the block-mq irq<->cpu affinity facilities. The whole block-mq concept is that the completion will land on the same cpu where the submission was performed. The fact that our submitter thread is migrating cpus can break this locality. We assume that as a target mode ULP, we will serve multiple initiators/clients and we can spread the load enough without having to use unbound kworkers. Also, while we're at it, expose this workqueue via sysfs which is harmless and can be useful for debug. Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index addf869045cc9e..7c9e34d679d325 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1029,8 +1029,7 @@ static int __init ib_core_init(void) return -ENOMEM; ib_comp_wq = alloc_workqueue("ib-comp-wq", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - WQ_UNBOUND_MAX_ACTIVE); + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!ib_comp_wq) { ret = -ENOMEM; goto err; From 28ee1b746f493b7c62347d714f58fbf4f70df4f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Mar 2017 19:42:37 +0100 Subject: [PATCH 1738/1911] secure_seq: downgrade to per-host timestamp offsets Unfortunately too many devices (not under our control) use tcp_tw_recycle=1, which depends on timestamps being identical of the same saddr. 
Although tcp_tw_recycle got removed in net-next we can't make such end hosts disappear so downgrade to per-host timestamp offsets. Cc: Soheil Hassas Yeganeh Cc: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Reported-by: Yvan Vanrossomme Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/secure_seq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 758f140b6bedc5..d28da7d363f170 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport, u32 *tsoff) { @@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_sequence_number); @@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, @@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } From 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 15:01:42 -0700 Subject: [PATCH 1739/1911] i40e: Do not enable NAPI on q_vectors that have no rings When testing the epoll w/ busy poll code I found that I could get into a state where the i40e driver had q_vectors w/ active NAPI that had no rings. This was resulting in a divide by zero error. To correct it I am updating the driver code so that we only support NAPI on q_vectors that have 1 or more rings allocated to them. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e8a8351c8ea998..82a95cc2c8ee38 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4438,8 +4438,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -4453,8 +4457,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** From ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 20:16:33 +0200 Subject: [PATCH 1740/1911] RDMA/iser: Fix possible mr leak on device removal event When the rdma device is removed, we must cleanup all the rdma resources within the DEVICE_REMOVAL event handler to let the device teardown gracefully. When this happens with live I/O, some memory regions are occupied. Thus, track them too and dereg all the mr's. We are safe with mr access by iscsi_iser_cleanup_task. Reported-by: Raju Rangoju Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Max Gurtovoy Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9d0b22ad58c157..c1ae4aeae2f90e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 30b622f2ab7382..c538a38c91ce95 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + 
list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); From f6aafac184a3e46e919769dd4faa8bf0dc436534 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:18:45 +0100 Subject: [PATCH 1741/1911] IB/qib: fix false-postive maybe-uninitialized warning aarch64-linux-gcc-7 complains about code it doesn't fully understand: drivers/infiniband/hw/qib/qib_iba7322.c: In function 'qib_7322_txchk_change': include/asm-generic/bitops/non-atomic.h:105:35: error: 'shadow' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is right, and despite trying hard, I could not come up with a version that I liked better than just adding a fake initialization here to shut up the warning. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Arnd Bergmann Acked-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 12c4208fd7013b..af9f596bb68b29 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -7068,7 +7068,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, unsigned long flags; while (wait) { - unsigned long shadow; + unsigned long shadow = 0; int cstart, previ = -1; /* From 43a6684519ab0a6c52024b5e25322476cabad893 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2017 19:36:13 -0700 Subject: [PATCH 1742/1911] ping: implement proper locking We got a report of yet another bug in ping http://www.openwall.com/lists/oss-security/2017/03/24/6 ->disconnect() is not called with socket lock held. Fix this by acquiring ping rwlock earlier. Thanks to Daniel, Alexander and Andrey for letting us know this problem. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Reported-by: Daniel Jiang Reported-by: Solar Designer Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv4/ping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e27a..ccfbce13a6333a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); From b1977682a3858b5584ffea7cfb7bd863f68db18d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 24 Mar 2017 15:57:33 -0700 Subject: [PATCH 1743/1911] bpf: improve verifier packet range checks llvm can optimize the 'if (ptr > data_end)' checks to be in the order slightly different than the original C code which will confuse verifier. Like: if (ptr + 16 > data_end) return TC_ACT_SHOT; // may be followed by if (ptr + 14 > data_end) return TC_ACT_SHOT; while llvm can see that 'ptr' is valid for all 16 bytes, the verifier could not. 
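The same pattern, written as restricted C against struct __sk_buff; the selftest added below encodes it with raw BPF instructions, and the TC-style return codes here just follow the fragment above:

    #include <linux/bpf.h>
    #include <linux/pkt_cls.h>

    /* llvm may emit the larger bounds check first; the verifier has to keep
     * the maximum packet range already proven for 'data' instead of
     * shrinking it again at the second, weaker check.
     */
    int overlapping_checks(struct __sk_buff *skb)
    {
            void *data     = (void *)(long)skb->data;
            void *data_end = (void *)(long)skb->data_end;

            if (data + 8 > data_end)        /* proves bytes 0..7 are in bounds */
                    return TC_ACT_SHOT;
            if (data + 6 > data_end)        /* weaker check; must not discard
                                             * the 8-byte proof above */
                    return TC_ACT_SHOT;

            /* 2-byte load at offset 6..7, already covered by the first check */
            return *(unsigned short *)(data + 6) ? TC_ACT_OK : TC_ACT_SHOT;
    }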
Fix verifier logic to account for such case and add a test. Reported-by: Huapeng Zhou Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++-- tools/testing/selftests/bpf/test_verifier.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 796b68d001198a..5e6202e62265fd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1973,14 +1973,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) - regs[i].range = dst_reg->off; + /* keep the maximum range already checked */ + regs[i].range = max(regs[i].range, dst_reg->off); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = dst_reg->off; + reg->range = max(reg->range, dst_reg->off); } } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d1555e4240c0fb..7d761d4cc75907 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3417,6 +3417,26 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_LWT_XMIT, }, + { + "overlapping checks for direct packet access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, { "invalid access of tc_classid for LWT_IN", .insns = { From dac7a4b4b1f664934e8b713f529b629f67db313c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Mar 2017 17:22:47 -0400 Subject: [PATCH 1744/1911] ext4: lock the xattr block before checksuming it We must lock the xattr block before calculating or verifying the checksum in order to avoid spurious checksum failures. 
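The shape of the fix, condensed from the diff below: take the buffer lock around the comparison so the block cannot change while its checksum is computed (this is a trimmed copy of the new ext4_xattr_block_csum_verify()):

    static int xattr_csum_verify_sketch(struct inode *inode,
                                        struct buffer_head *bh)
    {
            int ok = 1;

            if (ext4_has_metadata_csum(inode->i_sb)) {
                    lock_buffer(bh);        /* no concurrent update of the block */
                    ok = (BHDR(bh)->h_checksum ==
                          ext4_xattr_block_csum(inode, bh->b_blocknr, BHDR(bh)));
                    unlock_buffer(bh);
            }
            return ok;
    }

The set side is symmetric: ext4_xattr_block_csum_set() is now called while the buffer is still locked, before the block is marked dirty.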
https://bugzilla.kernel.org/show_bug.cgi?id=193661 Reported-by: Colin Ian King Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/xattr.c | 65 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 67636acf762475..996e7900d4c8ea 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode, } static int ext4_xattr_block_csum_verify(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) + struct buffer_head *bh) { - if (ext4_has_metadata_csum(inode->i_sb) && - (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) - return 0; - return 1; -} - -static void ext4_xattr_block_csum_set(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -{ - if (!ext4_has_metadata_csum(inode->i_sb)) - return; + struct ext4_xattr_header *hdr = BHDR(bh); + int ret = 1; - hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); + if (ext4_has_metadata_csum(inode->i_sb)) { + lock_buffer(bh); + ret = (hdr->h_checksum == ext4_xattr_block_csum(inode, + bh->b_blocknr, hdr)); + unlock_buffer(bh); + } + return ret; } -static inline int ext4_handle_dirty_xattr_block(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +static void ext4_xattr_block_csum_set(struct inode *inode, + struct buffer_head *bh) { - ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); - return ext4_handle_dirty_metadata(handle, inode, bh); + if (ext4_has_metadata_csum(inode->i_sb)) + BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode, + bh->b_blocknr, BHDR(bh)); } static inline const struct xattr_handler * @@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EFSCORRUPTED; - if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) + if (!ext4_xattr_block_csum_verify(inode, bh)) return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); @@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, } } + ext4_xattr_block_csum_set(inode, bh); /* * Beware of this ugliness: Releasing of xattr block references * from different inodes can race and so we have to protect * from a race where someone else frees the block (and releases * its journal_head) before we are done dirtying the buffer. In * nojournal mode this race is harmless and we actually cannot - * call ext4_handle_dirty_xattr_block() with locked buffer as + * call ext4_handle_dirty_metadata() with locked buffer as * that function can call sync_dirty_buffer() so for that case * we handle the dirtying after unlocking the buffer. 
*/ if (ext4_handle_valid(handle)) - error = ext4_handle_dirty_xattr_block(handle, inode, - bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); unlock_buffer(bh); if (!ext4_handle_valid(handle)) - error = ext4_handle_dirty_xattr_block(handle, inode, - bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); @@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ext4_xattr_cache_insert(ext4_mb_cache, bs->bh); } + ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) goto bad_block; if (!error) - error = ext4_handle_dirty_xattr_block(handle, - inode, - bs->bh); + error = ext4_handle_dirty_metadata(handle, + inode, + bs->bh); if (error) goto cleanup; goto inserted; @@ -967,10 +962,11 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ce->e_reusable = 0; ea_bdebug(new_bh, "reusing; refcount now=%d", ref); + ext4_xattr_block_csum_set(inode, new_bh); unlock_buffer(new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, - new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, + new_bh); if (error) goto cleanup_dquot; } @@ -1020,11 +1016,12 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, goto getblk_failed; } memcpy(new_bh->b_data, s->base, new_bh->b_size); + ext4_xattr_block_csum_set(inode, new_bh); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); ext4_xattr_cache_insert(ext4_mb_cache, new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, new_bh); + error = ext4_handle_dirty_metadata(handle, inode, + new_bh); if (error) goto cleanup; } From d67d64f423147cf4fe8212658255e1160a4ef02c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Mar 2017 17:33:31 -0400 Subject: [PATCH 1745/1911] ext4: fix two spelling nits Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- fs/ext4/move_extent.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f622d4a577e35d..f303d3a7f44ae2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5400,7 +5400,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * If there is inline data in the inode, the inode will normally not * have data blocks allocated (it may have an external xattr block). * Report at least one sector for such files, so tools like tar, rsync, - * others doen't incorrectly think the file is completely sparse. + * others don't incorrectly think the file is completely sparse. 
*/ if (unlikely(ext4_has_inline_data(inode))) stat->blocks += (stat->size + 511) >> 9; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 6fc14def0c7072..615bc03d0fbdf3 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode, if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { ext4_debug("ext4 move extent: orig and donor's start " - "offset are not alligned [ino:orig %lu, donor %lu]\n", + "offsets are not aligned [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } From a17f1861b5ea4327f9f35e9edb3c5fadceaa7c64 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:49 +0100 Subject: [PATCH 1746/1911] net: hns: fix uninitialized data use When dev_dbg() is enabled, we print uninitialized data, as gcc-7.0.1 now points out: ethernet/hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_promisc_tcam': ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.low.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.high.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] We also pass the data into hns_dsaf_tcam_mc_cfg(), which might later use it (not sure about that), so it seems safer to just always initialize the tbl_tcam_data structure. Fixes: 1f5fa2dd1cfa ("net: hns: fix for promisc mode in HNS driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 90dbda7926144a..cd93657abe8798 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2924,10 +2924,11 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, /* find the tcam entry index for promisc */ entry_index = dsaf_promisc_tcam_entry(port); + memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); + memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); + /* config key mask */ if (enable) { - memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); - memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); From 834a61d455ff8069552f44140b2c1de85e6bc84d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:50 +0100 Subject: [PATCH 1747/1911] net: hns: avoid gcc-7.0.1 warning for uninitialized data hns_dsaf_set_mac_key() calls dsaf_set_field() on an uninitialized field, which will then change only a few of its bits, causing a warning with the latest gcc: hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] (origin) &= (~(mask)); \ ^~ hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_add_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this 
function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_rm_mac_addr': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is actually correct since we always set all 16 bits of the port_vlan field, but gcc correctly points out that the first access does contain uninitialized data. This initializes the field to zero first before setting the individual bits. Fixes: 5483bfcb169c ("net: hns: modify tcam table and set mac key") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index cd93657abe8798..403ea9db6dbd15 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1519,6 +1519,7 @@ static void hns_dsaf_set_mac_key( mac_key->high.bits.mac_3 = addr[3]; mac_key->low.bits.mac_4 = addr[4]; mac_key->low.bits.mac_5 = addr[5]; + mac_key->low.bits.port_vlan = 0; dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M, DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, From 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 25 Mar 2017 01:48:08 +0300 Subject: [PATCH 1748/1911] irda: vlsi_ir: fix check for DMA mapping errors vlsi_alloc_ring() checks for DMA mapping errors by comparing returned address with zero, while pci_dma_mapping_error() should be used. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. 
Miller --- drivers/net/irda/vlsi_ir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index ffedad2a360afb..15b92008625163 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr memset(rd, 0, sizeof(*rd)); rd->hw = hwmap + i; rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA); - if (rd->buf == NULL || - !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) { + if (rd->buf) + busaddr = pci_map_single(pdev, rd->buf, len, dir); + if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) { if (rd->buf) { net_err_ratelimited("%s: failed to create PCI-MAP for %p\n", __func__, rd->buf); @@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr rd = r->rd + j; busaddr = rd_get_addr(rd); rd_set_addr_status(rd, 0, 0); - if (busaddr) - pci_unmap_single(pdev, busaddr, len, dir); + pci_unmap_single(pdev, busaddr, len, dir); kfree(rd->buf); rd->buf = NULL; } From 819f60fb7db169d851186d04e571e9bca27321e8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Sat, 25 Mar 2017 19:29:01 +0800 Subject: [PATCH 1749/1911] EDAC, pnd2_edac: Fix reported DIMM number DIMM number passed to edac_mc_handle_error() was accidentally hardcoded to zero. Pass in the correct daddr->dimm value. Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov --- drivers/edac/pnd2_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index ec2e349d728de6..928e0dba41fc23 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1164,7 +1164,7 @@ static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, /* Call the helper to output message */ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, - m->addr & ~PAGE_MASK, 0, daddr->chan, 0, -1, optype, msg); + m->addr & ~PAGE_MASK, 0, daddr->chan, daddr->dimm, -1, optype, msg); return; From c02ed2e75ef4c74e41e421acb4ef1494671585e8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Mar 2017 14:15:16 -0700 Subject: [PATCH 1750/1911] Linux 4.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b2faa93193729f..231e6a7749bd89 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* From 1633682053a7ee8058e10c76722b9b28e97fb73f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 24 Mar 2017 13:38:28 -0400 Subject: [PATCH 1751/1911] USB: fix linked-list corruption in rh_call_control() Using KASAN, Dmitry found a bug in the rh_call_control() routine: If buffer allocation fails, the routine returns immediately without unlinking its URB from the control endpoint, eventually leading to linked-list corruption. This patch fixes the problem by jumping to the end of the routine (where the URB is unlinked) when an allocation failure occurs. 
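The fix is an instance of the single-exit-label idiom: every path out of the function, including the allocation failure, must run the same unlink step. A self-contained toy version of the idiom (hypothetical names, not the HCD code):

#include <stdlib.h>
#include <stdio.h>

struct req {
	struct req *next;
};

static struct req *pending;          /* toy "endpoint queue" */

static void link_req(struct req *r)  { r->next = pending; pending = r; }

static void unlink_req(struct req *r)
{
	struct req **p = &pending;

	while (*p && *p != r)
		p = &(*p)->next;
	if (*p)
		*p = r->next;
}

/* every exit path must unlink, including the allocation failure */
static int handle_req(struct req *r, size_t buflen)
{
	void *buf;
	int status = 0;

	link_req(r);

	buf = calloc(1, buflen);
	if (!buf) {
		status = -12;            /* -ENOMEM */
		goto err_alloc;          /* do NOT just return here */
	}

	/* ... use buf ... */
	free(buf);

 err_alloc:
	unlink_req(r);                   /* keeps the list consistent on all paths */
	return status;
}

int main(void)
{
	struct req r = { 0 };

	printf("status=%d, queue empty=%d\n", handle_req(&r, 64), pending == NULL);
	return 0;
}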
Signed-off-by: Alan Stern Reported-and-tested-by: Dmitry Vyukov CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 612fab6e54fb84..79bdca5cb9c7ae 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) */ tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); tbuf = kzalloc(tbuf_size, GFP_KERNEL); - if (!tbuf) - return -ENOMEM; + if (!tbuf) { + status = -ENOMEM; + goto err_alloc; + } bufp = tbuf; @@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) } kfree(tbuf); + err_alloc: /* any errors get returned through the urb completion */ spin_lock_irq(&hcd_root_hub_lock); From 7b09cc5a9debc86c903c2eff8f8a1fdef773c649 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 22 Mar 2017 16:24:17 -0400 Subject: [PATCH 1752/1911] sched/clock: Fix broken stable to unstable transfer When it is determined that the clock is actually unstable, and we switch from stable to unstable, the __clear_sched_clock_stable() function is eventually called. In this function we set gtod_offset so the following holds true: sched_clock() + raw_offset == ktime_get_ns() + gtod_offset But instead of getting the latest timestamps, we use the last values from scd, so instead of sched_clock() we use scd->tick_raw, and instead of ktime_get_ns() we use scd->tick_gtod. However, later, when we use gtod_offset sched_clock_local() we do not add it to scd->tick_gtod to calculate the correct clock value when we determine the boundaries for min/max clocks. This can result in tick granularity sched_clock() values, so fix it. 
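The arithmetic is easier to see in isolation: the clamp window must be built from the same gtod-plus-offset base as the clock value itself, otherwise a large gtod_offset pushes the computed clock outside a window derived from tick_gtod alone and the result gets clamped to roughly tick granularity. A toy standalone version of the fixed calculation, with made-up numbers and the wrap helpers copied in for illustration:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

static u64 wrap_max(u64 x, u64 y) { return (int64_t)(x - y) > 0 ? x : y; }
static u64 wrap_min(u64 x, u64 y) { return (int64_t)(x - y) < 0 ? x : y; }

#define TICK_NSEC 1000000ULL /* pretend 1 ms ticks */

/* mirrors the fixed sched_clock_local() arithmetic on toy inputs */
static u64 local_clock(u64 tick_gtod, u64 gtod_offset, u64 delta, u64 old_clock)
{
	u64 gtod = tick_gtod + gtod_offset;     /* one base for value AND bounds */
	u64 clock = gtod + delta;
	u64 min_clock = wrap_max(gtod, old_clock);
	u64 max_clock = wrap_max(old_clock, gtod + TICK_NSEC);

	clock = wrap_max(clock, min_clock);
	return wrap_min(clock, max_clock);
}

int main(void)
{
	/* with bounds built from tick_gtod alone, this value would be clamped
	 * down to old_clock; with the shared gtod base it is not */
	u64 v = local_clock(5000000, 40000000, 123456, 44900000);

	printf("clock = %llu\n", (unsigned long long)v);
	return 0;
}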
Signed-off-by: Pavel Tatashin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hpa@zytor.com Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Link: http://lkml.kernel.org/r/1490214265-899964-2-git-send-email-pasha.tatashin@oracle.com Signed-off-by: Ingo Molnar --- kernel/sched/clock.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 24a3e01bf8cbc7..00a45c45beca09 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -221,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y) */ static u64 sched_clock_local(struct sched_clock_data *scd) { - u64 now, clock, old_clock, min_clock, max_clock; + u64 now, clock, old_clock, min_clock, max_clock, gtod; s64 delta; again: @@ -238,9 +238,10 @@ static u64 sched_clock_local(struct sched_clock_data *scd) * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + __gtod_offset + delta; - min_clock = wrap_max(scd->tick_gtod, old_clock); - max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); + gtod = scd->tick_gtod + __gtod_offset; + clock = gtod + delta; + min_clock = wrap_max(gtod, old_clock); + max_clock = wrap_max(old_clock, gtod + TICK_NSEC); clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); From 0abfe7e2570d7c729a7662e82c09a23f00f29346 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 20:59:30 +0000 Subject: [PATCH 1753/1911] drm/i915: Restore marking context objects as dirty on pinning Commit e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") converted the legacy intel_ringbuffer submission to the same context pinning mechanism as execlists - that is to pin the context until the subsequent request is retired. Previously it used the vma retirement of the context object to keep itself pinned until the next request (after i915_vma_move_to_active()). In the conversion, I missed that the vma retirement was also responsible for marking the object as dirty. Mark the context object as dirty when pinning (equivalent to execlists) which ensures that if the context is swapped out due to mempressure or suspend/hibernation, when it is loaded back in it does so with the previous state (and not all zero). 
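The invariant being restored is ordinary write-back caching: anything that can be modified while resident must be flagged dirty, or eviction is free to drop the resident copy and the next reload sees stale or zeroed contents. A tiny userspace model of that failure mode, with hypothetical names and no relation to the actual i915 structures:

#include <stdio.h>
#include <string.h>

struct obj {
	char resident[16];   /* "GPU visible" copy */
	char backing[16];    /* swap/backing store copy */
	int dirty;
};

static void pin(struct obj *o)
{
	/* the fix: assume the engine will write the context while pinned */
	o->dirty = 1;
}

static void evict(struct obj *o)
{
	if (o->dirty)
		memcpy(o->backing, o->resident, sizeof(o->backing));
	memset(o->resident, 0, sizeof(o->resident));   /* pages are gone */
	o->dirty = 0;
}

static void reload(struct obj *o)
{
	memcpy(o->resident, o->backing, sizeof(o->resident));
}

int main(void)
{
	struct obj ctx = { .backing = "initial" };

	reload(&ctx);
	pin(&ctx);                                /* without this, state is lost */
	strcpy(ctx.resident, "ring state");       /* hardware writes the context */
	evict(&ctx);                              /* mempressure / suspend */
	reload(&ctx);
	printf("restored: \"%s\"\n", ctx.resident);
	return 0;
}

If pin() does not set dirty, evict() skips the write-back and the final reload() restores "initial" instead of the state the hardware wrote, which is the all-zero-context symptom described above.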
Fixes: e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") Reported-by: Dennis Gilmore Reported-by: Mathieu Marquer Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99993 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100181 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.11-rc1 Link: http://patchwork.freedesktop.org/patch/msgid/20170322205930.12762-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit 5d4bac5503fcc67dd7999571e243cee49371aef7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 91bc4abf5d3e57..6c5f9958197d55 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2024,6 +2024,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, ret = context_pin(ctx, flags); if (ret) goto error; + + ce->state->obj->mm.dirty = true; } /* The kernel context is only used as a placeholder for flushing the From 3b7dabf029478bb80507a6c4500ca94132a2bc0b Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 08:53:12 +0800 Subject: [PATCH 1754/1911] netfilter: invoke synchronize_rcu after set the _hook_ to NULL Otherwise, another CPU may access the invalid pointer. For example: CPU0 CPU1 - rcu_read_lock(); - pfunc = _hook_; _hook_ = NULL; - mod unload - - pfunc(); // invalid, panic - rcu_read_unlock(); So we must call synchronize_rcu() to wait the rcu reader to finish. Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked by later nf_conntrack_helper_unregister, but I'm inclined to add a explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend on such obscure assumptions is not a good idea. Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object, so in cttimeout_exit, invoking rcu_barrier() is not necessary at all, remove it too. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 + net/netfilter/nf_conntrack_ecache.c | 2 ++ net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_nat_core.c | 2 ++ net/netfilter/nfnetlink_cttimeout.c | 2 +- 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2e6..5a8f7c360887a7 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e669e..22fc32143e9c4a 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. 
*/ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e73567bb..908d858034e4f4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b1772..82802e4a664081 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e9e..47d6656c9119fd 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); From 83d90219a5df8d950855ce73229a97b63605c317 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 12:09:15 +0800 Subject: [PATCH 1755/1911] netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER). So it's possible that one CPU is walking the nf_ct_helper_hash for cthelper add/get/del, another cpu is doing nf_conntrack_helpers_unregister at the same time. This is dangrous, and may cause use after free error. Note, delete operation will flush all cthelpers added via nfnetlink, so using rcu to do protect is not easy. Now introduce a dummy list to record all the cthelpers added via nfnetlink, then we can walk the dummy list instead of walking the nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held. 
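The shape of the fix, reduced to its essentials: the nfnetlink code keeps its own list of the helpers it allocated, so add/get/del walk only objects it owns, all under the one lock, instead of racing over a hash table shared with the rest of conntrack. A minimal sketch of that ownership-list pattern in plain C, with a pthread mutex standing in for the nfnl lock and all names invented:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct helper { char name[32]; };            /* the shared object type */

struct my_helper {                           /* wrapper owned by this module */
	struct my_helper *next;
	struct helper h;
};

static struct my_helper *my_list;            /* only helpers *we* registered */
static pthread_mutex_t my_lock = PTHREAD_MUTEX_INITIALIZER;

static int my_add(const char *name)
{
	struct my_helper *n = calloc(1, sizeof(*n));

	if (!n)
		return -1;
	snprintf(n->h.name, sizeof(n->h.name), "%s", name);

	pthread_mutex_lock(&my_lock);
	n->next = my_list;                   /* never walk the foreign hash */
	my_list = n;
	pthread_mutex_unlock(&my_lock);
	return 0;
}

static void my_flush(void)
{
	pthread_mutex_lock(&my_lock);
	while (my_list) {
		struct my_helper *n = my_list;

		my_list = n->next;
		/* unregister n->h from the shared table here ... */
		free(n);                     /* safe: we allocated it */
	}
	pthread_mutex_unlock(&my_lock);
}

int main(void)
{
	my_add("ftp");
	my_add("tftp");
	my_flush();
	puts("flushed only our own helpers");
	return 0;
}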
Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 177 +++++++++++++---------------- 1 file changed, 81 insertions(+), 96 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 2b987d2a77bce7..d45558178da5b6 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) @@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: - kfree(helper); + kfree(nfcth); return ret; } @@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. 
*/ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? 
-ENOBUFS : ret; } return ret; } @@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -750,22 +741,16 @@ static int __init nfnl_cthelper_init(void) static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } From 9c3f3794926a997b1cab6c42480ff300efa2d162 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 16:35:29 +0800 Subject: [PATCH 1756/1911] netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister If one cpu is doing nf_ct_extend_unregister while another cpu is doing __nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover, there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to NULL, so it's possible that we may access invalid pointer. But actually, most of the ct extends are built-in, so the problem listed above will not happen. However, there are two exceptions: NF_CT_EXT_NAT and NF_CT_EXT_SYNPROXY. For _EXT_NAT, the panic will not happen, since adding the nat extend and unregistering the nat extend are located in the same file(nf_nat_core.c), this means that after the nat module is removed, we cannot add the nat extend too. For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while synproxy extend unregister will be done by synproxy_core_exit. So after nf_synproxy_core.ko is removed, we may still try to add the synproxy extend, then kernel panic may happen. I know it's very hard to reproduce this issue, but I can play a tricky game to make it happen very easily :) Step 1. 
Enable SYNPROXY for tcp dport 1234 at FORWARD hook: # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook. Also note, in the userspace we only add a 20s' delay, then reinject the syn packet to the kernel: # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1 Step 3. Using "nc 2.2.2.2 1234" to connect the server. Step 4. Now remove the nf_synproxy_core.ko quickly: # iptables -F FORWARD # rmmod ipt_SYNPROXY # rmmod nf_synproxy_core Step 5. After 20s' delay, the syn packet is reinjected to the kernel. Now you will see the panic like this: kernel BUG at net/netfilter/nf_conntrack_extend.c:91! Call Trace: ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack] init_conntrack+0x12b/0x600 [nf_conntrack] nf_conntrack_in+0x4cc/0x580 [nf_conntrack] ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4] nf_reinject+0x104/0x270 nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue] ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue] ? nla_parse+0xa0/0x100 nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink] [...] One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e. introduce nf_conntrack_synproxy.c and only do ct extend register and unregister in it, similar to nf_conntrack_timeout.c. But having such a obscure restriction of nf_ct_extend_unregister is not a good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then it will be easier if we add new ct extend in the future. Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary anymore, remove it too. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c24920b..008299b7f78fe3 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); From 75c689dca98851d65ef5a27e5ce26b625b68751c Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 25 Mar 2017 18:24:36 +0800 Subject: [PATCH 1757/1911] netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the snmp_helper is never registered. But it still tries to unregister the snmp_helper, it could cause the panic. 
Now remove the useless snmp_helper and the unregister call in the error handler. Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper") Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5a8f7c360887a7..53e49f5011d3ce 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) From d4ea7e3c5c0e341c15b073016dbf3ab6c65f12f3 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 15 Mar 2017 21:50:09 -0400 Subject: [PATCH 1758/1911] NFS: Fix old dentry rehash after move Now that nfs_rename()'s d_move has moved within the RPC task's rpc_call_done callback, rehashing new_dentry will actually rehash the old dentry's name in nfs_rename(). d_move() is going to rehash the new dentry for us anyway, so doing it again here is unnecessary. Reported-by: Chuck Lever Fixes: 920b4530fb80 ("NFS: nfs_rename() handle -ERESTARTSYS dentry left behind") Signed-off-by: Benjamin Coddington Tested-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fb499a3f21b58e..f92ba8d6c55690 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL, *rehash = NULL; + struct dentry *dentry = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) { + if (!d_unhashed(new_dentry)) d_drop(new_dentry); - rehash = new_dentry; - } if (d_count(new_dentry) > 2) { int err; @@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; - rehash = NULL; new_inode = NULL; } } @@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = task->tk_status; rpc_put_task(task); out: - if (rehash) - d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); /* new dentry created? 
*/ From 2f0ba790df51721794c11abc7a076d407392f648 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Mar 2017 19:33:09 +0200 Subject: [PATCH 1759/1911] cpufreq: Fix creation of symbolic links to policy directories The cpufreq core only tries to create symbolic links from CPU directories in sysfs to policy directories in cpufreq_add_dev(), either when a given CPU is registered or when the cpufreq driver is registered, whichever happens first. That is not sufficient, however, because cpufreq_add_dev() may be called for an offline CPU whose policy object has not been created yet and, quite obviously, the symbolic cannot be added in that case. Fix that by making cpufreq_online() attempt to add symbolic links to policy objects for the CPUs in the related_cpus mask of every new policy object created by it. The cpufreq_driver_lock locking around the for_each_cpu() loop in cpufreq_online() is dropped, because it is not necessary and the code is somewhat simpler without it. Moreover, failures to create a symbolic link will not be regarded as hard errors any more and the CPUs without those links will not be taken offline automatically, but that should not be problematic in practice. Reported-and-tested-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki Cc: 4.9+ # 4.9+ --- drivers/cpufreq/cpufreq.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5dbdd261aa736b..bc96d423781aa8 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -918,11 +918,19 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, - struct device *dev) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); + + if (!dev) + return; + + if (cpumask_test_and_set_cpu(cpu, policy->real_cpus)) + return; + dev_dbg(dev, "%s: Adding symlink\n", __func__); - return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq")) + dev_err(dev, "cpufreq symlink creation failed\n"); } static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, @@ -1180,10 +1188,10 @@ static int cpufreq_online(unsigned int cpu) policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->related_cpus) + for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + add_cpu_dev_symlink(policy, j); + } } else { policy->min = policy->user_policy.min; policy->max = policy->user_policy.max; @@ -1275,13 +1283,15 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->exit) cpufreq_driver->exit(policy); + + for_each_cpu(j, policy->real_cpus) + remove_cpu_dev_symlink(policy, get_cpu_device(j)); + out_free_policy: cpufreq_policy_free(policy); return ret; } -static int cpufreq_offline(unsigned int cpu); - /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. 
@@ -1303,16 +1313,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) - return 0; + if (policy) + add_cpu_dev_symlink(policy, cpu); - ret = add_cpu_dev_symlink(policy, dev); - if (ret) { - cpumask_clear_cpu(cpu, policy->real_cpus); - cpufreq_offline(cpu); - } - - return ret; + return 0; } static int cpufreq_offline(unsigned int cpu) From ce4b4f228e51219b0b79588caf73225b08b5b779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 24 Mar 2017 19:01:09 +0900 Subject: [PATCH 1760/1911] drm/radeon: Override fpfn for all VRAM placements in radeon_evict_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were accidentally only overriding the first VRAM placement. For BOs with the RADEON_GEM_NO_CPU_ACCESS flag set, radeon_ttm_placement_from_domain creates a second VRAM placment with fpfn == 0. If VRAM is almost full, the first VRAM placement with fpfn > 0 may not work, but the second one with fpfn == 0 always will (the BO's current location trivially satisfies it). Because "moving" the BO to its current location puts it back on the LRU list, this results in an infinite loop. Fixes: 2a85aedd117c ("drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first") Reported-by: Zachary Michaels Reported-and-Tested-by: Julien Isorce Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 684f1703aa5c71..aaa3e80fecb425 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo->placement.num_busy_placement = 0; for (i = 0; i < rbo->placement.num_placement; i++) { if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { - if (rbo->placements[0].fpfn < fpfn) - rbo->placements[0].fpfn = fpfn; + if (rbo->placements[i].fpfn < fpfn) + rbo->placements[i].fpfn = fpfn; } else { rbo->placement.busy_placement = &rbo->placements[i]; From 551afbb85b3898e78068405d78708245999c19c0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:00 -0400 Subject: [PATCH 1761/1911] NFS cleanup struct nfs4_filelayout_segment Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 2896cb833a1137..4c4d436a679660 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -55,15 +55,15 @@ struct nfs4_file_layout_dsaddr { }; struct nfs4_filelayout_segment { - struct pnfs_layout_segment generic_hdr; - u32 stripe_type; - u32 commit_through_mds; - u32 stripe_unit; - u32 first_stripe_index; - u64 pattern_offset; - struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ - unsigned int num_fh; - struct nfs_fh **fh_array; + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int 
num_fh; + struct nfs_fh **fh_array; }; struct nfs4_filelayout { From 248ccd5ee644fcf68984d945cffcdc364992ddbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 27 Mar 2017 10:48:11 -0700 Subject: [PATCH 1762/1911] MAINTAINERS: Add Andrew Lunn as co-maintainer of PHYLIB Andrew has been contributing a lot to PHYLIB over the past months and his feedback on patches is more than welcome. Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e48678d1540124..9975dcefc372ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4922,6 +4922,7 @@ F: include/linux/netfilter_bridge/ F: net/bridge/ ETHERNET PHY LIBRARY +M: Andrew Lunn M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained From a3902ee98304324f681088203391b5c0353ce977 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Tue, 21 Mar 2017 21:19:57 +0900 Subject: [PATCH 1763/1911] scsi: ufs: remove the duplicated checking for supporting clkscaling There are same conditions for checking whether supporting clkscaling or not. When ufshcd is supporting clkscaling, active_reqs should be decreased by one. [mkp: addressed comment from Bartlomiej] Signed-off-by: Jaehoon Chung Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e8c26e6e623726..096e95b911bd7b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4662,8 +4662,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, } if (ufshcd_is_clkscaling_supported(hba)) hba->clk_scaling.active_reqs--; - if (ufshcd_is_clkscaling_supported(hba)) - hba->clk_scaling.active_reqs--; } /* clear corresponding bits of completed commands */ From ffefb6f4d6ad699a2b5484241bc46745a53235d0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Mar 2017 18:00:14 +0100 Subject: [PATCH 1764/1911] net: ipconfig: fix ic_close_devs() use-after-free Our chosen ic_dev may be anywhere in our list of ic_devs, and we may free it before attempting to close others. When we compare d->dev and ic_dev->dev, we're potentially dereferencing memory returned to the allocator. This causes KASAN to scream for each subsequent ic_dev we check. As there's a 1-1 mapping between ic_devs and netdevs, we can instead compare d and ic_dev directly, which implicitly handles the !ic_dev case, and avoids the use-after-free. The ic_dev pointer may be stale, but we will not dereference it. 
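The rule the fix relies on: a pointer value that may refer to freed memory can still be compared, it just must never be dereferenced. A toy reconstruction of the loop with invented names, mirroring the same reasoning:

#include <stdio.h>
#include <stdlib.h>

struct dev { char name[8]; };
struct d   { struct d *next; struct dev *dev; };

int main(void)
{
	struct dev eth0 = { "eth0" }, eth1 = { "eth1" };
	struct d *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));
	struct d *chosen, *cur, *next;

	if (!a || !b)
		return 1;
	a->dev = &eth0; a->next = b;
	b->dev = &eth1; b->next = NULL;
	chosen = a;                      /* the descriptor we keep (ic_dev) */

	for (cur = a; cur; cur = next) {
		next = cur->next;
		/* compare the descriptor pointers themselves; chosen may
		 * already have been freed by an earlier iteration, but its
		 * value is only compared, never dereferenced */
		if (cur != chosen)
			printf("downing %s\n", cur->dev->name);
		free(cur);
	}
	return 0;
}

Comparing against chosen->dev instead, as the old code did, would load through a pointer into memory that free() already returned to the allocator, which is the access KASAN flags in the splat below.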
Original splat: [ 6.487446] ================================================================== [ 6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708 [ 6.503013] Read of size 8 by task swapper/0/1 [ 6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8 [ 6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016 [ 6.523138] Call trace: [ 6.525590] [] dump_backtrace+0x0/0x570 [ 6.530976] [] show_stack+0x20/0x30 [ 6.536017] [] dump_stack+0x120/0x188 [ 6.541231] [] kasan_object_err+0x24/0xa0 [ 6.546790] [] kasan_report_error+0x244/0x738 [ 6.552695] [] __asan_report_load8_noabort+0x54/0x80 [ 6.559204] [] ic_close_devs+0xc4/0x154 [ 6.564590] [] ip_auto_config+0x2ed4/0x2f1c [ 6.570321] [] do_one_initcall+0xcc/0x370 [ 6.575882] [] kernel_init_freeable+0x5f8/0x6c4 [ 6.581959] [] kernel_init+0x18/0x190 [ 6.587171] [] ret_from_fork+0x10/0x40 [ 6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128 [ 6.598969] Allocated: [ 6.601324] PID = 1 [ 6.603427] save_stack_trace_tsk+0x0/0x418 [ 6.607603] save_stack_trace+0x20/0x30 [ 6.611430] kasan_kmalloc+0xd8/0x188 [ 6.615087] ip_auto_config+0x8c4/0x2f1c [ 6.619002] do_one_initcall+0xcc/0x370 [ 6.622832] kernel_init_freeable+0x5f8/0x6c4 [ 6.627178] kernel_init+0x18/0x190 [ 6.630660] ret_from_fork+0x10/0x40 [ 6.634223] Freed: [ 6.636233] PID = 1 [ 6.638334] save_stack_trace_tsk+0x0/0x418 [ 6.642510] save_stack_trace+0x20/0x30 [ 6.646337] kasan_slab_free+0x88/0x178 [ 6.650167] kfree+0xb8/0x478 [ 6.653131] ic_close_devs+0x130/0x154 [ 6.656875] ip_auto_config+0x2ed4/0x2f1c [ 6.660875] do_one_initcall+0xcc/0x370 [ 6.664705] kernel_init_freeable+0x5f8/0x6c4 [ 6.669051] kernel_init+0x18/0x190 [ 6.672534] ret_from_fork+0x10/0x40 [ 6.676098] Memory state around the buggy address: [ 6.680880] ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.688078] ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.702469] ^ [ 6.705952] ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.713149] ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.720343] ================================================================== [ 6.727536] Disabling lock debugging due to kernel taint Signed-off-by: Mark Rutland Cc: Alexey Kuznetsov Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd7408a..dfb2ab2dd3c84d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } From cc66afea58f858ff6da7f79b8a595a67bbb4f9a9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Mar 2017 11:32:59 +0200 Subject: [PATCH 1765/1911] x86/mce: Don't print MCEs when mcelog is active Since: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") all MCEs are printed even when mcelog is running. Fix the regression to not print to dmesg when mcelog is running as it is a consumer too. 
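The mechanism is simply a consumer count consulted by the default notifier: while /dev/mcelog is held open, the fallback printer stays quiet. A toy model of that gating, with invented names:

#include <stdio.h>

static int chrdev_open_count;          /* bumped by open(), dropped by release() */

static void log_mce(const char *m)
{
	if (chrdev_open_count > 0)         /* a consumer (mcelog) will report it */
		return;
	fprintf(stderr, "MCE: %s\n", m);   /* fallback: dump to the console */
}

int main(void)
{
	log_mce("bank 4 error");           /* printed: nobody has the device open */
	chrdev_open_count++;               /* mcelog opened /dev/mcelog */
	log_mce("bank 4 error");           /* suppressed */
	return 0;
}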
Signed-off-by: Andi Kleen [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org # 4.10.. Fixes: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") Link: http://lkml.kernel.org/r/20170327093304.10683-2-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8e9725c607ea6a..5accfbdee3f06f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -54,6 +54,8 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); +static int mce_chrdev_open_count; /* #times opened */ + #define mce_log_get_idx_check(p) \ ({ \ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ @@ -598,6 +600,10 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, if (atomic_read(&num_notifiers) > 2) return NOTIFY_DONE; + /* Don't print when mcelog is running */ + if (mce_chrdev_open_count > 0) + return NOTIFY_DONE; + __print_mce(m); return NOTIFY_DONE; @@ -1828,7 +1834,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_chrdev_state_lock); -static int mce_chrdev_open_count; /* #times opened */ static int mce_chrdev_open_exclu; /* already open exclusive? */ static int mce_chrdev_open(struct inode *inode, struct file *file) From 07de36b378a58f1d1426829acf0ab7cf86f651f3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:32:49 +0300 Subject: [PATCH 1766/1911] clockevents: Fix syntax error in clkevt-of macro The patch fix syntax errors introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of"). Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- drivers/clocksource/clkevt-probe.c | 2 +- include/linux/clockchips.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c index 8c30fec86094df..eb89b502acbdfd 100644 --- a/drivers/clocksource/clkevt-probe.c +++ b/drivers/clocksource/clkevt-probe.c @@ -17,7 +17,7 @@ #include #include -#include +#include extern struct of_device_id __clkevt_of_table[]; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5d3053c34fb3d5..6d7edc3082f984 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #ifdef CONFIG_CLKEVT_PROBE extern int clockevent_probe(void); -#els +#else static inline int clockevent_probe(void) { return 0; } #endif From 8d09617b076fd03ee9ae124abce94dda17bf3723 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:29:06 +0300 Subject: [PATCH 1767/1911] vmlinux.lds: Add __clkevt_of_table to kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") refer to __clkevt_of_table what doesn't exist in the vmlinux. 
As a result kernel build failed with error: "clkevt-probe.c:63: undefined reference to `__clkevt_of_table’" Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- include/asm-generic/vmlinux.lds.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0968d13b388591..8c6b525eb0fa1d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,6 +173,7 @@ KEEP(*(__##name##_of_table_end)) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) +#define CLKEVT_OF_TABLES() OF_TABLE(CONFIG_CLKEVT_OF, clkevt) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -559,6 +560,7 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ + CLKEVT_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ From 658b299580da2830a69eea44a8b6da1c2579a32e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Mar 2017 09:53:00 +0200 Subject: [PATCH 1768/1911] sched/headers: Remove duplicate #include line Vito Caputo reported that the sched.h split-up series introduced a duplicate #include line in drivers/tty/vt/keyboard.c. Remove it. Reported-by: Vito Caputo Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Alan Cox Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/tty/vt/keyboard.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index c5f0fc906136b5..8af8d954266337 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include From 0292e169b2d9c8377a168778f0b16eadb1f578fd Mon Sep 17 00:00:00 2001 From: "Herongguang (Stephen)" Date: Mon, 27 Mar 2017 15:21:17 +0800 Subject: [PATCH 1769/1911] KVM: pci-assign: do not map smm memory slot pages in vt-d page tables or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when destroy VM. This is consistent with current vfio implementation. Signed-off-by: herongguang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef1aa7f1ed7a56..88257b311cb579 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1065,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * changes) is disallowed above, so any other attribute changes getting * here can be skipped. */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) { r = kvm_iommu_map_pages(kvm, &new); return r; } From 7ad658b693536741c37b16aeb07840a2ce75f5b9 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 07:18:08 +0100 Subject: [PATCH 1770/1911] KVM: nVMX: fix nested EPT detection The nested_ept_enabled flag introduced in commit 7ca29de2136 was not computed correctly. We are interested only in L1's EPT state, not the the combined L0+L1 value. In particular, if L0 uses EPT but L1 does not, nested_ept_enabled must be false to make sure that PDPSTRs are loaded based on CR3 as usual, because the special case described in 26.3.2.4 Loading Page-Directory- Pointer-Table Entries does not apply. 
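The distinction that was lost is which control set to consult: whether PDPTEs must be loaded from CR3 for L2 depends only on what L1 put into vmcs12, not on the controls L0 uses for itself. A toy sketch of checking the guest-supplied control word, with an invented constant rather than the real VMX encoding:

#include <stdbool.h>
#include <stdio.h>

#define SEC_EXEC_ENABLE_EPT (1u << 1)     /* toy stand-in for the VMX bit */

struct vmcs12 { unsigned int secondary_exec_control; };

/* what matters for L2's PAE/PDPTE handling is L1's choice alone */
static bool nested_ept_enabled(const struct vmcs12 *v)
{
	return v->secondary_exec_control & SEC_EXEC_ENABLE_EPT;
}

int main(void)
{
	struct vmcs12 v = { .secondary_exec_control = 0 }; /* L1 runs L2 without EPT */
	bool l0_uses_ept = true;                           /* irrelevant to the decision */

	printf("load PDPTEs from CR3: %s\n",
	       !nested_ept_enabled(&v) ? "yes" : "no");
	(void)l0_uses_ept;
	return 0;
}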
Fixes: 7ca29de21362 ("KVM: nVMX: fix CR3 load if L2 uses PAE paging and EPT") Cc: qemu-stable@nongnu.org Reported-by: Wanpeng Li Reviewed-by: David Hildenbrand Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cd1ba62878f3bc..2ee00dbbbd5188 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9992,7 +9992,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; - bool nested_ept_enabled = false; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -10139,8 +10138,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_intr_status); } - nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; - /* * Write an illegal value to APIC_ACCESS_ADDR. Later, * nested_get_vmcs12_pages will either fix it up or @@ -10303,7 +10300,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_efer(vcpu, vcpu->arch.efer); /* Shadow page tables on either EPT or shadow page tables. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, + if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) return 1; From 2beb6dad2e8f95d710159d5befb390e4f62ab5cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Mar 2017 17:53:50 +0200 Subject: [PATCH 1771/1911] KVM: x86: cleanup the page tracking SRCU instance SRCU uses a delayed work item. Skip cleaning it up, and the result is use-after-free in the work item callbacks. Reported-by: Dmitry Vyukov Suggested-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 0eb05bf290cfe8610d9680b49abef37febd1c38a Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_page_track.h | 1 + arch/x86/kvm/page_track.c | 8 ++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index d74747b031ecd2..c4eda791f877b6 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node { }; void kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 37942e419c32e5..60168cdd05463e 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); } +void kvm_page_track_cleanup(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + cleanup_srcu_struct(&head->track_srcu); +} + void kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64697fe475c3cb..ccbd45ecd41a3f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8158,6 +8158,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); + kvm_page_track_cleanup(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot 
*free, From 629dc8704b922f0c46f3025bd3486c2bc51eb7a6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:01 -0400 Subject: [PATCH 1772/1911] NFS store nfs4_deviceid in struct nfs4_filelayout_segment In preparation for moving the filelayout getdeviceinfo call from filelayout_alloc_lseg called by pnfs_process_layout Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 13 +++++-------- fs/nfs/filelayout/filelayout.h | 1 + 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7aff350f15b1ce..cad74c1c79ff76 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -572,7 +572,6 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct nfs4_deviceid_node *d; @@ -602,7 +601,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); if (d == NULL) goto out; @@ -657,7 +656,6 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct xdr_stream stream; @@ -682,9 +680,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; - memcpy(id, p, sizeof(*id)); + memcpy(&fl->deviceid, p, sizeof(fl->deviceid)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - nfs4_print_deviceid(id); + nfs4_print_deviceid(&fl->deviceid); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) @@ -831,15 +829,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, { struct nfs4_filelayout_segment *fl; int rc; - struct nfs4_deviceid id; dprintk("--> %s\n", __func__); fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 4c4d436a679660..79323b5dab0cb3 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -61,6 +61,7 @@ struct nfs4_filelayout_segment { u32 stripe_unit; u32 first_stripe_index; u64 pattern_offset; + struct nfs4_deviceid deviceid; struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; From 8d40b0f14846f7d45c7c72d343fe62cb866dda34 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:02 -0400 Subject: [PATCH 1773/1911] NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes Fix a filelayout GETDEVICEINFO call hang triggered from the LAYOUTGET pnfs_layout_process where the GETDEVICEINFO call is waiting for a session slot, and the LAYOUGET call is waiting for pnfs_layout_process to complete before freeing the slot GETDEVICEINFO is waiting for.. This occurs in testing against the pynfs pNFS server where the the on-wire reply highest_slotid and slot id are zero, and the target high slot id is 8 (negotiated in CREATE_SESSION). 
The internal fore channel slot table max_slotid, the maximum allowed table slotid value, has been reduced via nfs41_set_max_slotid_locked from 8 to 1. Thus there is one slot (slotid 0) available for use but it has not been freed by LAYOUTGET proir to the GETDEVICEINFO request. In order to ensure that layoutrecall callbacks are processed in the correct order, nfs4_proc_layoutget processing needs to be finished e.g. pnfs_layout_process) before giving up the slot that identifies the layoutget (see referring_call_exists). Move the filelayout_check_layout nfs4_find_get_device call outside of the pnfs_layout_process call tree. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 138 ++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 47 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index cad74c1c79ff76..367f8eb19bfa89 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; } +static int +filelayout_check_deviceid(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + + /* find and reference the deviceid */ + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, + lo->plh_lc_cred, gfp_flags); + if (d == NULL) + goto out; + + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) + goto out_put; + + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + status = 0; +out: + return status; +out_put: + nfs4_fl_put_deviceid(dsaddr); + goto out; +} + /* * filelayout_check_layout() * @@ -574,8 +618,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) { - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; dprintk("--> %s\n", __func__); @@ -600,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, - lo->plh_lc_cred, gfp_flags); - if (d == NULL) - goto out; - - dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is unavailable */ - if (filelayout_test_devid_unavailable(&dsaddr->id_node)) - goto out_put; - - fl->dsaddr = dsaddr; - - if (fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %u\n", - __func__, fl->first_stripe_index); - goto out_put; - } - - if ((fl->stripe_type == STRIPE_SPARSE && - fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || - (fl->stripe_type == STRIPE_DENSE && - fl->num_fh != dsaddr->stripe_count)) { - dprintk("%s num_fh %u not valid for given packing\n", - __func__, fl->num_fh); - goto out_put; - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); return status; -out_put: - 
nfs4_fl_put_deviceid(dsaddr); - goto out; } static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) @@ -885,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return min(stripe_unit - (unsigned int)stripe_offset, size); } +static struct pnfs_layout_segment * +fl_pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + bool strict_iomode, + gfp_t gfp_flags) +{ + struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_hdr *lo; + struct nfs4_filelayout_segment *fl; + int status; + + lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, + gfp_flags); + if (!lseg) + lseg = ERR_PTR(-ENOMEM); + if (IS_ERR(lseg)) + goto out; + + lo = NFS_I(ino)->layout; + fl = FILELAYOUT_LSEG(lseg); + + status = filelayout_check_deviceid(lo, fl, gfp_flags); + if (status) + lseg = ERR_PTR(status); +out: + if (IS_ERR(lseg)) + pnfs_put_lseg(lseg); + return lseg; +} + static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_READ, - false, - GFP_KERNEL); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + false, + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -916,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, int status; if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_RW, - false, - GFP_NOFS); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + false, + GFP_NOFS); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; From de85ec8b07f82c8c84de7687f769e74bf4c26a1e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Mar 2017 13:07:16 +0800 Subject: [PATCH 1774/1911] virtio_pci: fix out of bound access for msix_names Fedora has received multiple reports of crashes when running 4.11 as a guest https://bugzilla.redhat.com/show_bug.cgi?id=1430297 https://bugzilla.redhat.com/show_bug.cgi?id=1434462 https://bugzilla.kernel.org/show_bug.cgi?id=194911 https://bugzilla.redhat.com/show_bug.cgi?id=1433899 The crashes are not always consistent but they are generally some flavor of oops or GPF in virtio related code. Multiple people have done bisections (Thank you Thorsten Leemhuis and Richard W.M. Jones) and found this commit to be at fault 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 is the first bad commit commit 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 Author: Christoph Hellwig Date: Sun Feb 5 18:15:19 2017 +0100 virtio_pci: use shared interrupts for virtqueues The issue seems to be an out of bounds access to the msix_names array corrupting kernel memory. Fixes: 07ec51480b5e ("virtio_pci: use shared interrupts for virtqueues") Reported-by: Laura Abbott Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin Reviewed-by: Christoph Hellwig Tested-by: Richard W.M. 
Jones Tested-by: Thorsten Leemhuis --- drivers/virtio/virtio_pci_common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index df548a6fb844f7..590534910dc617 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -147,7 +147,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); - int i, err = -ENOMEM, allocated_vectors, nvectors; + int i, j, err = -ENOMEM, allocated_vectors, nvectors; unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; @@ -212,7 +212,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!vp_dev->msix_vector_map) goto out_disable_config_irq; - allocated_vectors = 1; /* vector 0 is the config interrupt */ + allocated_vectors = j = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -236,18 +236,19 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, continue; } - snprintf(vp_dev->msix_names[i + 1], + snprintf(vp_dev->msix_names[j], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, IRQF_SHARED, - vp_dev->msix_names[i + 1], vqs[i]); + vp_dev->msix_names[j], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + j++; /* * Use a different vector for each queue if they are available, From fc8653228c8588a120f6b5dad6983b7b61ff669e Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 08:04:18 +0100 Subject: [PATCH 1775/1911] virtio_balloon: init 1st buffer in stats vq When init_vqs runs, virtio_balloon.stats is either uninitialized or contains stale values. The host updates its state with garbage data because it has no way of knowing that this is just a marker buffer used for signaling. This patch updates the stats before pushing the initial buffer. Alternative fixes: * Push an empty buffer in init_vqs. Not easily done with the current virtio implementation and violates the spec "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq". * Push a buffer with invalid tags in init_vqs. Violates the same spec clause, plus "invalid tag" is not really defined. Note: the spec says: When using the legacy interface, the device SHOULD ignore all values in the first buffer in the statsq supplied by the driver after device initialization. Note: Historically, drivers supplied an uninitialized buffer in the first buffer. Unfortunately QEMU does not seem to implement the recommendation even for the legacy interface. Cc: stable@vger.kernel.org Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 4e1191508228cd..fcd06e16a1a2a9 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -429,6 +429,8 @@ static int init_vqs(struct virtio_balloon *vb) * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). 
*/ + update_balloon_stats(vb); + sg_init_one(&sg, vb->stats, sizeof vb->stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) From 9646b26e85896ef0256e66649f7937f774dc18a6 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 28 Mar 2017 18:46:58 +0200 Subject: [PATCH 1776/1911] virtio-balloon: use actual number of stats for stats queue buffers The virtio balloon driver contained a not-so-obvious invariant that update_balloon_stats has to update exactly VIRTIO_BALLOON_S_NR counters in order to send valid stats to the host. This commit fixes it by having update_balloon_stats return the actual number of counters, and its callers use it when pushing buffers to the stats virtqueue. Note that it is still out of spec to change the number of counters at run-time. "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq." Suggested-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index fcd06e16a1a2a9..d9544db231ef3f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -242,11 +242,11 @@ static inline void update_stat(struct virtio_balloon *vb, int idx, #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) -static void update_balloon_stats(struct virtio_balloon *vb) +static unsigned int update_balloon_stats(struct virtio_balloon *vb) { unsigned long events[NR_VM_EVENT_ITEMS]; struct sysinfo i; - int idx = 0; + unsigned int idx = 0; long available; all_vm_events(events); @@ -266,6 +266,8 @@ static void update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(i.totalram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, pages_to_bytes(available)); + + return idx; } /* @@ -291,14 +293,14 @@ static void stats_handle_request(struct virtio_balloon *vb) { struct virtqueue *vq; struct scatterlist sg; - unsigned int len; + unsigned int len, num_stats; - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); vq = vb->stats_vq; if (!virtqueue_get_buf(vq, &len)) return; - sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); virtqueue_kick(vq); } @@ -423,15 +425,16 @@ static int init_vqs(struct virtio_balloon *vb) vb->deflate_vq = vqs[1]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; + unsigned int num_stats; vb->stats_vq = vqs[2]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). 
*/ - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); - sg_init_one(&sg, vb->stats, sizeof vb->stats); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) BUG(); From f0bb2d50dfcc519f06f901aac88502be6ff1df2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 18:46:59 +0200 Subject: [PATCH 1777/1911] virtio_balloon: prevent uninitialized variable use The latest gcc-7.0.1 snapshot reports a new warning: virtio/virtio_balloon.c: In function 'update_balloon_stats': virtio/virtio_balloon.c:258:26: error: 'events[2]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:260:26: error: 'events[3]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:261:56: error: 'events[18]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:262:56: error: 'events[17]' is used uninitialized in this function [-Werror=uninitialized] This seems absolutely right, so we should add an extra check to prevent copying uninitialized stack data into the statistics. >From all I can tell, this has been broken since the statistics code was originally added in 2.6.34. Fixes: 9564e138b1f6 ("virtio: Add memory statistics reporting to the balloon driver (V4)") Signed-off-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d9544db231ef3f..34adf9b9c05388 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -254,12 +254,14 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb) available = si_mem_available(); +#ifdef CONFIG_VM_EVENT_COUNTERS update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, pages_to_bytes(events[PSWPIN])); update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, From e3d5092b6756b9e0b08f94bbeafcc7afe19f0996 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:23 +0100 Subject: [PATCH 1778/1911] ACPI: ioapic: Clear on-stack resource before using it The on-stack resource-window 'win' in setup_res() is not properly initialized. This causes the pointers in the embedded 'struct resource' to contain stale addresses. These pointers (in my case the ->child pointer) later get propagated to the global iomem_resources list, causing a #GP exception when the list is traversed in iomem_map_sanity_check(). Fixes: c183619b63ec (x86/irq, ACPI: Implement ACPI driver to support IOAPIC hotplug) Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 1120dfd625b8a1..7e4fbf9a53a3cc 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -45,6 +45,12 @@ static acpi_status setup_res(struct acpi_resource *acpi_res, void *data) struct resource *res = data; struct resource_win win; + /* + * We might assign this to 'res' later, make sure all pointers are + * cleared before the resource is added to the global list + */ + memset(&win, 0, sizeof(win)); + res->flags = 0; if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM)) return AE_OK; From 08f63d97749185fab942a3a47ed80f5bd89b8b7d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:25 +0100 Subject: [PATCH 1779/1911] ACPI: Do not create a platform_device for IOAPIC/IOxAPIC No platform-device is required for IO(x)APICs, so don't even create them. [ rjw: This fixes a problem with leaking platform device objects after IOAPIC/IOxAPIC hot-removal events.] Signed-off-by: Joerg Roedel Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_platform.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index b4c1a6a51da482..03250e1f11039b 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -25,9 +25,11 @@ ACPI_MODULE_NAME("platform"); static const struct acpi_device_id forbidden_id_list[] = { - {"PNP0000", 0}, /* PIC */ - {"PNP0100", 0}, /* Timer */ - {"PNP0200", 0}, /* AT DMA Controller */ + {"PNP0000", 0}, /* PIC */ + {"PNP0100", 0}, /* Timer */ + {"PNP0200", 0}, /* AT DMA Controller */ + {"ACPI0009", 0}, /* IOxAPIC */ + {"ACPI000A", 0}, /* IOAPIC */ {"", 0}, }; From 7d64f82cceb21e6d95db312d284f5f195e120154 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 16 Mar 2017 14:30:39 +0000 Subject: [PATCH 1780/1911] ACPI / APEI: Add missing synchronize_rcu() on NOTIFY_SCI removal When removing a GHES device notified by SCI, list_del_rcu() is used, ghes_remove() should call synchronize_rcu() before it goes on to call kfree(ghes), otherwise concurrent RCU readers may still hold this list entry after it has been freed. Signed-off-by: James Morse Reviewed-by: "Huang, Ying" Fixes: 81e88fdc432a (ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b192b42a835105..79b3c9c5a3bc94 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1073,6 +1073,7 @@ static int ghes_remove(struct platform_device *ghes_dev) if (list_empty(&ghes_sci)) unregister_acpi_hed_notifier(&ghes_notifier_sci); mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); From 61b79e16c68d703dde58c25d3935d67210b7d71b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 08:56:28 -0500 Subject: [PATCH 1781/1911] ACPI: Fix incompatibility with mcount-based function graph tracing Paul Menzel reported a warning: WARNING: CPU: 0 PID: 774 at /build/linux-ROBWaj/linux-4.9.13/kernel/trace/trace_functions_graph.c:233 ftrace_return_to_handler+0x1aa/0x1e0 Bad frame pointer: expected f6919d98, received f6919db0 from func acpi_pm_device_sleep_wake return to c43b6f9d The warning means that function graph tracing is broken for the acpi_pm_device_sleep_wake() function. 
That's because the ACPI Makefile unconditionally sets the '-Os' gcc flag to optimize for size. That's an issue because mcount-based function graph tracing is incompatible with '-Os' on x86, thanks to the following gcc bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109 I have another patch pending which will ensure that mcount-based function graph tracing is never used with CONFIG_CC_OPTIMIZE_FOR_SIZE on x86. But this patch is needed in addition to that one because the ACPI Makefile overrides that config option for no apparent reason. It has had this flag since the beginning of git history, and there's no related comment, so I don't know why it's there. As far as I can tell, there's no reason for it to be there. The appropriate behavior is for it to honor CONFIG_CC_OPTIMIZE_FOR_{SIZE,PERFORMANCE} like the rest of the kernel. Reported-by: Paul Menzel Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt (VMware) Signed-off-by: Rafael J. Wysocki Cc: All applicable --- drivers/acpi/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index a391bbc48105ae..d94f92f88ca1c9 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -2,7 +2,6 @@ # Makefile for the Linux ACPI interpreter # -ccflags-y := -Os ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT # From f9ba3501d50317697811ff3c48f623f08d616fc8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Mar 2017 00:21:15 +0800 Subject: [PATCH 1782/1911] sctp: change to save MSG_MORE flag into assoc David Laight noticed the support for MSG_MORE with datamsg->force_delay didn't really work as we expected, as the first msg with MSG_MORE set would always block the following chunks' dequeuing. This Patch is to rewrite it by saving the MSG_MORE flag into assoc as David Laight suggested. asoc->force_delay is used to save MSG_MORE flag before a msg is sent. All chunks in queue would not be sent out if asoc->force_delay is set by the msg with MSG_MORE flag, until a new msg without MSG_MORE flag clears asoc->force_delay. Note that this change would not affect the flush is generated by other triggers, like asoc->state != ESTABLISHED, queue size > pmtu etc. v1->v2: Not clear asoc->force_delay after sending the msg with MSG_MORE flag. Fixes: 4ea0c32f5f42 ("sctp: add support for MSG_MORE") Signed-off-by: Xin Long Acked-by: David Laight Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/output.c | 2 +- net/sctp/socket.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 592decebac752f..8caa5ee9e290bc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -499,7 +499,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - force_delay:1, can_delay; /* should this message be Nagle delayed */ }; @@ -1878,6 +1877,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? 
*/ + force_delay:1, prsctp_enable:1, reconf_enable:1; diff --git a/net/sctp/output.c b/net/sctp/output.c index 1224421036b3e5..73fd178007a37e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -704,7 +704,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f378ea2ae3882..baa269a0d52ee3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { From af109a2cf6a9a6271fa420ae2d64d72d86c92b7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 12:11:07 +0200 Subject: [PATCH 1783/1911] isdn: kcapi: avoid uninitialized data gcc-7 points out that the AVMB1_ADDCARD ioctl results in an unintialized value ending up in the cardnr parameter: drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer': drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized] cparams.cardnr = cdef.cardnr; This has been broken since before the start of the git history, so either the value is not used for anything important, or the ioctl command doesn't get called in practice. Setting the cardnr to zero avoids the warning and makes sure we have consistent behavior. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/capi/kcapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 1dfd1085a04f87..9ca691d6c13b4d 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) sizeof(avmb1_carddef)))) return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; + cdef.cardnr = 0; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) From c2b341a620018d4eaeb0e85c16274ac4e5f153d4 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 28 Mar 2017 12:12:38 +0200 Subject: [PATCH 1784/1911] net: moxa: fix TX overrun memory leak moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can catch and pass tx_tail, which is bad because moxart_tx_finished() isn't guaranteed to catch up on freeing resources from tx_tail. Add a check in moxart_mac_start_xmit() stopping the queue at the end of the circular buffer. Also add a check in moxart_tx_finished() waking the queue if the buffer has TX_WAKE_THRESHOLD or more free descriptors. While we're at it, move spin_lock_irq() to happen before our descriptor pointer is assigned in moxart_mac_start_xmit(). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451 Signed-off-by: Jonas Jensen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 20 ++++++++++++++++++-- drivers/net/ethernet/moxa/moxart_ether.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 06c9f4100cb9bd..6ad44be08b3307 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "moxart_ether.h" @@ -278,6 +279,13 @@ static int moxart_rx_poll(struct napi_struct *napi, int budget) return rx; } +static int moxart_tx_queue_space(struct net_device *ndev) +{ + struct moxart_mac_priv_t *priv = netdev_priv(ndev); + + return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM); +} + static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); @@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev) tx_tail = TX_NEXT(tx_tail); } priv->tx_tail = tx_tail; + if (netif_queue_stopped(ndev) && + moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD) + netif_wake_queue(ndev); } static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) @@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct moxart_mac_priv_t *priv = netdev_priv(ndev); void *desc; unsigned int len; - unsigned int tx_head = priv->tx_head; + unsigned int tx_head; u32 txdes1; int ret = NETDEV_TX_BUSY; + spin_lock_irq(&priv->txlock); + + tx_head = priv->tx_head; desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head); - spin_lock_irq(&priv->txlock); + if (moxart_tx_queue_space(ndev) == 1) + netif_stop_queue(ndev); + if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) { net_dbg_ratelimited("no TX space for packet\n"); priv->stats.tx_dropped++; diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 93a9563ac7c673..afc32ec998c043 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -59,6 +59,7 @@ #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 #define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 #define RX_DESC_NUM_MASK (RX_DESC_NUM-1) From e497ec680c4cd51e76bfcdd49363d9ab8d32a757 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Tue, 28 Mar 2017 16:13:41 +0300 Subject: [PATCH 1785/1911] net/mlx5: Avoid dereferencing uninitialized pointer In NETDEV_CHANGEUPPER event the upper_info field is valid only when linking is true. Otherwise it should be ignored. Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) Signed-off-by: Talat Batheesh Reviewed-by: Aviv Heller Reviewed-by: Moni Shoua Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 55957246c0e844..b5d5519542e873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct netdev_notifier_changeupper_info *info) { struct net_device *upper = info->upper_dev, *ndev_tmp; - struct netdev_lag_upper_info *lag_upper_info; + struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded; int bond_status = 0; int num_slaves = 0; @@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, if (!netif_is_lag_master(upper)) return 0; - lag_upper_info = info->upper_info; + if (info->linking) + lag_upper_info = info->upper_info; /* The event may still be of interest if the slave does not belong to * us, but is enslaved to a master which has one or more of our netdevs From 23abec20aa7034c5d49b62c0a668f7291c819fab Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 26 Mar 2017 19:27:35 -0400 Subject: [PATCH 1786/1911] svcrdma: set XPT_CONG_CTRL flag for bc xprt Same change as Kinglong Mee's fix for the TCP backchannel service. Fixes: 5283b03ee5cd ("nfs/nfsd/sunrpc: enforce transport...") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c13a5c35ce14d9..fc8f14c7bfec60 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, xprt = &cma_xprt->sc_xprt; svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &xprt->xpt_flags); serv->sv_bc_xprt = xprt; dprintk("svcrdma: %s(%p)\n", __func__, xprt); From 16b8b6de32572207abeb4dfb74ab7bd8409a5690 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 16:11:18 +0200 Subject: [PATCH 1787/1911] rocker: fix Wmaybe-uninitialized false-positive gcc-7 reports a warning that earlier versions did not have: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 'ofdpa_port_stp_update': arch/x86/include/asm/string_32.h:79:22: error: '*((void *)&prev_ctrls+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] *((short *)to + 2) = *((short *)from + 2); ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/rocker/rocker_ofdpa.c:2218:7: note: '*((void *)&prev_ctrls+4)' was declared here This is clearly a variation of the warning about 'prev_state' that was shut up using uninitialized_var(). We can slightly simplify the code and get rid of the warning by unconditionally saving the prev_state and prev_ctrls variables. The inlined memcpy is not particularly expensive here, as it just has to read five bytes from one or two cache lines. Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7cd76b6b5cb9f6..2ae85245478087 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2216,18 +2216,15 @@ static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port, { bool want[OFDPA_CTRL_MAX] = { 0, }; bool prev_ctrls[OFDPA_CTRL_MAX]; - u8 uninitialized_var(prev_state); + u8 prev_state; int err; int i; - if (switchdev_trans_ph_prepare(trans)) { - memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); - prev_state = ofdpa_port->stp_state; - } - - if (ofdpa_port->stp_state == state) + prev_state = ofdpa_port->stp_state; + if (prev_state == state) return 0; + memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); ofdpa_port->stp_state = state; switch (state) { From b768b16de58d5e0b1d7c3f936825b25327ced20c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 28 Mar 2017 11:25:26 -0700 Subject: [PATCH 1788/1911] openvswitch: Fix refcount leak on force commit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference count held for skb needs to be released when the skb's nfct pointer is cleared regardless of if nf_ct_delete() is called or not. Failing to release the skb's reference cound led to deferred conntrack cleanup spinning forever within nf_conntrack_cleanup_net_list() when cleaning up a network namespace:    kworker/u16:0-19025 [004] 45981067.173642: sched_switch: kworker/u16:0:19025 [120] R ==> rcu_preempt:7 [120]    kworker/u16:0-19025 [004] 45981067.173651: kernel_stack: => ___preempt_schedule (ffffffffa001ed36) => _raw_spin_unlock_bh (ffffffffa0713290) => nf_ct_iterate_cleanup (ffffffffc00a4454) => nf_conntrack_cleanup_net_list (ffffffffc00a5e1e) => nf_conntrack_pernet_exit (ffffffffc00a63dd) => ops_exit_list.isra.1 (ffffffffa06075f3) => cleanup_net (ffffffffa0607df0) => process_one_work (ffffffffa0084c31) => worker_thread (ffffffffa008592b) => kthread (ffffffffa008bee2) => ret_from_fork (ffffffffa071b67c) Fixes: dd41d33f0b03 ("openvswitch: Add force commit.") Reported-by: Yang Song Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010a3..7b2c2fce408a02 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } From b3ef5520c1eabb56064474043c7c55a1a65b8708 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:31 +0100 Subject: [PATCH 1789/1911] cfg80211: check rdev resume callback only for registered wiphy We got the following use-after-free KASAN report: BUG: KASAN: use-after-free in wiphy_resume+0x591/0x5a0 [cfg80211] at addr ffff8803fc244090 Read of size 8 by task kworker/u16:24/2587 CPU: 6 PID: 2587 Comm: kworker/u16:24 Tainted: G B 4.9.13-debug+ Hardware name: Dell Inc. 
XPS 15 9550/0N7TVV, BIOS 1.2.19 12/22/2016 Workqueue: events_unbound async_run_entry_fn ffff880425d4f9d8 ffffffffaeedb541 ffff88042b80ef00 ffff8803fc244088 ffff880425d4fa00 ffffffffae84d7a1 ffff880425d4fa98 ffff8803fc244080 ffff88042b80ef00 ffff880425d4fa88 ffffffffae84da3a ffffffffc141f7d9 Call Trace: [] dump_stack+0x85/0xc4 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x500 [] ? cfg80211_bss_age+0x39/0xc0 [cfg80211] [] ? cfg80211_bss_age+0x9a/0xc0 [cfg80211] [] ? trace_hardirqs_on+0xd/0x10 [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] __asan_report_load8_noabort+0x61/0x70 [] ? wiphy_suspend+0xbb0/0xc70 [cfg80211] [] ? wiphy_resume+0x591/0x5a0 [cfg80211] [] wiphy_resume+0x591/0x5a0 [cfg80211] [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] dpm_run_callback+0x6e/0x4f0 [] device_resume+0x1c2/0x670 [] async_resume+0x1d/0x50 [] async_run_entry_fn+0xfe/0x610 [] process_one_work+0x716/0x1a50 [] ? process_one_work+0x679/0x1a50 [] ? _raw_spin_unlock_irq+0x3d/0x60 [] ? pwq_dec_nr_in_flight+0x2b0/0x2b0 [] worker_thread+0xe0/0x1460 [] ? process_one_work+0x1a50/0x1a50 [] kthread+0x222/0x2e0 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ret_from_fork+0x2a/0x40 Object at ffff8803fc244088, in cache kmalloc-1024 size: 1024 Allocated: PID = 71 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_kmalloc+0xad/0xe0 kasan_slab_alloc+0x12/0x20 __kmalloc_track_caller+0x134/0x360 kmemdup+0x20/0x50 brcmf_cfg80211_attach+0x10b/0x3a90 [brcmfmac] brcmf_bus_start+0x19a/0x9a0 [brcmfmac] brcmf_pcie_setup+0x1f1a/0x3680 [brcmfmac] brcmf_fw_request_nvram_done+0x44c/0x11b0 [brcmfmac] request_firmware_work_func+0x135/0x280 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Freed: PID = 2568 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_slab_free+0x71/0xb0 kfree+0xe8/0x2e0 brcmf_cfg80211_detach+0x62/0xf0 [brcmfmac] brcmf_detach+0x14a/0x2b0 [brcmfmac] brcmf_pcie_remove+0x140/0x5d0 [brcmfmac] brcmf_pcie_pm_leave_D3+0x198/0x2e0 [brcmfmac] pci_pm_resume+0x186/0x220 dpm_run_callback+0x6e/0x4f0 device_resume+0x1c2/0x670 async_resume+0x1d/0x50 async_run_entry_fn+0xfe/0x610 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Memory state around the buggy address: ffff8803fc243f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8803fc244000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8803fc244080: fc fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8803fc244100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8803fc244180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb What is happening is that brcmf_pcie_resume() detects a device that is no longer responsive and it decides to unbind resulting in a wiphy_unregister() and wiphy_free() call. Now the wiphy instance remains allocated, because PM needs to call wiphy_resume() for it. However, brcmfmac already does a kfree() for the struct cfg80211_registered_device::ops field. Change the checks in wiphy_resume() to only access the struct cfg80211_registered_device::ops if the wiphy instance is still registered at this time. 
Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be969..570a2b67ca1036 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } From b07c12517f2aed0add8ce18146bb426b14099392 Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Tue, 28 Mar 2017 15:55:28 +0300 Subject: [PATCH 1790/1911] xhci: plat: Register shutdown for xhci_plat Shutdown should be called for xhci_plat devices especially for situations where kexec might be used by stopping DMA transactions. Signed-off-by: Adam Wallis Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index bd02a6cd8e2c08..6ed468fa7d5e59 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -344,6 +344,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, From 0ab2881a406b9fd46224a3e8253bbc0141b4f844 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 28 Mar 2017 15:55:29 +0300 Subject: [PATCH 1791/1911] xhci: Set URB actual length for stopped control transfers A control transfer that stopped at the status stage incorrectly warned about a "unexpected TRB Type 4", and did not set the transferred actual_length for the URB. The URB actual_length for control transfers should contain the bytes transferred in the data stage. Bytes of a partially sent setup stage and missing bytes from status stage should be left out. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d9936c771fa074..a3309aa02993df 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1989,6 +1989,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, case TRB_NORMAL: td->urb->actual_length = requested - remaining; goto finish_td; + case TRB_STATUS: + td->urb->actual_length = requested; + goto finish_td; default: xhci_warn(xhci, "WARN: unexpected TRB Type %d\n", trb_type); From d3519b9d9606991a1305596348b6d690bfa3eb27 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 28 Mar 2017 15:55:30 +0300 Subject: [PATCH 1792/1911] xhci: Manually give back cancelled URB if we can't queue it for cancel xhci needs to take care of four scenarios when asked to cancel a URB. 1 URB is not queued or already given back. usb_hcd_check_unlink_urb() will return an error, we pass the error on 2 We fail to find xhci internal structures from urb private data such as virtual device and endpoint ring. 
Give back URB immediately, can't do anything about internal structures. 3 URB private data has valid pointers to xhci internal data, but host is not responding. give back URB immedately and remove the URB from the endpoint lists. 4 Everyting is working add URB to cancel list, queue a command to stop the endpoint, after which the URB can be turned to no-op or skipped, removed from lists, and given back. We failed to give back the urb in case 2 where the correct device and endpoint pointers could not be retrieved from URB private data. This caused a hang on Dell Inspiron 5558/0VNM2T at resume from suspend as urb was never returned. [ 245.270505] INFO: task rtsx_usb_ms_1:254 blocked for more than 120 seconds. [ 245.272244] Tainted: G W 4.11.0-rc3-ARCH #2 [ 245.273983] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.275737] rtsx_usb_ms_1 D 0 254 2 0x00000000 [ 245.277524] Call Trace: [ 245.279278] __schedule+0x2d3/0x8a0 [ 245.281077] schedule+0x3d/0x90 [ 245.281961] usb_kill_urb.part.3+0x6c/0xa0 [usbcore] [ 245.282861] ? wake_atomic_t_function+0x60/0x60 [ 245.283760] usb_kill_urb+0x21/0x30 [usbcore] [ 245.284649] usb_start_wait_urb+0xe5/0x170 [usbcore] [ 245.285541] ? try_to_del_timer_sync+0x53/0x80 [ 245.286434] usb_bulk_msg+0xbd/0x160 [usbcore] [ 245.287326] rtsx_usb_send_cmd+0x63/0x90 [rtsx_usb] Reported-by: diego.viola@gmail.com Tested-by: diego.viola@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 43 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 50aee8b7718b30..953fd8f62df078 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1477,6 +1477,7 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct xhci_ring *ep_ring; struct xhci_virt_ep *ep; struct xhci_command *command; + struct xhci_virt_device *vdev; xhci = hcd_to_xhci(hcd); spin_lock_irqsave(&xhci->lock, flags); @@ -1485,15 +1486,27 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* Make sure the URB hasn't completed or been unlinked already */ ret = usb_hcd_check_unlink_urb(hcd, urb, status); - if (ret || !urb->hcpriv) + if (ret) goto done; + + /* give back URB now if we can't queue it for cancel */ + vdev = xhci->devs[urb->dev->slot_id]; + urb_priv = urb->hcpriv; + if (!vdev || !urb_priv) + goto err_giveback; + + ep_index = xhci_get_endpoint_index(&urb->ep->desc); + ep = &vdev->eps[ep_index]; + ep_ring = xhci_urb_to_transfer_ring(xhci, urb); + if (!ep || !ep_ring) + goto err_giveback; + temp = readl(&xhci->op_regs->status); if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "HW died, freeing TD."); - urb_priv = urb->hcpriv; for (i = urb_priv->num_tds_done; - i < urb_priv->num_tds && xhci->devs[urb->dev->slot_id]; + i < urb_priv->num_tds; i++) { td = &urb_priv->td[i]; if (!list_empty(&td->td_list)) @@ -1501,23 +1514,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) if (!list_empty(&td->cancelled_td_list)) list_del_init(&td->cancelled_td_list); } - - usb_hcd_unlink_urb_from_ep(hcd, urb); - spin_unlock_irqrestore(&xhci->lock, flags); - usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN); - xhci_urb_free_priv(urb_priv); - return ret; + goto err_giveback; } - ep_index = xhci_get_endpoint_index(&urb->ep->desc); - ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index]; - 
ep_ring = xhci_urb_to_transfer_ring(xhci, urb); - if (!ep_ring) { - ret = -EINVAL; - goto done; - } - - urb_priv = urb->hcpriv; i = urb_priv->num_tds_done; if (i < urb_priv->num_tds) xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, @@ -1554,6 +1553,14 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) done: spin_unlock_irqrestore(&xhci->lock, flags); return ret; + +err_giveback: + if (urb_priv) + xhci_urb_free_priv(urb_priv); + usb_hcd_unlink_urb_from_ep(hcd, urb); + spin_unlock_irqrestore(&xhci->lock, flags); + usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN); + return ret; } /* Drop an endpoint from a new bandwidth configuration for this device. From a7f12a21f6b32bdd8d76d3af81eef9e72ce41ec0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 28 Mar 2017 15:07:38 -0400 Subject: [PATCH 1793/1911] usb: phy: isp1301: Fix build warning when CONFIG_OF is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit fd567653bdb9 ("usb: phy: isp1301: Add OF device ID table") added an OF device ID table, but used the of_match_ptr() macro that will lead to a build warning if CONFIG_OF symbol is disabled: drivers/usb/phy//phy-isp1301.c:36:34: warning: ‘isp1301_of_match’ defined but not used [-Wunused-const-variable=] static const struct of_device_id isp1301_of_match[] = { ^~~~~~~~~~~~~~~~ Fixes: fd567653bdb9 ("usb: phy: isp1301: Add OF device ID table") Reported-by: Arnd Bergmann Signed-off-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index b3b33cf7ddf608..f333024660b4d0 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -136,7 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, - .of_match_table = of_match_ptr(isp1301_of_match), + .of_match_table = isp1301_of_match, }, .probe = isp1301_probe, .remove = isp1301_remove, From 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 28 Mar 2017 22:59:25 +0800 Subject: [PATCH 1794/1911] netfilter: nfnetlink_queue: fix secctx memory leak We must call security_release_secctx to free the memory returned by security_secid_to_secctx, otherwise memory may be leaked forever. 
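As a general pattern, every successful security_secid_to_secctx() needs a matching security_release_secctx() on every exit path, error paths included. A schematic of the pairing, simplified and not taken from nfnetlink_queue itself (the LSM prototypes are assumed from include/linux/security.h):

	char *secdata = NULL;
	u32 seclen = 0;

	if (security_secid_to_secctx(secid, &secdata, &seclen))
		seclen = 0;			/* nothing to release later */

	/* ... build the message, possibly failing ... */

	if (seclen)				/* reached on success and failure alike */
		security_release_secctx(secdata, seclen);

The patch below does exactly that: both the success path and the new nlmsg_failure label release the context before returning.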
Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a41e..933509ebf3d3e2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } From 7d65f82954dadbbe7b6e1aec7e07ad17bc6d958b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Mar 2017 14:15:24 +0200 Subject: [PATCH 1795/1911] mac80211: unconditionally start new netdev queues with iTXQ support When internal mac80211 TXQs aren't supported, netdev queues must always started out started even when driver queues are stopped while the interface is added. This is necessary because with the internal TXQ support netdev queues are never stopped and packet scheduling/dropping is done in mac80211. Cc: stable@vger.kernel.org # 4.9+ Fixes: 80a83cfc434b1 ("mac80211: skip netdev queue control with software queuing") Reported-and-tested-by: Sven Eckelmann Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c60..5bb0c501281954 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { From 9d0d1c8b1c9d80b17cfa86ecd50c8933a742585c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 24 Mar 2017 15:04:50 -0700 Subject: [PATCH 1796/1911] Btrfs: bring back repair during read Commit 20a7db8ab3f2 ("btrfs: add dummy callback for readpage_io_failed and drop checks") made a cleanup around readpage_io_failed_hook, and it was supposed to keep the original sematics, but it also unexpectedly disabled repair during read for dup, raid1 and raid10. This fixes the problem by letting data's inode call the generic readpage_io_failed callback by returning -EAGAIN from its readpage_io_failed_hook in order to notify end_bio_extent_readpage to do the rest. We don't call it directly because the generic one takes an offset from end_bio_extent_readpage() to calculate the index in the checksum array and inode's readpage_io_failed_hook doesn't offer that offset. 
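Put differently, the hook's return value now forms a small contract between the per-inode hook and end_bio_extent_readpage(). Paraphrased as a comment (my summary, not text from the tree):

	/*
	 * readpage_io_failed_hook() return values after this change:
	 *   -EAGAIN  data inode -- the caller runs the generic
	 *            bio_readpage_error() repair path (re-read from
	 *            another mirror/copy when one exists)
	 *   -EIO     metadata, or data that could not be repaired --
	 *            the page stays !uptodate and the caller reports
	 *            the error
	 */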
Cc: David Sterba Signed-off-by: Liu Bo Reviewed-by: David Sterba [ keep the const function attribute ] Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 46 +++++++++++++++++++++++++++----------------- fs/btrfs/inode.c | 6 +++--- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8df797432740df..27fdb250b4467f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2584,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio) if (tree->ops) { ret = tree->ops->readpage_io_failed_hook(page, mirror); - if (!ret && !bio->bi_error) - uptodate = 1; - } else { + if (ret == -EAGAIN) { + /* + * Data inode's readpage_io_failed_hook() always + * returns -EAGAIN. + * + * The generic bio_readpage_error handles errors + * the following way: If possible, new read + * requests are created and submitted and will + * end up in end_bio_extent_readpage as well (if + * we're lucky, not in the !uptodate case). In + * that case it returns 0 and we just go on with + * the next page in our bio. If it can't handle + * the error it will return -EIO and we remain + * responsible for that page. + */ + ret = bio_readpage_error(bio, offset, page, + start, end, mirror); + if (ret == 0) { + uptodate = !bio->bi_error; + offset += len; + continue; + } + } + /* - * The generic bio_readpage_error handles errors the - * following way: If possible, new read requests are - * created and submitted and will end up in - * end_bio_extent_readpage as well (if we're lucky, not - * in the !uptodate case). In that case it returns 0 and - * we just go on with the next page in our bio. If it - * can't handle the error it will return -EIO and we - * remain responsible for that page. + * metadata's readpage_io_failed_hook() always returns + * -EIO and fixes nothing. -EIO is also returned if + * data inode error could not be fixed. */ - ret = bio_readpage_error(bio, offset, page, start, end, - mirror); - if (ret == 0) { - uptodate = !bio->bi_error; - offset += len; - continue; - } + ASSERT(ret == -EIO); } readpage_ok: if (likely(uptodate)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e57191072aa384..876f1d36030c1c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10523,9 +10523,9 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) } __attribute__((const)) -static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror) +static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) { - return 0; + return -EAGAIN; } static const struct inode_operations btrfs_dir_inode_operations = { @@ -10570,7 +10570,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .merge_bio_hook = btrfs_merge_bio_hook, - .readpage_io_failed_hook = dummy_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, /* optional callbacks */ .fill_delalloc = run_delalloc_range, From ce0dcee626c482183b42d45b6ea43198c7223fc7 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 14 Mar 2017 05:25:09 -0500 Subject: [PATCH 1797/1911] btrfs: Change qgroup_meta_rsv to 64bit Using an int value is causing qg->reserved to become negative and exclusive -EDQUOT to be reached prematurely. This affects exclusive qgroups only. 
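Before the test case below, a tiny standalone illustration (made-up reservation sizes, not taken from the report) of why a 32-bit counter goes negative while a 64-bit one does not:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* made-up pattern: ~44000 files, 48KiB of metadata reservation each */
	int64_t total = 0;
	const int64_t step = 48 * 1024;

	for (int i = 0; i < 44000; i++)
		total += step;

	/* what a 64-bit counter holds vs. what a 32-bit one would hold */
	printf("64-bit counter: %lld\n", (long long)total);
	/* on a two's-complement system the truncated value wraps negative,
	 * which is what triggers the premature -EDQUOT */
	printf("32-bit counter: %d\n", (int32_t)total);
	return 0;
}
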
TEST CASE: DEVICE=/dev/vdb MOUNTPOINT=/mnt SUBVOL=$MOUNTPOINT/tmp umount $SUBVOL umount $MOUNTPOINT mkfs.btrfs -f $DEVICE mount /dev/vdb $MOUNTPOINT btrfs quota enable $MOUNTPOINT btrfs subvol create $SUBVOL umount $MOUNTPOINT mount /dev/vdb $MOUNTPOINT mount -o subvol=tmp $DEVICE $SUBVOL btrfs qgroup limit -e 3G $SUBVOL btrfs quota rescan /mnt -w for i in `seq 1 44000`; do dd if=/dev/zero of=/mnt/tmp/test_$i bs=10k count=1 if [[ $? > 0 ]]; then btrfs qgroup show -pcref $SUBVOL exit 1 fi done Signed-off-by: Goldwyn Rodrigues [ add reproducer to changelog ] Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/qgroup.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f03c2f285eb1ef..660d485b6e8b14 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1258,7 +1258,7 @@ struct btrfs_root { atomic_t will_be_snapshoted; /* For qgroup metadata space reserve */ - atomic_t qgroup_meta_rsv; + atomic64_t qgroup_meta_rsv; }; static inline u32 btrfs_inode_sectorsize(const struct inode *inode) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 73fdc6bdaea983..982c56f7951517 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); - atomic_set(&root->qgroup_meta_rsv, 0); + atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5da750c1087fd..a59801dc2a340b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2948,20 +2948,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ret = qgroup_reserve(root, num_bytes, enforce); if (ret < 0) return ret; - atomic_add(num_bytes, &root->qgroup_meta_rsv); + atomic64_add(num_bytes, &root->qgroup_meta_rsv); return ret; } void btrfs_qgroup_free_meta_all(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; - int reserved; + u64 reserved; if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || !is_fstree(root->objectid)) return; - reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); + reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); if (reserved == 0) return; btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); @@ -2976,8 +2976,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) return; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); - atomic_sub(num_bytes, &root->qgroup_meta_rsv); + WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); + atomic64_sub(num_bytes, &root->qgroup_meta_rsv); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); } From 457ae7268b29c33dee1c0feb143a15f6029d177b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:51:20 +0300 Subject: [PATCH 1798/1911] Btrfs: fix an integer overflow check This isn't super serious because you need CAP_ADMIN to run this code. I added this integer overflow check last year but apparently I am rubbish at writing integer overflow checks... There are two issues. First, access_ok() works on unsigned long type and not u64 so on 32 bit systems the access_ok() could be checking a truncated size. 
The other issue is that we should be using a stricter limit so we don't overflow the kzalloc() setting ctx->clone_roots later in the function after the access_ok(): alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); Fixes: f5ecec3ce21f ("btrfs: send: silence an integer overflow warning") Signed-off-by: Dan Carpenter Reviewed-by: David Sterba [ added comment ] Signed-off-by: David Sterba --- fs/btrfs/send.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 456c8901489b6c..a60d5bfb8a49e2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside + * access_ok. + */ if (arg->clone_sources_count > - ULLONG_MAX / sizeof(*arg->clone_sources)) { + ULONG_MAX / sizeof(struct clone_root) - 1) { ret = -EINVAL; goto out; } From f3cd1b064f1179d9e6188c6d67297a2360880e10 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 22 Mar 2017 12:07:23 +0100 Subject: [PATCH 1799/1911] drm/etnaviv: (re-)protect fence allocation with GPU mutex The fence allocation needs to be protected by the GPU mutex, otherwise the fence seqnos of concurrent submits might not match the insertion order of the jobs in the kernel ring. This breaks the assumption that jobs complete with monotonically increasing fence seqnos. Fixes: d9853490176c (drm/etnaviv: take GPU lock later in the submit process) CC: stable@vger.kernel.org #4.9+ Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 130d7d517a19a1..da48819ff2e655 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1311,6 +1311,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; } + mutex_lock(&gpu->lock); + fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { event_free(gpu, event); @@ -1318,8 +1320,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; } - mutex_lock(&gpu->lock); - gpu->event[event].fence = fence; submit->fence = fence->seqno; gpu->active_fence = submit->fence; From 677e806da4d916052585301785d847c3b3e6186a Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 22 Mar 2017 07:29:31 +0000 Subject: [PATCH 1800/1911] xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window When a new xfrm state is created during an XFRM_MSG_NEWSA call we validate the user supplied replay_esn to ensure that the size is valid and to ensure that the replay_window size is within the allocated buffer. However later it is possible to update this replay_esn via a XFRM_MSG_NEWAE call. There we again validate the size of the supplied buffer matches the existing state and if so inject the contents. We do not at this point check that the replay_window is within the allocated memory. This leads to out-of-bounds reads and writes triggered by netlink packets. This leads to memory corruption and the potential for priviledge escalation. We already attempt to validate the incoming replay information in xfrm_new_ae() via xfrm_replay_verify_len(). This confirms that the user is not trying to change the size of the replay state buffer which includes the replay_esn. 
It however does not check the replay_window remains within that buffer. Add validation of the contained replay_window. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 9705c279494b24..cdf887fa61d574 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -415,6 +415,9 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + return 0; } From f843ee6dd019bcece3e74e76ad9df0155655d0df Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 23 Mar 2017 07:45:44 +0000 Subject: [PATCH 1801/1911] xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder Kees Cook has pointed out that xfrm_replay_state_esn_len() is subject to wrapping issues. To ensure we are correctly ensuring that the two ESN structures are the same size compare both the overall size as reported by xfrm_replay_state_esn_len() and the internal length are the same. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds --- net/xfrm/xfrm_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cdf887fa61d574..40a8aa39220d67 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,11 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) return -EINVAL; if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) From fb411b837b587a32046dc4f369acb93a10b1def8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:53 +0100 Subject: [PATCH 1802/1911] c6x/ptrace: Remove useless PTRACE_SETREGSET implementation gpr_set won't work correctly and can never have been tested, and the correct behaviour is not clear due to the endianness-dependent task layout. So, just remove it. The core code will now return -EOPNOTSUPPORT when trying to set NT_PRSTATUS on this architecture until/unless a correct implementation is supplied. 
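As a rough sketch of the convention relied on here, using invented names rather than the real user_regset API: leaving the .set callback NULL makes the core reject write requests instead of applying a broken implementation. The toy "core" below returns -EOPNOTSUPP (assuming a Linux errno.h), matching what the commit message says userspace will now see.

#include <errno.h>
#include <stdio.h>

/* toy regset descriptor: a missing .set means "read-only" */
struct regset_ops {
	int (*get)(void *buf);
	int (*set)(const void *buf);
};

static int gpr_get(void *buf)
{
	(void)buf;
	return 0;	/* pretend the registers were copied out */
}

static const struct regset_ops gpr_regset = {
	.get = gpr_get,
	/* no .set: writes are refused by the core below */
};

static int core_setregset(const struct regset_ops *ops, const void *buf)
{
	if (!ops->set)
		return -EOPNOTSUPP;
	return ops->set(buf);
}

int main(void)
{
	printf("SETREGSET -> %d\n", core_setregset(&gpr_regset, NULL));
	return 0;
}
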
Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/c6x/kernel/ptrace.c | 41 ---------------------------------------- 1 file changed, 41 deletions(-) diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d1e..a511ac16a8e37c 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -69,46 +69,6 @@ static int gpr_get(struct task_struct *target, 0, sizeof(*regs)); } -static int gpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - struct pt_regs *regs = task_pt_regs(target); - - /* Don't copyin TSR or CSR */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - 0, PT_TSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_TSR * sizeof(long), - (PT_TSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_TSR + 1) * sizeof(long), - PT_CSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_CSR * sizeof(long), - (PT_CSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_CSR + 1) * sizeof(long), -1); - return ret; -} - enum c6x_regset { REGSET_GPR, }; @@ -120,7 +80,6 @@ static const struct user_regset c6x_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .get = gpr_get, - .set = gpr_set }, }; From 502585c7555083d4a949c08350306b9ec196779e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:54 +0100 Subject: [PATCH 1803/1911] h8300/ptrace: Fix incorrect register transfer count regs_set() and regs_get() are vulnerable to an off-by-1 buffer overrun if CONFIG_CPU_H8S is set, since this adds an extra entry to register_offset[] but not to user_regs_struct. So, iterate over user_regs_struct based on its actual size, not based on the length of register_offset[]. 
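A standalone sketch of the pattern the fix switches to, with an invented register layout: the loop bound is derived from the struct actually being filled, and a compile-time assertion guards the "whole number of longs" assumption, much like the BUILD_BUG_ON() in the patch.

#include <stdio.h>

/* invented stand-in for user_regs_struct */
struct user_regs {
	long pc, sp, ccr, er0, er1, er2, er3, er4, er5, er6;
};

int main(void)
{
	struct user_regs regs = { 0 };
	long *reg = (long *)&regs;

	/* compile-time guard on the size assumption */
	_Static_assert(sizeof(struct user_regs) % sizeof(long) == 0,
		       "user_regs must be a whole number of longs");

	/* bounded by the struct itself, not by some other table's length */
	for (size_t r = 0; r < sizeof(regs) / sizeof(long); r++)
		reg[r] = (long)r;

	printf("filled %zu registers\n", sizeof(regs) / sizeof(long));
	return 0;
}
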
Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/h8300/kernel/ptrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 92075544a19ac0..0dc1c8f622bc3f 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -95,7 +95,8 @@ static int regs_get(struct task_struct *target, long *reg = (long *)®s; /* build user regs in buffer */ - for (r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -113,7 +114,8 @@ static int regs_set(struct task_struct *target, long *reg; /* build user regs in buffer */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -122,7 +124,7 @@ static int regs_set(struct task_struct *target, return ret; /* write back to pt_regs */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) h8300_put_reg(target, r, *reg++); return 0; } From a78ce80d2c9178351b34d78fec805140c29c193e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:55 +0100 Subject: [PATCH 1804/1911] metag/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bdf6..ae659ba619487d 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -303,7 +303,7 @@ static int metag_tls_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - void __user *tls; + void __user *tls = target->thread.tls_ptr; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) From 5fe81fe98123ce41265c65e95d34418d30d005d1 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:56 +0100 Subject: [PATCH 1805/1911] metag/ptrace: Provide default TXSTATUS for short NT_PRSTATUS Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill TXSTATUS, a well-defined default value is used, based on the task's current value. Suggested-by: James Hogan Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index ae659ba619487d..2e4dfc15abd330 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -24,6 +24,16 @@ * user_regset definitions. 
*/ +static unsigned long user_txstatus(const struct pt_regs *regs) +{ + unsigned long data = (unsigned long)regs->ctx.Flags; + + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + + return data; +} + int metag_gp_regs_copyout(const struct pt_regs *regs, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -62,9 +72,7 @@ int metag_gp_regs_copyout(const struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ - data = (unsigned long)regs->ctx.Flags; - if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) - data |= USER_GP_REGS_STATUS_CATCH_BIT; + data = user_txstatus(regs); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) @@ -119,6 +127,7 @@ int metag_gp_regs_copyin(struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ + data = user_txstatus(regs); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) From 7195ee3120d878259e8d94a5d9f808116f34d5ea Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:57 +0100 Subject: [PATCH 1806/1911] metag/ptrace: Reject partial NT_METAG_RPIPE writes It's not clear what behaviour is sensible when doing partial write of NT_METAG_RPIPE, so just don't bother. This patch assumes that userspace will never rely on a partial SETREGSET in this case, since it's not clear what should happen anyway. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 2e4dfc15abd330..5e2dc7defd2cea 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -253,6 +253,8 @@ int metag_rp_state_copyin(struct pt_regs *regs, unsigned long long *ptr; int ret, i; + if (count < 4*13) + return -EINVAL; /* Read the entire pipeline before making any changes */ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &rp, 0, 4*13); From d614fd58a2834cfe4efa472c33c8f3ce2338b09b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:58 +0100 Subject: [PATCH 1807/1911] mips/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/mips/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c8ba2607213298..5d2498eb2340c5 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -485,7 +485,8 @@ static int fpr_set(struct task_struct *target, &target->thread.fpu, 0, sizeof(elf_fpregset_t)); - for (i = 0; i < NUM_FPU_REGS; i++) { + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); From d3805c546b275c8cc7d40f759d029ae92c7175f2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:59 +0100 Subject: [PATCH 1808/1911] sparc/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: David S. 
Miller Signed-off-by: Linus Torvalds --- arch/sparc/kernel/ptrace_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 901063c1cf7eed..341129a40e944a 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -350,7 +350,7 @@ static int genregs64_set(struct task_struct *target, } if (!ret) { - unsigned long y; + unsigned long y = regs->y; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &y, From 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:44:59 +0200 Subject: [PATCH 1809/1911] l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 The code following l2tp_tunnel_find() expects that a new reference is held on sk. Either sk_receive_skb() or the discard_put error path will drop a reference from the tunnel's socket. This issue exists in both l2tp_ip and l2tp_ip6. Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e1a..7208fbe5856bba 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -178,9 +178,10 @@ static int l2tp_ip_recv(struct sk_buff *skb) tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09ae6..516d7ce24ba7b4 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -191,9 +191,10 @@ static int l2tp_ip6_recv(struct sk_buff *skb) tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); From e91793bb615cf6cdd59c0b6749fe173687bb0947 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:45:29 +0200 Subject: [PATCH 1810/1911] l2tp: purge socket queues in the .destruct() callback The Rx path may grab the socket right before pppol2tp_release(), but nothing guarantees that it will enqueue packets before skb_queue_purge(). Therefore, the socket can be destroyed without its queues fully purged. Fix this by purging queues in pppol2tp_session_destruct() where we're guaranteed nothing is still referencing the socket. Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets") Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_ppp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd041871..123b6a2411a038 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via From 554bfeceb8a22d448cd986fc9efce25e833278a1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Mar 2017 21:41:05 +0200 Subject: [PATCH 1811/1911] parisc: Fix access fault handling in pa_memcpy() pa_memcpy() is the major memcpy implementation in the parisc kernel which is used to do any kind of userspace/kernel memory copies. Al Viro noticed various bugs in the implementation of pa_mempcy(), most notably that in case of faults it may report back to have copied more bytes than it actually did. Fixing those bugs is quite hard in the C-implementation, because the compiler is messing around with the registers and we are not guaranteed that specific variables are always in the same processor registers. This makes proper fault handling complicated. This patch implements pa_memcpy() in assembler. That way we have correct fault handling and adding a 64-bit copy routine was quite easy. Runtime tested with 32- and 64bit kernels. Reported-by: Al Viro Cc: # v4.9+ Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/lib/lusercopy.S | 318 +++++++++++++++++++++++++ arch/parisc/lib/memcpy.c | 461 +----------------------------------- 2 files changed, 321 insertions(+), 458 deletions(-) diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 56845de6b5dfc9..f01188c044ee83 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -5,6 +5,8 @@ * Copyright (C) 2000 Richard Hirst * Copyright (C) 2001 Matthieu Delahaye * Copyright (C) 2003 Randolph Chung + * Copyright (C) 2017 Helge Deller + * Copyright (C) 2017 John David Anglin * * * This program is free software; you can redistribute it and/or modify @@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user) .procend + + +/* + * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) + * + * Inputs: + * - sr1 already contains space of source region + * - sr2 already contains space of destination region + * + * Returns: + * - number of bytes that could not be copied. + * On success, this will be zero. + * + * This code is based on a C-implementation of a copy routine written by + * Randolph Chung, which in turn was derived from the glibc. + * + * Several strategies are tried to try to get the best performance for various + * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes + * at a time using general registers. Unaligned copies are handled either by + * aligning the destination and then using shift-and-write method, or in a few + * cases by falling back to a byte-at-a-time copy. 
+ * + * Testing with various alignments and buffer sizes shows that this code is + * often >10x faster than a simple byte-at-a-time copy, even for strangely + * aligned operands. It is interesting to note that the glibc version of memcpy + * (written in C) is actually quite fast already. This routine is able to beat + * it by 30-40% for aligned copies because of the loop unrolling, but in some + * cases the glibc version is still slightly faster. This lends more + * credibility that gcc can generate very good code as long as we are careful. + * + * Possible optimizations: + * - add cache prefetching + * - try not to use the post-increment address modifiers; they may create + * additional interlocks. Assumption is that those were only efficient on old + * machines (pre PA8000 processors) + */ + + dst = arg0 + src = arg1 + len = arg2 + end = arg3 + t1 = r19 + t2 = r20 + t3 = r21 + t4 = r22 + srcspc = sr1 + dstspc = sr2 + + t0 = r1 + a1 = t1 + a2 = t2 + a3 = t3 + a0 = t4 + + save_src = ret0 + save_dst = ret1 + save_len = r31 + +ENTRY_CFI(pa_memcpy) + .proc + .callinfo NO_CALLS + .entry + + /* Last destination address */ + add dst,len,end + + /* short copy with less than 16 bytes? */ + cmpib,>>=,n 15,len,.Lbyte_loop + + /* same alignment? */ + xor src,dst,t0 + extru t0,31,2,t1 + cmpib,<>,n 0,t1,.Lunaligned_copy + +#ifdef CONFIG_64BIT + /* only do 64-bit copies if we can get aligned. */ + extru t0,31,3,t1 + cmpib,<>,n 0,t1,.Lalign_loop32 + + /* loop until we are 64-bit aligned */ +.Lalign_loop64: + extru dst,31,3,t1 + cmpib,=,n 0,t1,.Lcopy_loop_16 +20: ldb,ma 1(srcspc,src),t1 +21: stb,ma t1,1(dstspc,dst) + b .Lalign_loop64 + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + + ldi 31,t0 +.Lcopy_loop_16: + cmpb,COND(>>=),n t0,len,.Lword_loop + +10: ldd 0(srcspc,src),t1 +11: ldd 8(srcspc,src),t2 + ldo 16(src),src +12: std,ma t1,8(dstspc,dst) +13: std,ma t2,8(dstspc,dst) +14: ldd 0(srcspc,src),t1 +15: ldd 8(srcspc,src),t2 + ldo 16(src),src +16: std,ma t1,8(dstspc,dst) +17: std,ma t2,8(dstspc,dst) + + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault) + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault) + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) + + b .Lcopy_loop_16 + ldo -32(len),len + +.Lword_loop: + cmpib,COND(>>=),n 3,len,.Lbyte_loop +20: ldw,ma 4(srcspc,src),t1 +21: stw,ma t1,4(dstspc,dst) + b .Lword_loop + ldo -4(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +#endif /* CONFIG_64BIT */ + + /* loop until we are 32-bit aligned */ +.Lalign_loop32: + extru dst,31,2,t1 + cmpib,=,n 0,t1,.Lcopy_loop_4 +20: ldb,ma 1(srcspc,src),t1 +21: stb,ma t1,1(dstspc,dst) + b .Lalign_loop32 + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + + +.Lcopy_loop_4: + cmpib,COND(>>=),n 15,len,.Lbyte_loop + +10: ldw 0(srcspc,src),t1 +11: ldw 4(srcspc,src),t2 +12: stw,ma t1,4(dstspc,dst) +13: stw,ma t2,4(dstspc,dst) +14: ldw 8(srcspc,src),t1 +15: ldw 12(srcspc,src),t2 + ldo 16(src),src +16: stw,ma t1,4(dstspc,dst) +17: stw,ma t2,4(dstspc,dst) + + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault) + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) + 
ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault) + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) + + b .Lcopy_loop_4 + ldo -16(len),len + +.Lbyte_loop: + cmpclr,COND(<>) len,%r0,%r0 + b,n .Lcopy_done +20: ldb 0(srcspc,src),t1 + ldo 1(src),src +21: stb,ma t1,1(dstspc,dst) + b .Lbyte_loop + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +.Lcopy_done: + bv %r0(%r2) + sub end,dst,ret0 + + + /* src and dst are not aligned the same way. */ + /* need to go the hard way */ +.Lunaligned_copy: + /* align until dst is 32bit-word-aligned */ + extru dst,31,2,t1 + cmpib,COND(=),n 0,t1,.Lcopy_dstaligned +20: ldb 0(srcspc,src),t1 + ldo 1(src),src +21: stb,ma t1,1(dstspc,dst) + b .Lunaligned_copy + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +.Lcopy_dstaligned: + + /* store src, dst and len in safe place */ + copy src,save_src + copy dst,save_dst + copy len,save_len + + /* len now needs give number of words to copy */ + SHRREG len,2,len + + /* + * Copy from a not-aligned src to an aligned dst using shifts. + * Handles 4 words per loop. + */ + + depw,z src,28,2,t0 + subi 32,t0,t0 + mtsar t0 + extru len,31,2,t0 + cmpib,= 2,t0,.Lcase2 + /* Make src aligned by rounding it down. */ + depi 0,31,2,src + + cmpiclr,<> 3,t0,%r0 + b,n .Lcase3 + cmpiclr,<> 1,t0,%r0 + b,n .Lcase1 +.Lcase0: + cmpb,= %r0,len,.Lcda_finish + nop + +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b,n .Ldo3 +.Lcase1: +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + ldo -1(len),len + cmpb,=,n %r0,len,.Ldo0 +.Ldo4: +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a2, a3, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo3: +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a3, a0, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo2: +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a0, a1, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo1: +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a1, a2, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) + ldo -4(len),len + cmpb,<> %r0,len,.Ldo4 + nop +.Ldo0: + shrpw a2, a3, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) + +.Lcda_rdfault: +.Lcda_finish: + /* calculate new src, dst and len and jump to byte-copy loop */ + sub dst,save_dst,t0 + add save_src,t0,src + b .Lbyte_loop + sub save_len,t0,len + +.Lcase3: +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b .Ldo2 + ldo 1(len),len +.Lcase2: +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b .Ldo1 + ldo 2(len),len + + + /* fault exception fixup handlers: */ +#ifdef CONFIG_64BIT +.Lcopy16_fault: +10: b .Lcopy_done + std,ma t1,8(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) +#endif + +.Lcopy8_fault: +10: b .Lcopy_done + stw,ma t1,4(dstspc,dst) + 
ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + + .exit +ENDPROC_CFI(pa_memcpy) + .procend + .end diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index f82ff10ed97411..b3d47ec1d80a24 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -2,7 +2,7 @@ * Optimized memory copy routines. * * Copyright (C) 2004 Randolph Chung - * Copyright (C) 2013 Helge Deller + * Copyright (C) 2013-2017 Helge Deller * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,474 +21,21 @@ * Portions derived from the GNU C Library * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. * - * Several strategies are tried to try to get the best performance for various - * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using - * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using - * general registers. Unaligned copies are handled either by aligning the - * destination and then using shift-and-write method, or in a few cases by - * falling back to a byte-at-a-time copy. - * - * I chose to implement this in C because it is easier to maintain and debug, - * and in my experiments it appears that the C code generated by gcc (3.3/3.4 - * at the time of writing) is fairly optimal. Unfortunately some of the - * semantics of the copy routine (exception handling) is difficult to express - * in C, so we have to play some tricks to get it to work. - * - * All the loads and stores are done via explicit asm() code in order to use - * the right space registers. - * - * Testing with various alignments and buffer sizes shows that this code is - * often >10x faster than a simple byte-at-a-time copy, even for strangely - * aligned operands. It is interesting to note that the glibc version - * of memcpy (written in C) is actually quite fast already. This routine is - * able to beat it by 30-40% for aligned copies because of the loop unrolling, - * but in some cases the glibc version is still slightly faster. This lends - * more credibility that gcc can generate very good code as long as we are - * careful. - * - * TODO: - * - cache prefetching needs more experimentation to get optimal settings - * - try not to use the post-increment address modifiers; they create additional - * interlocks - * - replace byte-copy loops with stybs sequences */ -#ifdef __KERNEL__ #include #include #include -#define s_space "%%sr1" -#define d_space "%%sr2" -#else -#include "memcpy.h" -#define s_space "%%sr0" -#define d_space "%%sr0" -#define pa_memcpy new2_copy -#endif DECLARE_PER_CPU(struct exception_data, exception_data); -#define preserve_branch(label) do { \ - volatile int dummy = 0; \ - /* The following branch is never taken, it's just here to */ \ - /* prevent gcc from optimizing away our exception code. */ \ - if (unlikely(dummy != dummy)) \ - goto label; \ -} while (0) - #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) #define get_kernel_space() (0) -#define MERGE(w0, sh_1, w1, sh_2) ({ \ - unsigned int _r; \ - asm volatile ( \ - "mtsar %3\n" \ - "shrpw %1, %2, %%sar, %0\n" \ - : "=r"(_r) \ - : "r"(w0), "r"(w1), "r"(sh_2) \ - ); \ - _r; \ -}) -#define THRESHOLD 16 - -#ifdef DEBUG_MEMCPY -#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) -#else -#define DPRINTF(fmt, args...) 
-#endif - -#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : _tt(_t), "+r"(_a) \ - : \ - : "r8") - -#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : "+r"(_a) \ - : _tt(_t) \ - : "r8") - -#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) -#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) -#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) -#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) -#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) -#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) - -#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : _tt(_t) \ - : "r"(_a) \ - : "r8") - -#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : \ - : _tt(_t), "r"(_a) \ - : "r8") - -#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) -#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) - -#ifdef CONFIG_PREFETCH -static inline void prefetch_src(const void *addr) -{ - __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); -} - -static inline void prefetch_dst(const void *addr) -{ - __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); -} -#else -#define prefetch_src(addr) do { } while(0) -#define prefetch_dst(addr) do { } while(0) -#endif - -#define PA_MEMCPY_OK 0 -#define PA_MEMCPY_LOAD_ERROR 1 -#define PA_MEMCPY_STORE_ERROR 2 - -/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words - * per loop. This code is derived from glibc. - */ -static noinline unsigned long copy_dstaligned(unsigned long dst, - unsigned long src, unsigned long len) -{ - /* gcc complains that a2 and a3 may be uninitialized, but actually - * they cannot be. Initialize a2/a3 to shut gcc up. - */ - register unsigned int a0, a1, a2 = 0, a3 = 0; - int sh_1, sh_2; - - /* prefetch_src((const void *)src); */ - - /* Calculate how to shift a word read at the memory operation - aligned srcp to make it aligned for copy. */ - sh_1 = 8 * (src % sizeof(unsigned int)); - sh_2 = 8 * sizeof(unsigned int) - sh_1; - - /* Make src aligned by rounding it down. 
*/ - src &= -sizeof(unsigned int); - - switch (len % 4) - { - case 2: - /* a1 = ((unsigned int *) src)[0]; - a2 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a1, cda_ldw_exc); - ldw(s_space, 4, src, a2, cda_ldw_exc); - src -= 1 * sizeof(unsigned int); - dst -= 3 * sizeof(unsigned int); - len += 2; - goto do1; - case 3: - /* a0 = ((unsigned int *) src)[0]; - a1 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a0, cda_ldw_exc); - ldw(s_space, 4, src, a1, cda_ldw_exc); - src -= 0 * sizeof(unsigned int); - dst -= 2 * sizeof(unsigned int); - len += 1; - goto do2; - case 0: - if (len == 0) - return PA_MEMCPY_OK; - /* a3 = ((unsigned int *) src)[0]; - a0 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a3, cda_ldw_exc); - ldw(s_space, 4, src, a0, cda_ldw_exc); - src -=-1 * sizeof(unsigned int); - dst -= 1 * sizeof(unsigned int); - len += 0; - goto do3; - case 1: - /* a2 = ((unsigned int *) src)[0]; - a3 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a2, cda_ldw_exc); - ldw(s_space, 4, src, a3, cda_ldw_exc); - src -=-2 * sizeof(unsigned int); - dst -= 0 * sizeof(unsigned int); - len -= 1; - if (len == 0) - goto do0; - goto do4; /* No-op. */ - } - - do - { - /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ -do4: - /* a0 = ((unsigned int *) src)[0]; */ - ldw(s_space, 0, src, a0, cda_ldw_exc); - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); -do3: - /* a1 = ((unsigned int *) src)[1]; */ - ldw(s_space, 4, src, a1, cda_ldw_exc); - /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ - stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); -do2: - /* a2 = ((unsigned int *) src)[2]; */ - ldw(s_space, 8, src, a2, cda_ldw_exc); - /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ - stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); -do1: - /* a3 = ((unsigned int *) src)[3]; */ - ldw(s_space, 12, src, a3, cda_ldw_exc); - /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ - stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); - - src += 4 * sizeof(unsigned int); - dst += 4 * sizeof(unsigned int); - len -= 4; - } - while (len != 0); - -do0: - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); - - preserve_branch(handle_load_error); - preserve_branch(handle_store_error); - - return PA_MEMCPY_OK; - -handle_load_error: - __asm__ __volatile__ ("cda_ldw_exc:\n"); - return PA_MEMCPY_LOAD_ERROR; - -handle_store_error: - __asm__ __volatile__ ("cda_stw_exc:\n"); - return PA_MEMCPY_STORE_ERROR; -} - - -/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. - * In case of an access fault the faulty address can be read from the per_cpu - * exception data struct. */ -static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, - unsigned long len) -{ - register unsigned long src, dst, t1, t2, t3; - register unsigned char *pcs, *pcd; - register unsigned int *pws, *pwd; - register double *pds, *pdd; - unsigned long ret; - - src = (unsigned long)srcp; - dst = (unsigned long)dstp; - pcs = (unsigned char *)srcp; - pcd = (unsigned char *)dstp; - - /* prefetch_src((const void *)srcp); */ - - if (len < THRESHOLD) - goto byte_copy; - - /* Check alignment */ - t1 = (src ^ dst); - if (unlikely(t1 & (sizeof(double)-1))) - goto unaligned_copy; - - /* src and dst have same alignment. */ - - /* Copy bytes till we are double-aligned. 
*/ - t2 = src & (sizeof(double) - 1); - if (unlikely(t2 != 0)) { - t2 = sizeof(double) - t2; - while (t2 && len) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - len--; - stbma(d_space, t3, pcd, pmc_store_exc); - t2--; - } - } - - pds = (double *)pcs; - pdd = (double *)pcd; - -#if 0 - /* Copy 8 doubles at a time */ - while (len >= 8*sizeof(double)) { - register double r1, r2, r3, r4, r5, r6, r7, r8; - /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ - flddma(s_space, pds, r1, pmc_load_exc); - flddma(s_space, pds, r2, pmc_load_exc); - flddma(s_space, pds, r3, pmc_load_exc); - flddma(s_space, pds, r4, pmc_load_exc); - fstdma(d_space, r1, pdd, pmc_store_exc); - fstdma(d_space, r2, pdd, pmc_store_exc); - fstdma(d_space, r3, pdd, pmc_store_exc); - fstdma(d_space, r4, pdd, pmc_store_exc); - -#if 0 - if (L1_CACHE_BYTES <= 32) - prefetch_src((char *)pds + L1_CACHE_BYTES); -#endif - flddma(s_space, pds, r5, pmc_load_exc); - flddma(s_space, pds, r6, pmc_load_exc); - flddma(s_space, pds, r7, pmc_load_exc); - flddma(s_space, pds, r8, pmc_load_exc); - fstdma(d_space, r5, pdd, pmc_store_exc); - fstdma(d_space, r6, pdd, pmc_store_exc); - fstdma(d_space, r7, pdd, pmc_store_exc); - fstdma(d_space, r8, pdd, pmc_store_exc); - len -= 8*sizeof(double); - } -#endif - - pws = (unsigned int *)pds; - pwd = (unsigned int *)pdd; - -word_copy: - while (len >= 8*sizeof(unsigned int)) { - register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; - /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ - ldwma(s_space, pws, r1, pmc_load_exc); - ldwma(s_space, pws, r2, pmc_load_exc); - ldwma(s_space, pws, r3, pmc_load_exc); - ldwma(s_space, pws, r4, pmc_load_exc); - stwma(d_space, r1, pwd, pmc_store_exc); - stwma(d_space, r2, pwd, pmc_store_exc); - stwma(d_space, r3, pwd, pmc_store_exc); - stwma(d_space, r4, pwd, pmc_store_exc); - - ldwma(s_space, pws, r5, pmc_load_exc); - ldwma(s_space, pws, r6, pmc_load_exc); - ldwma(s_space, pws, r7, pmc_load_exc); - ldwma(s_space, pws, r8, pmc_load_exc); - stwma(d_space, r5, pwd, pmc_store_exc); - stwma(d_space, r6, pwd, pmc_store_exc); - stwma(d_space, r7, pwd, pmc_store_exc); - stwma(d_space, r8, pwd, pmc_store_exc); - len -= 8*sizeof(unsigned int); - } - - while (len >= 4*sizeof(unsigned int)) { - register unsigned int r1,r2,r3,r4; - ldwma(s_space, pws, r1, pmc_load_exc); - ldwma(s_space, pws, r2, pmc_load_exc); - ldwma(s_space, pws, r3, pmc_load_exc); - ldwma(s_space, pws, r4, pmc_load_exc); - stwma(d_space, r1, pwd, pmc_store_exc); - stwma(d_space, r2, pwd, pmc_store_exc); - stwma(d_space, r3, pwd, pmc_store_exc); - stwma(d_space, r4, pwd, pmc_store_exc); - len -= 4*sizeof(unsigned int); - } - - pcs = (unsigned char *)pws; - pcd = (unsigned char *)pwd; - -byte_copy: - while (len) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - } - - return PA_MEMCPY_OK; - -unaligned_copy: - /* possibly we are aligned on a word, but not on a double... */ - if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { - t2 = src & (sizeof(unsigned int) - 1); - - if (unlikely(t2 != 0)) { - t2 = sizeof(unsigned int) - t2; - while (t2) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - t2--; - } - } - - pws = (unsigned int *)pcs; - pwd = (unsigned int *)pcd; - goto word_copy; - } - - /* Align the destination. 
*/ - if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { - t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); - while (t2) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - t2--; - } - dst = (unsigned long)pcd; - src = (unsigned long)pcs; - } - - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); - if (ret) - return ret; - - pcs += (len & -sizeof(unsigned int)); - pcd += (len & -sizeof(unsigned int)); - len %= sizeof(unsigned int); - - preserve_branch(handle_load_error); - preserve_branch(handle_store_error); - - goto byte_copy; - -handle_load_error: - __asm__ __volatile__ ("pmc_load_exc:\n"); - return PA_MEMCPY_LOAD_ERROR; - -handle_store_error: - __asm__ __volatile__ ("pmc_store_exc:\n"); - return PA_MEMCPY_STORE_ERROR; -} - - /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) -{ - unsigned long ret, fault_addr, reference; - struct exception_data *d; - - ret = pa_memcpy_internal(dstp, srcp, len); - if (likely(ret == PA_MEMCPY_OK)) - return 0; - - /* if a load or store fault occured we can get the faulty addr */ - d = this_cpu_ptr(&exception_data); - fault_addr = d->fault_addr; - - /* error in load or store? */ - if (ret == PA_MEMCPY_LOAD_ERROR) - reference = (unsigned long) srcp; - else - reference = (unsigned long) dstp; +extern unsigned long pa_memcpy(void *dst, const void *src, + unsigned long len); - DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", - ret, len, fault_addr, reference); - - if (fault_addr >= reference) - return len - (fault_addr - reference); - else - return len; -} - -#ifdef __KERNEL__ unsigned long __copy_to_user(void __user *dst, const void *src, unsigned long len) { @@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return __probe_kernel_read(dst, src, size); } - -#endif From d19f5e41b344a057bb2450024a807476f30978d2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Mar 2017 11:59:15 +0100 Subject: [PATCH 1812/1911] parisc: Clean up fixup routines for get_user()/put_user() Al Viro noticed that userspace accesses via get_user()/put_user() can be simplified a lot with regard to usage of the exception handling. This patch implements a fixup routine for get_user() and put_user() in such that the exception handler will automatically load -EFAULT into the register %r8 (the error value) in case on a fault on userspace. Additionally the fixup routine will zero the target register on fault in case of a get_user() call. The target register is extracted out of the faulting assembly instruction. This patch brings a few benefits over the old implementation: 1. Exception handling gets much cleaner, easier and smaller in size. 2. Helper functions like fixup_get_user_skip_1 (all of fixup.S) can be dropped. 3. No need to hardcode %r9 as target register for get_user() any longer. This helps the compiler register allocator and thus creates less assembler statements. 4. No dependency on the exception_data contents any longer. 5. Nested faults will be handled cleanly. 
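A toy userspace model of the low-bit tagging this patch introduces with ASM_EXCEPTIONTABLE_ENTRY_EFAULT(): fixup addresses are word-aligned, so bit 0 is free to carry the "load -EFAULT (and zero the target register)" flag that the fault handler checks. The macro name and addresses below are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define FIXUP_EFAULT	1UL	/* toy flag stored in the free low bit */

static uintptr_t encode(uintptr_t fixup_addr, int wants_efault)
{
	return fixup_addr | (wants_efault ? FIXUP_EFAULT : 0);
}

static void handle_fault(uintptr_t entry)
{
	uintptr_t addr = entry & ~FIXUP_EFAULT;	/* real fixup address */

	if (entry & FIXUP_EFAULT)
		printf("fault: set err = -EFAULT, resume at %#lx\n",
		       (unsigned long)addr);
	else
		printf("fault: resume at %#lx\n", (unsigned long)addr);
}

int main(void)
{
	handle_fault(encode(0x1000, 1));
	handle_fault(encode(0x2000, 0));
	return 0;
}
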
Reported-by: Al Viro Cc: # v4.9+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/uaccess.h | 59 +++++++++++-------- arch/parisc/kernel/parisc_ksyms.c | 10 ---- arch/parisc/lib/Makefile | 2 +- arch/parisc/lib/fixup.S | 98 ------------------------------- arch/parisc/mm/fault.c | 17 ++++++ 5 files changed, 52 insertions(+), 134 deletions(-) delete mode 100644 arch/parisc/lib/fixup.S diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index edfbf9d6a6dd76..8442727f28d273 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -64,6 +64,15 @@ struct exception_table_entry { ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT into %r8 for a read or write fault, and zeroes the target + * register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) + /* * The page fault handler stores, in a per-cpu area, the following information * if a fixup routine is available. @@ -91,7 +100,7 @@ struct exception_data { #define __get_user(x, ptr) \ ({ \ register long __gu_err __asm__ ("r8") = 0; \ - register long __gu_val __asm__ ("r9") = 0; \ + register long __gu_val; \ \ load_sr2(); \ switch (sizeof(*(ptr))) { \ @@ -107,22 +116,23 @@ struct exception_data { }) #define __get_user_asm(ldx, ptr) \ - __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ + __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err) \ - : "r1"); + : "r"(ptr), "1"(__gu_err)); #if !defined(CONFIG_64BIT) #define __get_user_asm64(ptr) \ - __asm__("\n1:\tldw 0(%%sr2,%2),%0" \ - "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\ - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\ + __asm__(" copy %%r0,%R0\n" \ + "1: ldw 0(%%sr2,%2),%0\n" \ + "2: ldw 4(%%sr2,%2),%R0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err) \ - : "r1"); + : "r"(ptr), "1"(__gu_err)); #endif /* !defined(CONFIG_64BIT) */ @@ -148,32 +158,31 @@ struct exception_data { * The "__put_user/kernel_asm()" macros tell gcc they read from memory * instead of writing. This is because they do not write to any memory * gcc knows about, so there are no aliasing issues. These macros must - * also be aware that "fixup_put_user_skip_[12]" are executed in the - * context of the fault, and any registers used there must be listed - * as clobbers. In this case only "r1" is used by the current routines. - * r8/r9 are already listed as err/val. + * also be aware that fixups are executed in the context of the fault, + * and any registers used there must be listed as clobbers. + * r8 is already listed as err. 
*/ #define __put_user_asm(stx, x, ptr) \ __asm__ __volatile__ ( \ - "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\ + "1: " stx " %2,0(%%sr2,%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__pu_err) \ - : "r"(ptr), "r"(x), "0"(__pu_err) \ - : "r1") + : "r"(ptr), "r"(x), "0"(__pu_err)) #if !defined(CONFIG_64BIT) #define __put_user_asm64(__val, ptr) do { \ __asm__ __volatile__ ( \ - "\n1:\tstw %2,0(%%sr2,%1)" \ - "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\ - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\ + "1: stw %2,0(%%sr2,%1)\n" \ + "2: stw %R2,4(%%sr2,%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err) \ - : "r1"); \ + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7484b3d11e0dbf..c6d6272a934f03 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64); EXPORT_SYMBOL(lclear_user); EXPORT_SYMBOL(lstrnlen_user); -/* Global fixups - defined as int to avoid creation of function pointers */ -extern int fixup_get_user_skip_1; -extern int fixup_get_user_skip_2; -extern int fixup_put_user_skip_1; -extern int fixup_put_user_skip_2; -EXPORT_SYMBOL(fixup_get_user_skip_1); -EXPORT_SYMBOL(fixup_get_user_skip_2); -EXPORT_SYMBOL(fixup_put_user_skip_1); -EXPORT_SYMBOL(fixup_put_user_skip_2); - #ifndef CONFIG_64BIT /* Needed so insmod can set dp value */ extern int $global$; diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 8fa92b8d839abb..f2dac4d73b1b30 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S deleted file mode 100644 index a5b72f22c7a6c4..00000000000000 --- a/arch/parisc/lib/fixup.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Linux/PA-RISC Project (http://www.parisc-linux.org/) - * - * Copyright (C) 2004 Randolph Chung - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Fixup routines for kernel exception handling. 
- */ -#include -#include -#include -#include - -#ifdef CONFIG_SMP - .macro get_fault_ip t1 t2 - loadgp - addil LT%__per_cpu_offset,%r27 - LDREG RT%__per_cpu_offset(%r1),\t1 - /* t2 = smp_processor_id() */ - mfctl 30,\t2 - ldw TI_CPU(\t2),\t2 -#ifdef CONFIG_64BIT - extrd,u \t2,63,32,\t2 -#endif - /* t2 = &__per_cpu_offset[smp_processor_id()]; */ - LDREGX \t2(\t1),\t2 - addil LT%exception_data,%r27 - LDREG RT%exception_data(%r1),\t1 - /* t1 = this_cpu_ptr(&exception_data) */ - add,l \t1,\t2,\t1 - /* %r27 = t1->fault_gp - restore gp */ - LDREG EXCDATA_GP(\t1), %r27 - /* t1 = t1->fault_ip */ - LDREG EXCDATA_IP(\t1), \t1 - .endm -#else - .macro get_fault_ip t1 t2 - loadgp - /* t1 = this_cpu_ptr(&exception_data) */ - addil LT%exception_data,%r27 - LDREG RT%exception_data(%r1),\t2 - /* %r27 = t2->fault_gp - restore gp */ - LDREG EXCDATA_GP(\t2), %r27 - /* t1 = t2->fault_ip */ - LDREG EXCDATA_IP(\t2), \t1 - .endm -#endif - - .level LEVEL - - .text - .section .fixup, "ax" - - /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ -ENTRY_CFI(fixup_get_user_skip_1) - get_fault_ip %r1,%r8 - ldo 4(%r1), %r1 - ldi -EFAULT, %r8 - bv %r0(%r1) - copy %r0, %r9 -ENDPROC_CFI(fixup_get_user_skip_1) - -ENTRY_CFI(fixup_get_user_skip_2) - get_fault_ip %r1,%r8 - ldo 8(%r1), %r1 - ldi -EFAULT, %r8 - bv %r0(%r1) - copy %r0, %r9 -ENDPROC_CFI(fixup_get_user_skip_2) - - /* put_user() fixups, store -EFAULT in r8 */ -ENTRY_CFI(fixup_put_user_skip_1) - get_fault_ip %r1,%r8 - ldo 4(%r1), %r1 - bv %r0(%r1) - ldi -EFAULT, %r8 -ENDPROC_CFI(fixup_put_user_skip_1) - -ENTRY_CFI(fixup_put_user_skip_2) - get_fault_ip %r1,%r8 - ldo 8(%r1), %r1 - bv %r0(%r1) - ldi -EFAULT, %r8 -ENDPROC_CFI(fixup_put_user_skip_2) - diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index deab89a8915a10..32ec22146141e5 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,6 +150,23 @@ int fixup_exception(struct pt_regs *regs) d->fault_space = regs->isr; d->fault_addr = regs->ior; + /* + * Fix up get_user() and put_user(). + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant + * bit in the relative address of the fixup routine to indicate + * that %r8 should be loaded with -EFAULT to report a userspace + * access error. + */ + if (fix->fixup & 1) { + regs->gr[8] = -EFAULT; + + /* zero target register for get_user() */ + if (parisc_acctyp(0, regs->iir) == VM_READ) { + int treg = regs->iir & 0x1f; + regs->gr[treg] = 0; + } + } + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; regs->iaoq[0] &= ~3; /* From 476e75a44b56038bee9207242d4bc718f6b4de06 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Mar 2017 08:25:30 +0200 Subject: [PATCH 1813/1911] parisc: Avoid stalled CPU warnings after system shutdown Commit 73580dac7618 ("parisc: Fix system shutdown halt") introduced an endless loop for systems which don't provide a software power off function. But the soft lockup detector will detect this and report stalled CPUs after some time. Avoid those unwanted warnings by disabling the soft lockup detector. 
Fixes: 73580dac7618 ("parisc: Fix system shutdown halt") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b76f503eee4a83..4516a5b53f38ef 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -143,6 +143,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + /* prevent soft lockup/stalled CPU messages for endless loop. */ + rcu_sysrq_start(); for (;;); } From 293d264f13cbde328d5477f49e3103edbc1dc191 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Thu, 23 Mar 2017 20:52:46 +0530 Subject: [PATCH 1814/1911] cpuidle: powernv: Pass correct drv->cpumask for registration drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init(). On PowerNV platform cpu_present could be less than cpu_possible in cases where firmware detects the cpu, but it is not available to the OS. When CONFIG_HOTPLUG_CPU=n, such cpus are not hotplugable at runtime and hence we skip creating cpu_device. This breaks cpuidle on powernv where register_cpu() is not called for cpus in cpu_possible_mask that cannot be hot-added at runtime. Trying cpuidle_register_device() on cpu without cpu_device will cause crash like this: cpu 0xf: Vector: 380 (Data SLB Access) at [c000000ff1503490] pc: c00000000022c8bc: string+0x34/0x60 lr: c00000000022ed78: vsnprintf+0x284/0x42c sp: c000000ff1503710 msr: 9000000000009033 dar: 6000000060000000 current = 0xc000000ff1480000 paca = 0xc00000000fe82d00 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/8 Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4 (Buildroot 2017.02-00004-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017 enter ? for help [link register ] c00000000022ed78 vsnprintf+0x284/0x42c [c000000ff1503710] c00000000022ebb8 vsnprintf+0xc4/0x42c (unreliable) [c000000ff1503800] c00000000022ef40 vscnprintf+0x20/0x44 [c000000ff1503830] c0000000000ab61c vprintk_emit+0x94/0x2cc [c000000ff15038a0] c0000000000acc9c vprintk_func+0x60/0x74 [c000000ff15038c0] c000000000619694 printk+0x38/0x4c [c000000ff15038e0] c000000000224950 kobject_get+0x40/0x60 [c000000ff1503950] c00000000022507c kobject_add_internal+0x60/0x2c4 [c000000ff15039e0] c000000000225350 kobject_init_and_add+0x70/0x78 [c000000ff1503a60] c00000000053c288 cpuidle_add_sysfs+0x9c/0xe0 [c000000ff1503ae0] c00000000053aeac cpuidle_register_device+0xd4/0x12c [c000000ff1503b30] c00000000053b108 cpuidle_register+0x98/0xcc [c000000ff1503bc0] c00000000085eaf0 powernv_processor_idle_init+0x140/0x1e0 [c000000ff1503c60] c00000000000cd60 do_one_initcall+0xc0/0x15c [c000000ff1503d20] c000000000833e84 kernel_init_freeable+0x1a0/0x25c [c000000ff1503dc0] c00000000000d478 kernel_init+0x24/0x12c [c000000ff1503e30] c00000000000b564 ret_from_kernel_thread+0x5c/0x78 This patch fixes the bug by passing correct cpumask from powernv-cpuidle driver. Signed-off-by: Vaidyanathan Srinivasan Reviewed-by: Gautham R. Shenoy Acked-by: Michael Ellerman [ rjw: Comment massage ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/cpuidle-powernv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 370593006f5f76..cda8f62d555b57 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -175,6 +175,24 @@ static int powernv_cpuidle_driver_init(void) drv->state_count += 1; } + /* + * On the PowerNV platform cpu_present may be less than cpu_possible in + * cases when firmware detects the CPU, but it is not available to the + * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at + * run time and hence cpu_devices are not created for those CPUs by the + * generic topology_init(). + * + * drv->cpumask defaults to cpu_possible_mask in + * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where + * cpu_devices are not created for CPUs in cpu_possible_mask that + * cannot be hot-added later at run time. + * + * Trying cpuidle_register_device() on a CPU without a cpu_device is + * incorrect, so pass a correct CPU mask to the generic cpuidle driver. + */ + + drv->cpumask = (struct cpumask *)cpu_present_mask; + return 0; } From 2247925f0942dc4e7c09b1cde45ca18461d94c5f Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:29 -0400 Subject: [PATCH 1815/1911] bnxt_en: Fix NULL pointer dereference in reopen failure path Net device reset can fail when the h/w or f/w is in a bad state. Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc(). The cleanup invokes bnxt_hwrm_resource_free() which inturn calls bnxt_disable_int(). In this routine, the code segment if (ring->fw_ring_id != INVALID_HW_RING_ID) BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); results in NULL pointer dereference as cpr->cp_doorbell is not yet initialized, and fw_ring_id is zero. The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before bnxt_init_chip() is invoked. Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 32de4589d16a2c..7ee772483f26d9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2455,6 +2455,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return 0; } +static void bnxt_init_cp_rings(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + } +} + static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; @@ -5006,6 +5018,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init) static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init) { + bnxt_init_cp_rings(bp); bnxt_init_rx_rings(bp); bnxt_init_tx_rings(bp); bnxt_init_ring_grps(bp, irq_re_init); From 23e12c893489ed12ecfccbf866fc62af1bead4b0 Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:30 -0400 Subject: [PATCH 1816/1911] bnxt_en: Correct the order of arguments to netdev_err() in bnxt_set_tpa() Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7ee772483f26d9..6c8568780a6a1f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4744,7 +4744,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", - rc, i); + i, rc); return rc; } } From 3ed3a83e3f3871c57b18cef09b148e96921236ed Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 28 Mar 2017 19:47:31 -0400 Subject: [PATCH 1817/1911] bnxt_en: Fix DMA unmapping of the RX buffers in XDP mode during shutdown. In bnxt_free_rx_skbs(), which is called to free up all RX buffers during shutdown, we need to unmap the page if we are running in XDP mode. Fixes: c61fb99cae51 ("bnxt_en: Add RX page mode support.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6c8568780a6a1f..1f1e54ba0ecb31 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1983,20 +1983,25 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) for (j = 0; j < max_idx; j++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j]; + dma_addr_t mapping = rx_buf->mapping; void *data = rx_buf->data; if (!data) continue; - dma_unmap_single(&pdev->dev, rx_buf->mapping, - bp->rx_buf_use_size, bp->rx_dir); - rx_buf->data = NULL; - if (BNXT_RX_PAGE_MODE(bp)) + if (BNXT_RX_PAGE_MODE(bp)) { + mapping -= bp->rx_dma_offset; + dma_unmap_page(&pdev->dev, mapping, + PAGE_SIZE, bp->rx_dir); __free_page(data); - else + } else { + dma_unmap_single(&pdev->dev, mapping, + bp->rx_buf_use_size, + bp->rx_dir); kfree(data); + } } for (j = 0; j < max_agg_idx; j++) { From 358e78b5f445c35f5b7f9241425fb499ee9fe3e2 Mon Sep 17 00:00:00 2001 From: Zakharov Vlad Date: Wed, 29 Mar 2017 13:41:46 +0300 Subject: [PATCH 1818/1911] ezchip: nps_enet: check if napi has been completed After a new NAPI_STATE_MISSED state was added to NAPI we can get into this state and in such case we have to reschedule NAPI as some work is still pending and we have to process it. napi_complete_done() function returns false if we have to reschedule something (e.g. in case we were in MISSED state) as the current polling has not been completed yet. The nps_enet driver hasn't been verifying the return value of napi_complete_done() and has been forcibly enabling interrupts. That is not correct as we should not enable interrupts before we have processed all scheduled work. As a result we were getting trapped in the interrupt handler chain as we had never been able to disable ethernet interrupts again. So this patch makes nps_enet_poll() verify the return value of napi_complete_done() and enable interrupts only in case all scheduled work has been completed. Signed-off-by: Vlad Zakharov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 992ebe973d25bf..f819843e2bae73 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -189,11 +189,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) nps_enet_tx_handler(ndev); work_done = nps_enet_rx_handler(ndev); - if (work_done < budget) { + if ((work_done < budget) && napi_complete_done(napi, work_done)) { u32 buf_int_enable_value = 0; - napi_complete_done(napi, work_done); - /* set tx_done and rx_rdy bits */ buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT; buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT; From d09c5373e8e4eaaa09233552cbf75dc4c4f21203 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2017 09:48:04 +0200 Subject: [PATCH 1819/1911] s390/uaccess: get_user() should zero on failure (again) Commit fd2d2b191fe7 ("s390: get_user() should zero on failure") intended to fix s390's get_user() implementation which did not zero the target operand if the read from user space faulted. Unfortunately the patch has no effect: the corresponding inline assembly specifies that the operand is only written to ("=") and the previous value is discarded. Therefore the compiler is free to and actually does omit the zero initialization. To fix this simply change the constraint modifier to "+", so the compiler cannot omit the initialization anymore. Fixes: c9ca78415ac1 ("s390/uaccess: provide inline variants of get_user/put_user") Fixes: fd2d2b191fe7 ("s390: get_user() should zero on failure") Cc: stable@vger.kernel.org Cc: Al Viro Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 136932ff425020..3ea1554d04b377 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -147,7 +147,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, " jg 2b\n" \ ".popsection\n" \ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "=Q" (*(to)) \ + : "=d" (__rc), "+Q" (*(to)) \ : "d" (size), "Q" (*(from)), \ "d" (__reg0), "K" (-EFAULT) \ : "cc"); \ From 8a146fbe1f1461aebc9b8f06a0f22e1a8a4f0dd1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Mar 2017 11:08:47 +0100 Subject: [PATCH 1820/1911] gpio: acpi: Call enable_irq_wake for _IAE GpioInts with Wake set On Bay Trail / Cherry Trail systems with a LID switch, the LID switch is often connected to a gpioint handled by an _IAE event handler. Before this commit such systems would not wake up when opening the lid, requiring the power button to be pressed after opening the lid to wake up. Note that Bay Trail / Cherry Trail systems use suspend-to-idle, so the interrupts are generated anyway on those lines on lid switch changes, but they are treated by the IRQ subsystem as spurious while suspended if not marked as wakeup IRQs. This commit calls enable_irq_wake() for _IAE GpioInts with a valid event handler which have their Wake flag set. This fixes such systems not waking up when opening the lid. 
Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 9b37a3692b3fee..8e318f449d23b4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -266,6 +266,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, goto fail_free_event; } + if (agpio->wake_capable == ACPI_WAKE_CAPABLE) + enable_irq_wake(irq); + list_add_tail(&event->node, &acpi_gpio->events); return AE_OK; @@ -339,6 +342,9 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; + if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) + disable_irq_wake(event->irq); + free_irq(event->irq, event); desc = event->desc; if (WARN_ON(IS_ERR(desc))) From 693bdaa164b40b7aa6018b98af6f7e40dbd52457 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 23 Mar 2017 13:21:38 -0700 Subject: [PATCH 1821/1911] ACPI / gpio: do not fall back to parsing _CRS when we get a deferral If, while locating GPIOs by name, we get probe deferral, we should immediately report it to caller rather than trying to fall back to parsing unnamed GPIOs from _CRS block. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Acked-by: Mika Westerberg Acked-and-Tested-by: Hans de Goede Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 8e318f449d23b4..2bd683e2be022d 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -577,8 +577,10 @@ struct gpio_desc *acpi_find_gpio(struct device *dev, } desc = acpi_get_gpiod_by_index(adev, propname, idx, &info); - if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER)) + if (!IS_ERR(desc)) break; + if (PTR_ERR(desc) == -EPROBE_DEFER) + return ERR_CAST(desc); } /* Then from plain _CRS GPIOs */ From 8b4073596997f2ccbf68d8e72e07b827388a4536 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Wed, 29 Mar 2017 10:35:39 -0700 Subject: [PATCH 1822/1911] HID: wacom: Don't add ghost interface as shared data A previous commit (below) adds a check for already probed interfaces to Wacom's matching heuristic. Unfortunately this causes the Bamboo Pen (CTL-460) to match itself to its 'ghost' touch interface. After subsequent changes to the driver this match to the ghost causes the kernel to crash. This patch avoids calling wacom_add_shared_data() for the BAMBOO_PEN's ghost touch interface. Fixes: 41372d5d40e7 ("HID: wacom: Augment 'oVid' and 'oPid' with heuristics for HID_GENERIC") Cc: stable # 4.9 Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 994bddc55b8227..b676f02d4b7fcd 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2165,6 +2165,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) wacom_update_name(wacom, wireless ? 
" (WL)" : ""); + /* pen only Bamboo neither support touch nor pad */ + if ((features->type == BAMBOO_PEN) && + ((features->device_type & WACOM_DEVICETYPE_TOUCH) || + (features->device_type & WACOM_DEVICETYPE_PAD))) { + error = -ENODEV; + goto fail; + } + error = wacom_add_shared_data(hdev); if (error) goto fail; @@ -2212,14 +2220,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail_quirks; } - /* pen only Bamboo neither support touch nor pad */ - if ((features->type == BAMBOO_PEN) && - ((features->device_type & WACOM_DEVICETYPE_TOUCH) || - (features->device_type & WACOM_DEVICETYPE_PAD))) { - error = -ENODEV; - goto fail_quirks; - } - if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) error = hid_hw_open(hdev); From 4d20c332de377fa5b06c46f6f34174c10dd998e4 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Wed, 29 Mar 2017 11:41:28 -0700 Subject: [PATCH 1823/1911] HID: wacom: call _query_tablet_data() for BAMBOO_TOUCH Commit a544c619a54b ("HID: wacom: do not attempt to switch mode while in probe") introduces delayed work for querying (setting the mode) on all tablets. Bamboo Touch (056a:00d0) has a ghost interface which claims to be a pen device. Though this device can be removed, we have to set the mode on the ghost pen interface before we remove it. After the aforementioned delay was introduced the device was being removed before the mode setting could be executed. Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index b676f02d4b7fcd..e2666ef84dc1ca 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2216,6 +2216,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) /* touch only Bamboo doesn't support pen */ if ((features->type == BAMBOO_TOUCH) && (features->device_type & WACOM_DEVICETYPE_PEN)) { + cancel_delayed_work_sync(&wacom->init_work); + _wacom_query_tablet_data(wacom); error = -ENODEV; goto fail_quirks; } From f7652afa8eadb416b23eb57dec6f158529942041 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:09:08 +0200 Subject: [PATCH 1824/1911] drm/vmwgfx: Type-check lookups of fence objects A malicious caller could otherwise hand over handles to other objects causing all sorts of interesting problems. Testing done: Ran a Fedora 25 desktop using both Xorg and gnome-shell/Wayland. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 77 +++++++++++++++++---------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 6541dd8b82dc07..4076063e0fdd47 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -538,7 +538,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman, struct vmw_fence_obj **p_fence) { struct vmw_fence_obj *fence; - int ret; + int ret; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (unlikely(fence == NULL)) @@ -701,6 +701,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman) } +/** + * vmw_fence_obj_lookup - Look up a user-space fence object + * + * @tfile: A struct ttm_object_file identifying the caller. + * @handle: A handle identifying the fence object. + * @return: A struct vmw_user_fence base ttm object on success or + * an error pointer on failure. + * + * The fence object is looked up and type-checked. 
The caller needs + * to have opened the fence object first, but since that happens on + * creation and fence objects aren't shareable, that's not an + * issue currently. + */ +static struct ttm_base_object * +vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle) +{ + struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle); + + if (!base) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + return ERR_PTR(-EINVAL); + } + + if (base->refcount_release != vmw_user_fence_base_release) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + ttm_base_object_unref(&base); + return ERR_PTR(-EINVAL); + } + + return base; +} + + int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -726,13 +761,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, arg->kernel_cookie = jiffies + wait_timeout; } - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Wait invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); @@ -771,13 +802,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data, struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_private *dev_priv = vmw_priv(dev); - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Fence signaled invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); fman = fman_from_fence(fence); @@ -1024,6 +1051,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_fence_event_arg *) data; struct vmw_fence_obj *fence = NULL; struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); + struct ttm_object_file *tfile = vmw_fp->tfile; struct drm_vmw_fence_rep __user *user_fence_rep = (struct drm_vmw_fence_rep __user *)(unsigned long) arg->fence_rep; @@ -1037,15 +1065,11 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, */ if (arg->handle) { struct ttm_base_object *base = - ttm_base_object_lookup_for_ref(dev_priv->tdev, - arg->handle); - - if (unlikely(base == NULL)) { - DRM_ERROR("Fence event invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + vmw_fence_obj_lookup(tfile, arg->handle); + + if (IS_ERR(base)) + return PTR_ERR(base); + fence = &(container_of(base, struct vmw_user_fence, base)->fence); (void) vmw_fence_obj_reference(fence); @@ -1053,7 +1077,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, if (user_fence_rep != NULL) { bool existed; - ret = ttm_ref_object_add(vmw_fp->tfile, base, + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, &existed); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " @@ -1097,8 +1121,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, return 0; out_no_create: if (user_fence_rep != NULL) - ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - handle, TTM_REF_USAGE); + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); out_no_ref_obj: vmw_fence_obj_unreference(&fence); return ret; From 36274ab8c596f1240c606bb514da329add2a1bcd Mon Sep 17 
00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:12:53 +0200 Subject: [PATCH 1825/1911] drm/vmwgfx: NULL pointer dereference in vmw_surface_define_ioctl() Before memory allocations vmw_surface_define_ioctl() checks the upper-bounds of a user-supplied size, but does not check if the supplied size is 0. Add check to avoid NULL pointer dereferences. Cc: Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index b445ce9b975786..f410502cb07527 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -716,8 +716,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) num_sizes += req->mip_levels[i]; - if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * - DRM_VMW_MAX_MIP_LEVELS) + if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || + num_sizes == 0) return -EINVAL; size = vmw_user_surface_size + 128 + From 63774069d9527a1aeaa4aa20e929ef5e8e9ecc38 Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:15:12 +0200 Subject: [PATCH 1826/1911] drm/vmwgfx: avoid calling vzalloc with a 0 size in vmw_get_cap_3d_ioctl() In vmw_get_cap_3d_ioctl(), a user can supply 0 for a size that is used in vzalloc(). This eventually calls dump_stack() (in warn_alloc()), which can leak useful addresses to dmesg. Add check to avoid a size of 0. Cc: Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index b8c6a03c8c54df..1802d0e7fab8f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -186,7 +186,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - if (unlikely(arg->pad64 != 0)) { + if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } From fe25deb7737ce6c0879ccf79c99fa1221d428bf2 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:21:25 +0200 Subject: [PATCH 1827/1911] drm/ttm, drm/vmwgfx: Relax permission checking when opening surfaces Previously, when a surface was opened using a legacy (non prime) handle, it was verified to have been created by a client in the same master realm. Relax this so that opening is also allowed recursively if the client already has the surface open. This works around a regression in svga mesa where opening of a shared surface is used recursively to obtain surface information. 
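For illustration only, here is a minimal userspace C sketch of the "require existed" behaviour described above. It is not the ttm/vmwgfx code; the table, limits and names are invented for the example. The idea is simply that adding a reference is always allowed for a handle the caller already holds, while a first-time open can be refused with -EPERM when the restricted mode is requested.

#include <errno.h>
#include <stdio.h>

#define MAX_REFS 16

struct ref_entry {
	unsigned int handle;
	unsigned int count;
};

static struct ref_entry table[MAX_REFS];

/* Add a reference to @handle; refuse first-time opens if @require_existed. */
static int ref_add(unsigned int handle, int require_existed)
{
	struct ref_entry *free_slot = NULL;
	int i;

	for (i = 0; i < MAX_REFS; i++) {
		if (table[i].count && table[i].handle == handle) {
			table[i].count++;	/* already open: always allowed */
			return 0;
		}
		if (!table[i].count && !free_slot)
			free_slot = &table[i];
	}

	if (require_existed)
		return -EPERM;		/* no existing reference: refuse */
	if (!free_slot)
		return -ENOMEM;

	free_slot->handle = handle;
	free_slot->count = 1;
	return 0;
}

int main(void)
{
	printf("first open, restricted:   %d\n", ref_add(42, 1));	/* -EPERM */
	printf("first open, unrestricted: %d\n", ref_add(42, 0));	/* 0 */
	printf("re-open, restricted:      %d\n", ref_add(42, 1));	/* 0 */
	return 0;
}
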
Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/ttm/ttm_object.c | 10 +++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 22 +++++++++------------- include/drm/ttm/ttm_object.h | 5 ++++- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index fdb451e3ec0118..d750140bafbcdb 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, if (unlikely(ret != 0)) goto out_err0; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) goto out_err1; @@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists); int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed) + enum ttm_ref_type ref_type, bool *existed, + bool require_existed) { struct drm_open_hash *ht = &tfile->ref_hash[ref_type]; struct ttm_ref_object *ref; @@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, } rcu_read_unlock(); + if (require_existed) + return -EPERM; + ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref), false, false); if (unlikely(ret != 0)) @@ -635,7 +639,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, prime = (struct ttm_prime_object *) dma_buf->priv; base = &prime->base; *handle = base->hash.key; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); dma_buf_put(dma_buf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 4076063e0fdd47..6b2708b4eafe84 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1075,10 +1075,8 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (void) vmw_fence_obj_reference(fence); if (user_fence_rep != NULL) { - bool existed; - - ret = ttm_ref_object_add(tfile, base, - TTM_REF_USAGE, &existed); + ret = ttm_ref_object_add(vmw_fp->tfile, base, + TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " "object.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 65b3f036963671..bf23153d4f5551 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -589,7 +589,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, return ret; ret = ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_SYNCCPU_WRITE, &existed); + TTM_REF_SYNCCPU_WRITE, &existed, false); if (ret != 0 || existed) ttm_bo_synccpu_write_release(&user_bo->dma.base); @@ -773,7 +773,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, *handle = user_bo->prime.base.hash.key; return ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_USAGE, NULL); + TTM_REF_USAGE, NULL, false); } /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index f410502cb07527..adc023fe67f3cd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -891,17 +891,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, uint32_t handle; struct ttm_base_object *base; int ret; + bool 
require_exist = false; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); if (unlikely(ret != 0)) return ret; } else { - if (unlikely(drm_is_render_client(file_priv))) { - DRM_ERROR("Render client refused legacy " - "surface reference.\n"); - return -EACCES; - } + if (unlikely(drm_is_render_client(file_priv))) + require_exist = true; + if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); @@ -929,17 +928,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, /* * Make sure the surface creator has the same - * authenticating master. + * authenticating master, or is already registered with us. */ if (drm_is_primary_client(file_priv) && - user_srf->master != file_priv->master) { - DRM_ERROR("Trying to reference surface outside of" - " master domain.\n"); - ret = -EACCES; - goto out_bad_resource; - } + user_srf->master != file_priv->master) + require_exist = true; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, + require_exist); if (unlikely(ret != 0)) { DRM_ERROR("Could not add a reference to a surface.\n"); goto out_bad_resource; diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index ed953f98f0e144..1487011fe057ba 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -229,6 +229,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); * @ref_type: The type of reference. * @existed: Upon completion, indicates that an identical reference object * already existed, and the refcount was upped on that object instead. + * @require_existed: Fail with -EPERM if an identical ref object didn't + * already exist. * * Checks that the base object is shareable and adds a ref object to it. * @@ -243,7 +245,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); */ extern int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed); + enum ttm_ref_type ref_type, bool *existed, + bool require_existed); extern bool ttm_ref_object_exists(struct ttm_object_file *tfile, struct ttm_base_object *base); From 3ce7803cf3a7bc52c0eb9f516de8b72a0305ad57 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 12:38:25 +0200 Subject: [PATCH 1828/1911] drm/ttm: Avoid calling drm_ht_remove from atomic context On recent kernels, calling drm_ht_remove triggers a might_sleep() warning from within vfree(). So avoid calling it from atomic context. The use-cases we fix here are both from destructors so there should be no concurrent use of the hash tables. 
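A minimal, self-contained illustration of the locking pattern the patch above adopts (userspace C, with a pthread mutex standing in for the kernel spinlock; the list type and names are invented). The point is only that cleanup which may block -- vfree() in the kernel, free() used here for the sake of the example -- runs after the lock has been dropped, never inside the critical section.

#include <pthread.h>
#include <stdlib.h>

struct node {
	struct node *next;
	char payload[64];
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void teardown(void)
{
	struct node *detached, *next;

	pthread_mutex_lock(&lock);
	detached = head;		/* unlink everything while protected */
	head = NULL;
	pthread_mutex_unlock(&lock);

	/* potentially blocking cleanup happens outside the locked region */
	while (detached) {
		next = detached->next;
		free(detached);
		detached = next;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		struct node *n = calloc(1, sizeof(*n));

		if (!n)
			return 1;
		n->next = head;
		head = n;
	}
	teardown();
	return 0;
}
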
Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/ttm/ttm_object.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index d750140bafbcdb..26a7ad0f478978 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -453,10 +453,10 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) ttm_ref_object_release(&ref->kref); } + spin_unlock(&tfile->lock); for (i = 0; i < TTM_REF_NUM; ++i) drm_ht_remove(&tfile->ref_hash[i]); - spin_unlock(&tfile->lock); ttm_object_file_unref(&tfile); } EXPORT_SYMBOL(ttm_object_file_release); @@ -533,9 +533,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) *p_tdev = NULL; - spin_lock(&tdev->object_lock); drm_ht_remove(&tdev->object_hash); - spin_unlock(&tdev->object_lock); kfree(tdev); } From 53e16798b0864464c5444a204e1bb93ae246c429 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 13:06:05 +0200 Subject: [PATCH 1829/1911] drm/vmwgfx: Remove getparam error message The mesa winsys sometimes uses unimplemented parameter requests to check for features. Remove the error message to avoid bloating the kernel log. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 1802d0e7fab8f2..5ec24fd801cd2b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, param->value = dev_priv->has_dx; break; default: - DRM_ERROR("Illegal vmwgfx get param request: %d\n", - param->param); return -EINVAL; } From e7e11f99564222d82f0ce84bd521e57d78a6b678 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Mon, 27 Mar 2017 20:10:53 -0700 Subject: [PATCH 1830/1911] drm/vmwgfx: fix integer overflow in vmw_surface_define_ioctl() In vmw_surface_define_ioctl(), the 'num_sizes' is the sum of the 'req->mip_levels' array. This array can be assigned any value from the user space. As both 'num_sizes' and the array are uint32_t, it is easy to make 'num_sizes' overflow. Later, 'mip_levels' is used as the loop count. This can lead to an oob write. Add the check of 'req->mip_levels' to avoid this. 
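The arithmetic behind this fix is easy to reproduce. The short userspace C program below uses made-up limits standing in for DRM_VMW_MAX_SURFACE_FACES and DRM_VMW_MAX_MIP_LEVELS (it is a sketch, not driver code) and shows how a sum of attacker-chosen u32 values wraps around, and why bounding every element first makes a later range check on the sum sound.

#include <stdint.h>
#include <stdio.h>

#define FACES     6	/* stand-in for DRM_VMW_MAX_SURFACE_FACES */
#define MAX_LEVEL 24	/* stand-in for DRM_VMW_MAX_MIP_LEVELS */

int main(void)
{
	/* attacker-chosen per-face counts: the 32-bit sum wraps to 6 */
	uint32_t levels[FACES] = { 0xffffffffu, 7, 0, 0, 0, 0 };
	uint32_t num_sizes = 0;
	int i;

	for (i = 0; i < FACES; i++)
		num_sizes += levels[i];
	printf("unchecked sum wraps to %u\n", num_sizes);

	/*
	 * With each element bounded by MAX_LEVEL, the sum is at most
	 * FACES * MAX_LEVEL (144 here), so it cannot wrap and a later
	 * "num_sizes too large" check actually means something.
	 */
	for (i = 0; i < FACES; i++) {
		if (levels[i] > MAX_LEVEL) {
			printf("rejected mip_levels[%d]\n", i);
			return 1;
		}
	}
	return 0;
}
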
Cc: Signed-off-by: Li Qiang Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index adc023fe67f3cd..05fa092c942bee 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -713,8 +713,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, 128; num_sizes = 0; - for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) + for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { + if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS) + return -EINVAL; num_sizes += req->mip_levels[i]; + } if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || num_sizes == 0) From 3f135e57a4f76d24ae8d8a490314331f0ced40c5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 14:31:33 -0500 Subject: [PATCH 1831/1911] x86/build: Mostly disable '-maccumulate-outgoing-args' The GCC '-maccumulate-outgoing-args' flag is enabled for most configs, mostly because of issues which are no longer relevant. For most configs, and with most recent versions of GCC, it's no longer needed. Clarify which cases need it, and only enable it for those cases. Also produce a compile-time error for the ftrace graph + mcount + '-Os' case, which will otherwise cause runtime failures. The main benefit of '-maccumulate-outgoing-args' is that it prevents an ugly prologue for functions which have aligned stacks. But removing the option also has some benefits: more readable argument saves, smaller text size, and (presumably) slightly improved performance. Here are the object size savings for 32-bit and 64-bit defconfig kernels: text data bss dec hex filename 10006710 3543328 1773568 15323606 e9d1d6 vmlinux.x86-32.before 9706358 3547424 1773568 15027350 e54c96 vmlinux.x86-32.after text data bss dec hex filename 10652105 4537576 843776 16033457 f4a6b1 vmlinux.x86-64.before 10639629 4537576 843776 16020981 f475f5 vmlinux.x86-64.after That comes out to a 3% text size improvement on x86-32 and a 0.1% text size improvement on x86-64. Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 35 +++++++++++++++++++++++++++++++---- arch/x86/Makefile_32.cpu | 18 ------------------ arch/x86/kernel/ftrace.c | 6 ++++++ scripts/Kbuild.include | 4 ++++ 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a36051..a94a4d10f2dfa4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -120,10 +120,6 @@ else # -funit-at-a-time shrinks the kernel .text considerably # unfortunately it makes reading oopses harder. 
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) - - # this works around some issues with generating unwind tables in older gccs - # newer gccs do it by default - KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif ifdef CONFIG_X86_X32 @@ -147,6 +143,37 @@ ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) endif +# +# If the function graph tracer is used with mcount instead of fentry, +# '-maccumulate-outgoing-args' is needed to prevent a GCC bug +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) +# +ifdef CONFIG_FUNCTION_GRAPH_TRACER + ifndef CONFIG_HAVE_FENTRY + ACCUMULATE_OUTGOING_ARGS := 1 + else + ifeq ($(call cc-option-yn, -mfentry), n) + ACCUMULATE_OUTGOING_ARGS := 1 + endif + endif +endif + +# +# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a +# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way +# to test for this bug at compile-time because the test case needs to execute, +# which is a no-go for cross compilers. So check the GCC version instead. +# +ifdef CONFIG_JUMP_LABEL + ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) + ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) + endif +endif + +ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) + KBUILD_CFLAGS += -maccumulate-outgoing-args +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 6647ed49c66c97..a45eb15b7cf290 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -# Work around the pentium-mmx code generator madness of gcc4.4.x which -# does stack alignment by generating horrible code _before_ the mcount -# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph -# tracer assumptions. For i686, generic, core2 this is set by the -# compiler anyway -ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -# Work around to a bug with asm goto with first implementations of it -# in gcc causing gcc to mess up the push and pop of the stack in some -# uses of asm goto. -ifeq ($(CONFIG_JUMP_LABEL), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) - # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. 
ifneq ($(CONFIG_X86_P6_NOP),y) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8f3d9cf26ff9f7..cbd73eb4217026 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,12 @@ #include #include +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \ + !defined(CC_USING_FENTRY) && \ + !defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) +# error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index d6ca649cb0e96d..afe3fd3af1e406 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -148,6 +148,10 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) +# cc-if-fullversion +# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1) +cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4)) + # cc-ldoption # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) cc-ldoption = $(call try-run,\ From 6d6e500391875cc372336c88e9a8af377be19c36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Mar 2017 13:13:43 -0700 Subject: [PATCH 1832/1911] drm/vc4: Allocate the right amount of space for boot-time CRTC state. Without this, the first modeset would dereference past the allocation when trying to free the mm node. Signed-off-by: Eric Anholt Tested-by: Stefan Wahren Link: http://patchwork.freedesktop.org/patch/msgid/20170328201343.4884-1-eric@anholt.net Fixes: d8dbf44f13b9 ("drm/vc4: Make the CRTCs cooperate on allocating display lists.") Cc: # v4.6+ Reviewed-by: Daniel Vetter --- drivers/gpu/drm/vc4/vc4_crtc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 0c06844af4455d..9fcf05ca492b0c 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -846,6 +846,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, drm_atomic_helper_crtc_destroy_state(crtc, state); } +static void +vc4_crtc_reset(struct drm_crtc *crtc) +{ + if (crtc->state) + __drm_atomic_helper_crtc_destroy_state(crtc->state); + + crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); + if (crtc->state) + crtc->state->crtc = crtc; +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, @@ -853,7 +864,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ - .reset = drm_atomic_helper_crtc_reset, + .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, .gamma_set = vc4_crtc_gamma_set, From 335d2c2d192266358c5dfa64953a4c162f46e464 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 24 Mar 2017 09:53:56 -0400 Subject: [PATCH 1833/1911] arm64: fix NULL dereference in have_cpu_die() Commit 5c492c3f5255 ("arm64: smp: Add function to determine if cpus are stuck in the kernel") added a helper function to determine if die() is supported in cpu_ops. 
This function assumes a cpu will have a valid cpu_ops entry, but that may not be the case for cpu0 is spin-table or parking protocol is used to boot secondary cpus. In that case, there is a NULL dereference if have_cpu_die() is called by cpu0. So add a check for a valid cpu_ops before dereferencing it. Fixes: 5c492c3f5255 ("arm64: smp: Add function to determine if cpus are stuck in the kernel") Signed-off-by: Mark Salter Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ef1caae02110ee..9b1036570586f9 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -944,7 +944,7 @@ static bool have_cpu_die(void) #ifdef CONFIG_HOTPLUG_CPU int any_cpu = raw_smp_processor_id(); - if (cpu_ops[any_cpu]->cpu_die) + if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die) return true; #endif return false; From 893dc68f1b18451e6d550b1884fc6be76e1bb90c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 21 Mar 2017 09:24:11 -0500 Subject: [PATCH 1834/1911] rtlwifi: Fix scheduling while atomic splat Following commit cceb0a597320 ("rtlwifi: Add work queue for c2h cmd."), the following BUG is reported when rtl8723be is used: BUG: sleeping function called from invalid context at mm/slab.h:432 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/0 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.11.0-rc3-wl+ #276 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.50 09/29/2014 Call Trace: dump_stack+0x63/0x89 ___might_sleep+0xe9/0x130 __might_sleep+0x4a/0x90 kmem_cache_alloc_trace+0x19f/0x200 ? rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl8723be_c2h_packet_handler+0xac/0xc0 [rtl8723be] rtl8723be_rx_command_packet+0x37/0x5c [rtl8723be] _rtl_pci_rx_interrupt+0x200/0x6b0 [rtl_pci] _rtl_pci_interrupt+0x20c/0x5d0 [rtl_pci] __handle_irq_event_percpu+0x3f/0x1d0 handle_irq_event_percpu+0x23/0x60 handle_irq_event+0x3c/0x60 handle_fasteoi_irq+0xa2/0x170 handle_irq+0x20/0x30 do_IRQ+0x48/0xd0 common_interrupt+0x89/0x89 ... Although commit cceb0a597320 converted most c2h commands to use a work queue, the Bluetooth coexistence routines can be in atomic mode when they execute such a call. Fixes: cceb0a597320 ("rtlwifi: Add work queue for c2h cmd.") Signed-off-by: Larry Finger Cc: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index caea350f05aac7..bdc379178e8795 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1742,12 +1742,14 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, u8 tag, u8 len, u8 *val) unsigned long flags; struct rtl_c2hcmd *c2hcmd; - c2hcmd = kmalloc(sizeof(*c2hcmd), GFP_KERNEL); + c2hcmd = kmalloc(sizeof(*c2hcmd), + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd) goto label_err; - c2hcmd->val = kmalloc(len, GFP_KERNEL); + c2hcmd->val = kmalloc(len, + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd->val) goto label_err2; From d77facb88448cdeaaa3adba5b9704a48ac2ac8d6 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:30 +0100 Subject: [PATCH 1835/1911] brcmfmac: use local iftype avoiding use-after-free of virtual interface A use-after-free was found using KASAN. 
In brcmf_p2p_del_if() the virtual interface is removed using a call to brcmf_remove_interface(). After that the virtual interface instance has been freed and should not be referenced. Solve this by storing the nl80211 iftype in a local variable, which is used in a couple of places anyway. Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index de19c7c92bc6c0..85d949e03f79f7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2238,14 +2238,16 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy); struct brcmf_p2p_info *p2p = &cfg->p2p; struct brcmf_cfg80211_vif *vif; + enum nl80211_iftype iftype; bool wait_for_disable = false; int err; brcmf_dbg(TRACE, "delete P2P vif\n"); vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev); + iftype = vif->wdev.iftype; brcmf_cfg80211_arm_vif_event(cfg, vif); - switch (vif->wdev.iftype) { + switch (iftype) { case NL80211_IFTYPE_P2P_CLIENT: if (test_bit(BRCMF_VIF_STATUS_DISCONNECTING, &vif->sme_state)) wait_for_disable = true; @@ -2275,7 +2277,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) BRCMF_P2P_DISABLE_TIMEOUT); err = 0; - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) { + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { brcmf_vif_clear_mgmt_ies(vif); err = brcmf_p2p_release_p2p_if(vif); } @@ -2291,7 +2293,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) brcmf_remove_interface(vif->ifp, true); brcmf_cfg80211_arm_vif_event(cfg, NULL); - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) + if (iftype != NL80211_IFTYPE_P2P_DEVICE) p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; return err; From fabbbee0eb0f4b763576ac1e2db4fc3bf6dcc0cc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 30 Mar 2017 10:10:55 -0400 Subject: [PATCH 1836/1911] PNFS fix fallback to MDS if got error on commit to DS Upon receiving some errors (EACCES) on commit to the DS the code doesn't fall back to MDS and instead retries the same DS again. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 367f8eb19bfa89..c9230fecc77ea1 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task, task->tk_status); nfs4_mark_deviceid_unavailable(devid); pnfs_error_mark_layout_for_return(inode, lseg); - pnfs_set_lo_fail(lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: + pnfs_set_lo_fail(lseg); reset: dprintk("%s Retry through MDS. 
Error %d\n", __func__, task->tk_status); From 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:20 +0200 Subject: [PATCH 1837/1911] net/packet: fix overflow in check for priv area size Subtracting tp_sizeof_priv from tp_block_size and casting to int to check whether one is less then the other doesn't always work (both of them are unsigned ints). Compare them as is instead. Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as it can overflow inside BLK_PLUS_PRIV otherwise. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f724c..2323ee35dc0983 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4193,8 +4193,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) From 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:21 +0200 Subject: [PATCH 1838/1911] net/packet: fix overflow in check for tp_frame_nr When calculating rb->frames_per_block * req->tp_block_nr the result can overflow. Add a check that tp_block_size * tp_block_nr <= UINT_MAX. Since frames_per_block <= tp_block_size, the expression would never overflow. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2323ee35dc0983..3ac286ebb2f48f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4205,6 +4205,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; From bcc5364bdcfe131e6379363f089e7b4108d35b70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:22 +0200 Subject: [PATCH 1839/1911] net/packet: fix overflow in check for tp_reserve When calculating po->tp_hdrlen + po->tp_reserve the result can overflow. Fix by checking that tp_reserve <= INT_MAX on assign. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3ac286ebb2f48f..8489beff5c25c9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } From 3dbcc105d5561e18ccd0842c7baab1c835562a37 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 30 Mar 2017 01:00:53 +0800 Subject: [PATCH 1840/1911] sctp: alloc stream info when initializing asoc When sending a msg without asoc established, sctp will send INIT packet first and then enqueue chunks. Before receiving INIT_ACK, stream info is not yet alloced. But enqueuing chunks needs to access stream info, like out stream state and out stream cnt. This patch is to fix it by allocing out stream info when initializing an asoc, allocing in stream and re-allocing out stream when processing init. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- net/sctp/associola.c | 7 ++++++- net/sctp/sm_make_chunk.c | 9 ++------ net/sctp/stream.c | 43 +++++++++++++++++++++++++++++++------- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8caa5ee9e290bc..a127b7c2c3c932 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -377,7 +377,8 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } sctp_sender_hb_info_t; -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp); +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp); +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0439a1a6836784..0b26df5f618856 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) - goto fail_init; + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb5431..118faff6a332ee 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. 
*/ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1c6cc04fa3a41f..bbed997e1c5f01 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include #include -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) From 34d04f25a98cbc1065e87f10d94798bbe9a8af94 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 30 Mar 2017 20:41:49 +0800 Subject: [PATCH 1841/1911] arm64: remove redundant header file in current.h Commint 9d84fb27fa1 ("arm64: restore get_current() optimisation") has removed read_sysreg() and asm/sysreg.h is redundant. This patch removes asm/sysreg.h header file. Acked-by: Mark Rutland Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/current.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index 86c404171305ab..f6580d4afb0e0c 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -3,8 +3,6 @@ #include -#include - #ifndef __ASSEMBLY__ struct task_struct; From 9b3403ae56e13cdf45252f34db196a8f5f52b6ac Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Mar 2017 02:44:17 +0900 Subject: [PATCH 1842/1911] arm64: drop non-existing vdso-offsets.h from .gitignore Since commit a66649dab350 ("arm64: fix vdso-offsets.h dependency"), include/generated/vdso-offsets.h is directly generated without arch/arm64/kernel/vdso/vdso-offsets.h. 
Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore index b8cc94e9698b69..f8b69d84238eb4 100644 --- a/arch/arm64/kernel/vdso/.gitignore +++ b/arch/arm64/kernel/vdso/.gitignore @@ -1,2 +1 @@ vdso.lds -vdso-offsets.h From 923713b357455cfb9aca2cd3429cb0806a724ed2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 26 Mar 2017 13:14:45 +0200 Subject: [PATCH 1843/1911] mmc: sdhci: Disable runtime pm when the sdio_irq is enabled SDIO cards may need clock to send the card interrupt to the host. On a cherrytrail tablet with a RTL8723BS wifi chip, without this patch pinging the tablet results in: PING 192.168.1.14 (192.168.1.14) 56(84) bytes of data. 64 bytes from 192.168.1.14: icmp_seq=1 ttl=64 time=78.6 ms 64 bytes from 192.168.1.14: icmp_seq=2 ttl=64 time=1760 ms 64 bytes from 192.168.1.14: icmp_seq=3 ttl=64 time=753 ms 64 bytes from 192.168.1.14: icmp_seq=4 ttl=64 time=3.88 ms 64 bytes from 192.168.1.14: icmp_seq=5 ttl=64 time=795 ms 64 bytes from 192.168.1.14: icmp_seq=6 ttl=64 time=1841 ms 64 bytes from 192.168.1.14: icmp_seq=7 ttl=64 time=810 ms 64 bytes from 192.168.1.14: icmp_seq=8 ttl=64 time=1860 ms 64 bytes from 192.168.1.14: icmp_seq=9 ttl=64 time=812 ms 64 bytes from 192.168.1.14: icmp_seq=10 ttl=64 time=48.6 ms Where as with this patch I get: PING 192.168.1.14 (192.168.1.14) 56(84) bytes of data. 64 bytes from 192.168.1.14: icmp_seq=1 ttl=64 time=3.96 ms 64 bytes from 192.168.1.14: icmp_seq=2 ttl=64 time=1.97 ms 64 bytes from 192.168.1.14: icmp_seq=3 ttl=64 time=17.2 ms 64 bytes from 192.168.1.14: icmp_seq=4 ttl=64 time=2.46 ms 64 bytes from 192.168.1.14: icmp_seq=5 ttl=64 time=2.83 ms 64 bytes from 192.168.1.14: icmp_seq=6 ttl=64 time=1.40 ms 64 bytes from 192.168.1.14: icmp_seq=7 ttl=64 time=2.10 ms 64 bytes from 192.168.1.14: icmp_seq=8 ttl=64 time=1.40 ms 64 bytes from 192.168.1.14: icmp_seq=9 ttl=64 time=2.04 ms 64 bytes from 192.168.1.14: icmp_seq=10 ttl=64 time=1.40 ms Cc: Dong Aisheng Cc: Ian W MORRISON Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Acked-by: Dong Aisheng Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9c1a099afbbeca..63bc33a54d0dd8 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1830,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; + if (enable) + pm_runtime_get_noresume(host->mmc->parent); + spin_lock_irqsave(&host->lock, flags); if (enable) host->flags |= SDHCI_SDIO_IRQ_ENABLED; @@ -1838,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) sdhci_enable_sdio_irq_nolock(host, enable); spin_unlock_irqrestore(&host->lock, flags); + + if (!enable) + pm_runtime_put_noidle(host->mmc->parent); } static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, From d0918764c17b94c30bbb2619929b1719ff52707a Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 28 Mar 2017 11:00:45 +0200 Subject: [PATCH 1844/1911] mmc: sdhci-of-at91: fix MMC_DDR_52 timing selection The controller has different timings for MMC_TIMING_UHS_DDR50 and MMC_TIMING_MMC_DDR52. 
Configuring the controller with SDHCI_CTRL_UHS_DDR50, when MMC_TIMING_MMC_DDR52 timings are requested, is not correct and can lead to unexpected behavior. Signed-off-by: Ludovic Desroches Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC") Cc: # 4.4+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 7fd964256faa7d..d5430ed02a6789 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -29,6 +29,8 @@ #include "sdhci-pltfm.h" +#define SDMMC_MC1R 0x204 +#define SDMMC_MC1R_DDR BIT(3) #define SDMMC_CACR 0x230 #define SDMMC_CACR_CAPWREN BIT(0) #define SDMMC_CACR_KEY (0x46 << 8) @@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, sdhci_set_power_noreg(host, mode, vdd); } +void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) +{ + if (timing == MMC_TIMING_MMC_DDR52) + sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R); + sdhci_set_uhs_signaling(host, timing); +} + static const struct sdhci_ops sdhci_at91_sama5d2_ops = { .set_clock = sdhci_at91_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = sdhci_at91_set_uhs_signaling, .set_power = sdhci_at91_set_power, }; From 2b83878dd74a7c73bedcb6600663c1c46836e8af Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 29 Mar 2017 15:44:47 -0700 Subject: [PATCH 1845/1911] xtensa: make __pa work with uncached KSEG addresses When __pa is applied to virtual address in uncached KSEG region the result is incorrect. Fix it by checking if the original address is in the uncached KSEG and adjusting the result. It looks better than masking off bits because pfn_valid would correctly work with new __pa results and it may be made working in noMMU case, once we get definition for uncached memory view. This is required for the dma_common_mmap and DMA debug code to work correctly: they both indirectly use __pa with coherent DMA addresses. In case of DMA debug the visible effect is false reports that an address mapped for DMA is accessed by CPU. 
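To make the adjustment concrete: the old definition, va - PAGE_OFFSET + PHYS_OFFSET, yields a result that is XCHAL_KSEG_SIZE too high for addresses in the uncached KSEG alias, so pfn_valid() and the DMA debug checks end up looking at the wrong page. A minimal sketch of the corrected translation, mirroring the ___pa() helper added by this patch, is shown below (the function name example_pa is purely illustrative; the other symbols are the kernel's existing platform constants):

	static inline unsigned long example_pa(unsigned long va)
	{
		unsigned long off = va - PAGE_OFFSET;	/* offset from the cached KSEG base */

		if (off >= XCHAL_KSEG_SIZE)		/* va lies in the uncached KSEG alias */
			off -= XCHAL_KSEG_SIZE;		/* fold it back onto the cached view */

		return off + PHYS_OFFSET;		/* both views map to the same physical address */
	}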
Cc: stable@vger.kernel.org Tested-by: Boris Brezillon Reviewed-by: Boris Brezillon Signed-off-by: Max Filippov --- arch/xtensa/include/asm/page.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 976b1d70edbc0a..4ddbfd57a7c824 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -164,8 +164,21 @@ void copy_user_highpage(struct page *to, struct page *from, #define ARCH_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +#ifdef CONFIG_MMU +static inline unsigned long ___pa(unsigned long va) +{ + unsigned long off = va - PAGE_OFFSET; + + if (off >= XCHAL_KSEG_SIZE) + off -= XCHAL_KSEG_SIZE; + + return off + PHYS_OFFSET; +} +#define __pa(x) ___pa((unsigned long)(x)) +#else #define __pa(x) \ ((unsigned long) (x) - PAGE_OFFSET + PHYS_OFFSET) +#endif #define __va(x) \ ((void *)((unsigned long) (x) - PHYS_OFFSET + PAGE_OFFSET)) #define pfn_valid(pfn) \ From 0b98ca2a45e35dfe02f4512fa30b9f5a9900cb29 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Thu, 30 Mar 2017 00:58:32 -0400 Subject: [PATCH 1846/1911] be2net: Fix endian issue in logical link config command Use cpu_to_le32() for link_config variable in set_logical_link_config command as this variable is of type u32. Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 30e855004c5759..02dd5246dfae9a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4939,8 +4939,9 @@ static int __be_cmd_set_logical_link_config(struct be_adapter *adapter, int link_state, int version, u8 domain) { - struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; + struct be_mcc_wrb *wrb; + u32 link_config = 0; int status; mutex_lock(&adapter->mcc_lock); @@ -4962,10 +4963,12 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, if (link_state == IFLA_VF_LINK_STATE_ENABLE || link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_ENABLE; + link_config |= PLINK_ENABLE; if (link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_TRACK; + link_config |= PLINK_TRACK; + + req->link_config = cpu_to_le32(link_config); status = be_mcc_notify_wait(adapter); err: From c70c473396cbdec1168a6eff60e13029c0916854 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 29 Mar 2017 17:15:11 +0300 Subject: [PATCH 1847/1911] ARCv2: SLC: Make sure busy bit is set properly on SLC flushing As reported in STAR 9001165532, an SLC control reg read (for checking busy state) right after SLC invalidate command may incorrectly return NOT busy causing software to NOT spin-wait while operation is underway. (and for some reason this only happens if L1 cache is also disabled - as required by IOC programming model) Suggested workaround is to do an additional Control Reg read, which ensures the 2nd read gets the right status. 
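Spelled out, the resulting sequence in slc_entire_op() is roughly as follows (sketch only, using the helpers and register names already present in the function):

	write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);

	/* Dummy read-back: per STAR 9001165532 a read issued right after the
	 * invalidate command may wrongly report "not busy"; this extra read
	 * makes the subsequent reads return the real status.
	 */
	read_aux_reg(r);

	while (read_aux_reg(r) & SLC_CTRL_BUSY)		/* now a reliable busy-wait */
		;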
Cc: stable@vger.kernel.org #4.10 Signed-off-by: Alexey Brodkin [vgupta: reworte changelog a bit] Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index d408fa21a07c99..928562967f3cd0 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op) write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(r); + /* Important to wait for flush to complete */ while (read_aux_reg(r) & SLC_CTRL_BUSY); } From 4c6fabda1ad1dec6d274c098ef0a91809c74f2e3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 30 Mar 2017 10:02:57 -0700 Subject: [PATCH 1848/1911] ARC: fix build warnings with !CONFIG_KPROBES | CC lib/nmi_backtrace.o | In file included from ../include/linux/kprobes.h:43:0, | from ../lib/nmi_backtrace.c:17: | ../arch/arc/include/asm/kprobes.h:57:13: warning: 'trap_is_kprobe' defined but not used [-Wunused-function] | static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) | ^~~~~~~~~~~~~~ The warning started with 7d134b2ce6 ("kprobes: move kprobe declarations to asm-generic/kprobes.h") which started including unconditionally into exposing a stub function for !CONFIG_KPROBES to rest of world. Fix that by making the stub a macro Signed-off-by: Vineet Gupta --- arch/arc/include/asm/kprobes.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index 00bdbe167615ec..2e52d18e6bc7ee 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -54,9 +54,7 @@ int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause); void kretprobe_trampoline(void); void trap_is_kprobe(unsigned long address, struct pt_regs *regs); #else -static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) -{ -} +#define trap_is_kprobe(address, regs) #endif /* CONFIG_KPROBES */ #endif /* _ARC_KPROBES_H */ From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 30 Mar 2017 13:17:14 +0200 Subject: [PATCH 1849/1911] x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. However, MCA bank 3 is defined on Fam17h systems and can be accessed using legacy MSRs. Without a name we get a stack trace on Fam17h systems when trying to register sysfs files for bank 3 on kernels that don't recognize Scalable MCA. Call MCA bank 3 "decode_unit" since this is what it represents on Fam17h. This will allow kernels without SMCA support to see this bank on Fam17h+ and prevent the stack trace. This will not affect older systems since this bank is reserved on them, i.e. it'll be ignored. Tested on AMD Fam15h and Fam17h systems. WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal kobject: (ffff88085bb256c0): attempted to be registered with empty name! ... 
Call Trace: kobject_add_internal kobject_add kobject_create_and_add threshold_create_device threshold_init_device Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 524cc5780a7796..6e4a047e4b684b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -60,7 +60,7 @@ static const char * const th_names[] = { "load_store", "insn_fetch", "combined_unit", - "", + "decode_unit", "northbridge", "execution_unit", }; From 6b1cc946ddfcfc17d66c7d02eafa14deeb183437 Mon Sep 17 00:00:00 2001 From: Zhengyi Shen Date: Wed, 29 Mar 2017 15:00:20 +0800 Subject: [PATCH 1850/1911] x86/boot: Include missing header file Sparse complains about missing forward declarations: arch/x86/boot/compressed/error.c:8:6: warning: symbol 'warn' was not declared. Should it be static? arch/x86/boot/compressed/error.c:15:6: warning: symbol 'error' was not declared. Should it be static? Include the missing header file. Signed-off-by: Zhengyi Shen Acked-by: Kees Cook Link: http://lkml.kernel.org/r/1490770820-24472-1-git-send-email-shenzhengyi@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index 6248740b68b5a0..31922023de4928 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -4,6 +4,7 @@ * memcpy() and memmove() are defined for the compressed boot environment. */ #include "misc.h" +#include "error.h" void warn(char *m) { From 2f726aec19a9d2c63bec9a8a53a3910ffdcd09f8 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 31 Mar 2017 10:31:40 +0800 Subject: [PATCH 1851/1911] ALSA: hda - fix a problem for lineout on a Dell AIO machine On this Dell AIO machine, the lineout jack does not work. We found the pin 0x1a is assigned to lineout on this machine, and in the past, we applied ALC298_FIXUP_DELL1_MIC_NO_PRESENCE to fix the headset mic problem for this machine; that fixup redefines the pin 0x1a as headphone-mic, and as a result the lineout doesn't work anymore. After consulting with Dell, we learned that this machine doesn't support a microphone via the headset jack, so we add a new fixup which only defines the pin 0x18 as the headset-mic.
[rearranged the fixup insertion position by tiwai in order to make the merge with other branches easier -- tiwai] Fixes: 59ec4b57bcae ("ALSA: hda - Fix headset mic detection problem for two dell machines") Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7f989898cbd9aa..299835d1fbaadb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4858,6 +4858,7 @@ enum { ALC292_FIXUP_DISABLE_AAMIX, ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, @@ -5470,6 +5471,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC275_FIXUP_DELL_XPS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -5542,7 +5552,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_speaker_volume, .chained = true, - .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, From 42969893b45a7a1864192f5634a8d2626e546a7b Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Fri, 31 Mar 2017 12:05:32 +0100 Subject: [PATCH 1852/1911] irqchip/mips-gic: Fix Local compare interrupt Commit 4cfffcfa5106 ("irqchip/mips-gic: Fix local interrupts") added mapping of several local interrupts during initialisation of the gic driver. This associates virq numbers with these interrupts. Unfortunately, as not all of the interrupts are mapped in hardware order, when drivers subsequently request these interrupts they conflict with the mappings that have already been set up. For example, this manifests itself in the gic clocksource driver, which fails to probe with the message: clocksource: GIC: mask: 0xffffffffffffffff max_cycles: 0x7350c9738, max_idle_ns: 440795203769 ns GIC timer IRQ 25 setup failed: -22 This is because virq 25 (the correct IRQ number specified via device tree) was allocated to the PERFCTR interrupt (and 24 to the timer, 26 to the FDC). To fix this, map all of these local interrupts in the hardware order so as to associate their virq numbers with the correct hw interrupts. 
Fixes: 4cfffcfa5106 ("irqchip/mips-gic: Fix local interrupts") Acked-by: Ralf Baechle Signed-off-by: Matt Redfearn Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mips-gic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 11d12bccc4e7f1..cd20df12d63d98 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -991,8 +991,12 @@ static void __init gic_map_single_int(struct device_node *node, static void __init gic_map_interrupts(struct device_node *node) { + gic_map_single_int(node, GIC_LOCAL_INT_WD); + gic_map_single_int(node, GIC_LOCAL_INT_COMPARE); gic_map_single_int(node, GIC_LOCAL_INT_TIMER); gic_map_single_int(node, GIC_LOCAL_INT_PERFCTR); + gic_map_single_int(node, GIC_LOCAL_INT_SWINT0); + gic_map_single_int(node, GIC_LOCAL_INT_SWINT1); gic_map_single_int(node, GIC_LOCAL_INT_FDC); } From a6040bc610554c66088fda3608ae5d6307c548e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Mar 2017 10:05:38 +0100 Subject: [PATCH 1853/1911] serial: mxs-auart: Fix baudrate calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference manual for the i.MX28 recommends to calculate the divisor as divisor = (UARTCLK * 32) / baud rate, rounded to the nearest integer , so let's do this. For a typical setup of UARTCLK = 24 MHz and baud rate = 115200 this changes the divisor from 6666 to 6667 and so the actual baud rate improves from 115211.521 Bd (error ≅ 0.01 %) to 115194.240 Bd (error ≅ 0.005 %). Signed-off-by: Uwe Kleine-König Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 6989b227d1349b..be94246b6fcca1 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1088,7 +1088,7 @@ static void mxs_auart_settermios(struct uart_port *u, AUART_LINECTRL_BAUD_DIV_MAX); baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN; baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max); - div = u->uartclk * 32 / baud; + div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud); } ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F); From 31ca2c63fdc0aee725cbd4f207c1256f5deaabde Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Mon, 20 Mar 2017 11:52:41 +0100 Subject: [PATCH 1854/1911] tty/serial: atmel: fix race condition (TX+DMA) If uart_flush_buffer() is called between atmel_tx_dma() and atmel_complete_tx_dma(), the circular buffer has been cleared, but not atmel_port->tx_len. That leads to a circular buffer overflow (dumping (UART_XMIT_SIZE - atmel_port->tx_len) bytes). Tested-by: Nicolas Ferre Signed-off-by: Richard Genoud Cc: stable # 3.12+ Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index dcebb28ffbc412..5f644515ed33be 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1951,6 +1951,11 @@ static void atmel_flush_buffer(struct uart_port *port) atmel_uart_writel(port, ATMEL_PDC_TCR, 0); atmel_port->pdc_tx.ofs = 0; } + /* + * in uart_flush_buffer(), the xmit circular buffer has just + * been cleared, so we have to reset tx_len accordingly. 
+ */ + atmel_port->tx_len = 0; } /* From 497e1e16f45c70574dc9922c7f75c642c2162119 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 20 Mar 2017 16:38:57 +0100 Subject: [PATCH 1855/1911] tty/serial: atmel: fix TX path in atmel_console_write() A side effect of 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") is that the console can be called with TX path disabled. Then the system would hang trying to push characters out in atmel_console_putchar(). Signed-off-by: Nicolas Ferre Fixes: 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") Cc: stable #4.4+ Acked-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 5f644515ed33be..1f50a83ef95860 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2488,6 +2488,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); + /* Make sure that tx path is actually able to send characters */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); + uart_console_write(port, s, count, atmel_console_putchar); /* From 49e1590c2ead870f4ae5568816241c4cb9bc1606 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 27 Mar 2017 19:39:10 -0400 Subject: [PATCH 1856/1911] serial: 8250_EXAR: fix duplicate Kconfig text and add missing help text In commit d0aeaa83f0b0f7a92615bbdd6b1f96812f7dcfd2 ("serial: exar: split out the exar code from 8250_pci") the exar driver got its own Kconfig. However, the text for the new option was never changed from the original 8250_PCI text, and hence it appears confusing when you get asked the same question twice: 8250/16550 PCI device support (SERIAL_8250_PCI) [Y/n/m/?] (NEW) 8250/16550 PCI device support (SERIAL_8250_EXAR) [Y/n/m] (NEW) Adding to the confusion is that there is no help text for this new option to indicate it is specific to a certain family of cards. Fix both issues at the same time, as well as the space vs. tab issues introduced in the same commit. Fixes: d0aeaa83f0b0 ("serial: exar: split out the exar code from 8250_pci") Cc: Jiri Slaby Cc: linux-serial@vger.kernel.org Signed-off-by: Paul Gortmaker Acked-by: Sudip Mukherjee Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index a65fb8197aecb6..0e3f529d50e9d0 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -128,9 +128,13 @@ config SERIAL_8250_PCI by the parport_serial driver, enabled with CONFIG_PARPORT_SERIAL. config SERIAL_8250_EXAR - tristate "8250/16550 PCI device support" - depends on SERIAL_8250_PCI + tristate "8250/16550 Exar/Commtech PCI/PCIe device support" + depends on SERIAL_8250_PCI default SERIAL_8250 + help + This builds support for XR17C1xx, XR17V3xx and some Commtech + 422x PCIe serial cards that are not covered by the more generic + SERIAL_8250_PCI option.
config SERIAL_8250_HP300 tristate From 0e3d3e5df07dcf8a50d96e0ecd6ab9a888f55dfc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 30 Mar 2017 13:49:03 -0400 Subject: [PATCH 1857/1911] NFSv4.1 fix infinite loop on IO BAD_STATEID error Commit 63d63cbf5e03 "NFSv4.1: Don't recheck delegations that have already been checked" introduced a regression where when a client received BAD_STATEID error it would not send any TEST_STATEID and instead go into an infinite loop of resending the IO that caused the BAD_STATEID. Fixes: 63d63cbf5e03 ("NFSv4.1: Don't recheck delegations that have already been checked") Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c780d98035ccf7..201ca3f2c4bac1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2442,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { rcu_read_unlock(); nfs_finish_clear_delegation_stateid(state, &stateid); return; } - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { - rcu_read_unlock(); - return; - } - cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); From f17f8a14e82cdf34cd6473e3644f3c672b3884f6 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Thu, 30 Mar 2017 17:31:18 +0200 Subject: [PATCH 1858/1911] nfs: flexfiles: fix kernel OOPS if MDS returns unsupported DS type this fix aims to fix dereferencing of a mirror in an error state when MDS returns unsupported DS type (IOW, not v3), which causes the following oops: [ 220.370709] BUG: unable to handle kernel NULL pointer dereference at 0000000000000065 [ 220.370842] IP: ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] [ 220.370920] PGD 0 [ 220.370972] Oops: 0000 [#1] SMP [ 220.371013] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth nfs_layout_flexfiles rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security iptable_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_security ebtable_filter ebtables ip6table_filter ip6_tables binfmt_misc intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel btrfs kvm arc4 snd_hda_codec_hdmi iwldvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_cstate mac80211 xor uvcvideo [ 220.371814] videobuf2_vmalloc videobuf2_memops snd_hda_codec_idt mei_wdt videobuf2_v4l2 snd_hda_codec_generic iTCO_wdt ppdev videobuf2_core iTCO_vendor_support dell_rbtn dell_wmi iwlwifi sparse_keymap dell_laptop dell_smbios snd_hda_intel dcdbas videodev snd_hda_codec dell_smm_hwmon snd_hda_core media cfg80211 intel_uncore snd_hwdep raid6_pq snd_seq intel_rapl_perf snd_seq_device joydev i2c_i801 rfkill lpc_ich snd_pcm parport_pc mei_me parport snd_timer dell_smo8800 mei snd shpchp soundcore tpm_tis tpm_tis_core tpm nfsd auth_rpcgss 
nfs_acl lockd grace sunrpc i915 nouveau mxm_wmi ttm i2c_algo_bit drm_kms_helper crc32c_intel e1000e drm sdhci_pci firewire_ohci sdhci serio_raw mmc_core firewire_core ptp crc_itu_t pps_core wmi fjes video [ 220.372568] CPU: 7 PID: 4988 Comm: cat Not tainted 4.10.5-200.fc25.x86_64 #1 [ 220.372647] Hardware name: Dell Inc. Latitude E6520/0J4TFW, BIOS A06 07/11/2011 [ 220.372729] task: ffff94791f6ea580 task.stack: ffffb72b88c0c000 [ 220.372802] RIP: 0010:ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] [ 220.372883] RSP: 0018:ffffb72b88c0f970 EFLAGS: 00010246 [ 220.372945] RAX: 0000000000000000 RBX: ffff9479015ca600 RCX: ffffffffffffffed [ 220.373025] RDX: ffffffffffffffed RSI: ffff9479753dc980 RDI: 0000000000000000 [ 220.373104] RBP: ffffb72b88c0f988 R08: 000000000001c980 R09: ffffffffc0ea6112 [ 220.373184] R10: ffffef17477d9640 R11: ffff9479753dd6c0 R12: ffff9479211c7440 [ 220.373264] R13: ffff9478f45b7790 R14: 0000000000000001 R15: ffff9479015ca600 [ 220.373345] FS: 00007f555fa3e700(0000) GS:ffff9479753c0000(0000) knlGS:0000000000000000 [ 220.373435] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 220.373506] CR2: 0000000000000065 CR3: 0000000196044000 CR4: 00000000000406e0 [ 220.373586] Call Trace: [ 220.373627] nfs4_ff_layout_prepare_ds+0x5e/0x200 [nfs_layout_flexfiles] [ 220.373708] ff_layout_pg_init_read+0x81/0x160 [nfs_layout_flexfiles] [ 220.373806] __nfs_pageio_add_request+0x11f/0x4a0 [nfs] [ 220.373886] ? nfs_create_request.part.14+0x37/0x330 [nfs] [ 220.373967] nfs_pageio_add_request+0xb2/0x260 [nfs] [ 220.374042] readpage_async_filler+0xaf/0x280 [nfs] [ 220.374103] read_cache_pages+0xef/0x1b0 [ 220.374166] ? nfs_read_completion+0x210/0x210 [nfs] [ 220.374239] nfs_readpages+0x129/0x200 [nfs] [ 220.374293] __do_page_cache_readahead+0x1d0/0x2f0 [ 220.374352] ondemand_readahead+0x17d/0x2a0 [ 220.374403] page_cache_sync_readahead+0x2e/0x50 [ 220.374460] generic_file_read_iter+0x6c8/0x950 [ 220.374532] ? 
nfs_mapping_need_revalidate_inode+0x17/0x40 [nfs] [ 220.374617] nfs_file_read+0x6e/0xc0 [nfs] [ 220.374670] __vfs_read+0xe2/0x150 [ 220.374715] vfs_read+0x96/0x130 [ 220.374758] SyS_read+0x55/0xc0 [ 220.374801] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 220.374856] RIP: 0033:0x7f555f570bd0 [ 220.374900] RSP: 002b:00007ffeb73e1b38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 220.374986] RAX: ffffffffffffffda RBX: 00007f555f839ae0 RCX: 00007f555f570bd0 [ 220.375066] RDX: 0000000000020000 RSI: 00007f555fa41000 RDI: 0000000000000003 [ 220.375145] RBP: 0000000000021010 R08: ffffffffffffffff R09: 0000000000000000 [ 220.375226] R10: 00007f555fa40010 R11: 0000000000000246 R12: 0000000000022000 [ 220.375305] R13: 0000000000021010 R14: 0000000000001000 R15: 0000000000002710 [ 220.375386] Code: 66 66 90 55 48 89 e5 41 54 53 49 89 fc 48 83 ec 08 48 85 f6 74 2e 48 8b 4e 30 48 89 f3 48 81 f9 00 f0 ff ff 77 1e 48 85 c9 74 15 <48> 83 79 78 00 b8 01 00 00 00 74 2c 48 83 c4 08 5b 41 5c 5d c3 [ 220.375653] RIP: ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] RSP: ffffb72b88c0f970 [ 220.375748] CR2: 0000000000000065 [ 220.403538] ---[ end trace bcdca752211b7da9 ]--- Signed-off-by: Tigran Mkrtchyan Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 85fde93dff774e..457cfeb1d5c162 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, } else goto outerr; } + + if (IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds->ds == NULL) { struct nfs4_deviceid_node *devid; devid = &mirror->mirror_ds->id_node; From e640cc306388b6f9dc8109d5c5d0550d7e69e5f7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Mar 2017 15:58:40 -0700 Subject: [PATCH 1859/1911] xtensa: fix stack dump output Use %pB in pr_cont format string instead of calling print_symbol separately. 
It turns [ 19.166249] Call Trace: [ 19.167265] [] [ 19.167843] __warn+0x92/0xa0 [ 19.169656] [] [ 19.170059] warn_slowpath_fmt+0x3c/0x40 [ 19.171934] [] back into [ 18.123240] Call Trace: [ 18.125039] [] __warn+0x92/0xa0 [ 18.126961] [] warn_slowpath_fmt+0x3c/0x40 Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index c82c43bff2968c..bae697a06a9845 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -483,10 +483,8 @@ void show_regs(struct pt_regs * regs) static int show_trace_cb(struct stackframe *frame, void *data) { - if (kernel_text_address(frame->pc)) { - pr_cont(" [<%08lx>]", frame->pc); - print_symbol(" %s\n", frame->pc); - } + if (kernel_text_address(frame->pc)) + pr_cont(" [<%08lx>] %pB\n", frame->pc, (void *)frame->pc); return 0; } From 1493aa65ad076293722908f03bab3d4bf3dc3638 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Mar 2017 16:26:21 -0700 Subject: [PATCH 1860/1911] xtensa: wire up statx system call Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/unistd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index cd400af4a6b255..6be7eb27fd29d6 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -774,7 +774,10 @@ __SYSCALL(349, sys_pkey_alloc, 2) #define __NR_pkey_free 350 __SYSCALL(350, sys_pkey_free, 1) -#define __NR_syscall_count 351 +#define __NR_statx 351 +__SYSCALL(351, sys_statx, 5) + +#define __NR_syscall_count 352 /* * sysxtensa syscall handler From 4b0ece6fa0167b22c004ff69e137dc94ee2e469e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 31 Mar 2017 15:11:44 -0700 Subject: [PATCH 1861/1911] mm: migrate: fix remove_migration_pte() for ksm pages I found that calling page migration for ksm pages causes the following bug: page:ffffea0004d51180 count:2 mapcount:2 mapping:ffff88013c785141 index:0x913 flags: 0x57ffffc0040068(uptodate|lru|active|swapbacked) raw: 0057ffffc0040068 ffff88013c785141 0000000000000913 0000000200000001 raw: ffffea0004d5f9e0 ffffea0004d53f60 0000000000000000 ffff88007d81b800 page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffff88007d81b800 ------------[ cut here ]------------ kernel BUG at /src/linux-dev/mm/rmap.c:1086! 
invalid opcode: 0000 [#1] SMP Modules linked in: ppdev parport_pc virtio_balloon i2c_piix4 pcspkr parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi ata_piix 8139too libata virtio_blk 8139cp crc32c_intel mii virtio_pci virtio_ring serio_raw virtio floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 3162 Comm: bash Not tainted 4.11.0-rc2-mm1+ #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:do_page_add_anon_rmap+0x1ba/0x260 RSP: 0018:ffffc90002473b30 EFLAGS: 00010282 RAX: 0000000000000021 RBX: ffffea0004d51180 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff88007dc0dfe0 RBP: ffffc90002473b58 R08: 00000000fffffffe R09: 00000000000001c1 R10: 0000000000000005 R11: 00000000000001c0 R12: ffff880139ab3d80 R13: 0000000000000000 R14: 0000700000000200 R15: 0000160000000000 FS: 00007f5195f50740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd450287000 CR3: 000000007a08e000 CR4: 00000000001406f0 Call Trace: page_add_anon_rmap+0x18/0x20 remove_migration_pte+0x220/0x2c0 rmap_walk_ksm+0x143/0x220 rmap_walk+0x55/0x60 remove_migration_ptes+0x53/0x80 migrate_pages+0x8ed/0xb60 soft_offline_page+0x309/0x8d0 store_soft_offline_page+0xaf/0xf0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x3a/0x50 kernfs_fop_write+0xff/0x180 __vfs_write+0x37/0x160 vfs_write+0xb2/0x1b0 SyS_write+0x55/0xc0 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f51956339e0 RSP: 002b:00007ffcfa0dffc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f51956339e0 RDX: 000000000000000c RSI: 00007f5195f53000 RDI: 0000000000000001 RBP: 00007f5195f53000 R08: 000000000000000a R09: 00007f5195f50740 R10: 000000000000000b R11: 0000000000000246 R12: 00007f5195907400 R13: 000000000000000c R14: 0000000000000001 R15: 0000000000000000 Code: fe ff ff 48 81 c2 00 02 00 00 48 89 55 d8 e8 2e c3 fd ff 48 8b 55 d8 e9 42 ff ff ff 48 c7 c6 e0 52 a1 81 48 89 df e8 46 ad fe ff <0f> 0b 48 83 e8 01 e9 7f fe ff ff 48 83 e8 01 e9 96 fe ff ff 48 RIP: do_page_add_anon_rmap+0x1ba/0x260 RSP: ffffc90002473b30 ---[ end trace a679d00f4af2df48 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception The problem is in the following lines: new = page - pvmw.page->index + linear_page_index(vma, pvmw.address); The 'new' is calculated with 'page' which is given by the caller as a destination page and some offset adjustment for thp. But this doesn't properly work for ksm pages because pvmw.page->index doesn't change for each address but linear_page_index() changes, which means that 'new' points to different pages for each addresses backed by the ksm page. As a result, we try to set totally unrelated pages as destination pages, and that causes kernel crash. This patch fixes the miscalculation and makes ksm page migration work fine. Fixes: 3fe87967c536 ("mm: convert remove_migration_pte() to use page_vma_mapped_walk()") Link: http://lkml.kernel.org/r/1489717683-29905-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Mel Gorman Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9a0897a14d37be..ed97c2c14fa80b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -209,8 +209,11 @@ static int remove_migration_pte(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(PageTail(page), page); while (page_vma_mapped_walk(&pvmw)) { - new = page - pvmw.page->index + - linear_page_index(vma, pvmw.address); + if (PageKsm(page)) + new = page; + else + new = page - pvmw.page->index + + linear_page_index(vma, pvmw.address); get_page(new); pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot))); From 597b7305dd8bafdb3aef4957d97128bc90af8e9f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 31 Mar 2017 15:11:47 -0700 Subject: [PATCH 1862/1911] mm: move mm_percpu_wq initialization earlier Yang Li has reported that drain_all_pages triggers a WARN_ON which means that this function is called earlier than the mm_percpu_wq is initialized on arm64 with CMA configured: WARNING: CPU: 2 PID: 1 at mm/page_alloc.c:2423 drain_all_pages+0x244/0x25c Modules linked in: CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-next-20170310-00027-g64dfbc5 #127 Hardware name: Freescale Layerscape 2088A RDB Board (DT) task: ffffffc07c4a6d00 task.stack: ffffffc07c4a8000 PC is at drain_all_pages+0x244/0x25c LR is at start_isolate_page_range+0x14c/0x1f0 [...] drain_all_pages+0x244/0x25c start_isolate_page_range+0x14c/0x1f0 alloc_contig_range+0xec/0x354 cma_alloc+0x100/0x1fc dma_alloc_from_contiguous+0x3c/0x44 atomic_pool_init+0x7c/0x208 arm64_dma_init+0x44/0x4c do_one_initcall+0x38/0x128 kernel_init_freeable+0x1a0/0x240 kernel_init+0x10/0xfc ret_from_fork+0x10/0x20 Fix this by moving the whole setup_vmstat which is an initcall right now to init_mm_internals which will be called right after the WQ subsystem is initialized. 
Link: http://lkml.kernel.org/r/20170315164021.28532-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Yang Li Tested-by: Yang Li Tested-by: Xiaolong Ye Cc: Mel Gorman Cc: Vlastimil Babka Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ init/main.c | 2 ++ mm/vmstat.c | 4 +--- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f01c88f0800da..00a8fa7e366a03 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,8 @@ struct user_struct; struct writeback_control; struct bdi_writeback; +void init_mm_internals(void); + #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; diff --git a/init/main.c b/init/main.c index f9c9d994820327..b0c11cbf5ddf8a 100644 --- a/init/main.c +++ b/init/main.c @@ -1022,6 +1022,8 @@ static noinline void __init kernel_init_freeable(void) workqueue_init(); + init_mm_internals(); + do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/mm/vmstat.c b/mm/vmstat.c index b1947f0cbee2f9..89f95396ec46be 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1764,7 +1764,7 @@ static int vmstat_cpu_dead(unsigned int cpu) #endif -static int __init setup_vmstat(void) +void __init init_mm_internals(void) { #ifdef CONFIG_SMP int ret; @@ -1792,9 +1792,7 @@ static int __init setup_vmstat(void) proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #endif - return 0; } -module_init(setup_vmstat) #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) From 553af430e7c981e6e8fa5007c5b7b5773acc63dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:50 -0700 Subject: [PATCH 1863/1911] mm: rmap: fix huge file mmap accounting in the memcg stats Huge pages are accounted as single units in the memcg's "file_mapped" counter. Account the correct number of base pages, like we do in the corresponding node counter. Link: http://lkml.kernel.org/r/20170322005111.3156-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ mm/rmap.c | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5af37730388074..bb7250c45cb835 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { diff --git a/mm/rmap.c b/mm/rmap.c index 49ed681ccc7b01..f6838015810f56 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1159,7 +1159,7 @@ void page_add_file_rmap(struct page *page, bool compound) goto out; } __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); out: unlock_page_memcg(page); } @@ -1199,7 +1199,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) * pte lock(a spinlock) is held, which implies preemption disabled. 
*/ __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); From 0cefabdaf757a6455d75f00cb76874e62703ed18 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:52 -0700 Subject: [PATCH 1864/1911] mm: workingset: fix premature shadow node shrinking with cgroups Commit 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") enabled cgroup-awareness in the shadow node shrinker, but forgot to also enable cgroup-awareness in the list_lru the shadow nodes sit on. Consequently, all shadow nodes are sitting on a global (per-NUMA node) list, while the shrinker applies the limits according to the amount of cache in the cgroup its shrinking. The result is excessive pressure on the shadow nodes from cgroups that have very little cache. Enable memcg-mode on the shadow node LRUs, such that per-cgroup limits are applied to per-cgroup lists. Fixes: 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") Link: http://lkml.kernel.org/r/20170322005320.8165-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/workingset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/workingset.c b/mm/workingset.c index ac839fca0e76ae..eda05c71fa49e6 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -532,7 +532,7 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); - ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key); + ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key); if (ret) goto err; ret = register_shrinker(&workingset_shadow_shrinker); From c9d398fa237882ea07167e23bcfc5e6847066518 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 31 Mar 2017 15:11:55 -0700 Subject: [PATCH 1865/1911] mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd() I found the race condition which triggers the following bug when move_pages() and soft offline are called on a single hugetlb page concurrently. 
Soft offlining page 0x119400 at 0x700000000000 BUG: unable to handle kernel paging request at ffffea0011943820 IP: follow_huge_pmd+0x143/0x190 PGD 7ffd2067 PUD 7ffd1067 PMD 0 [61163.582052] Oops: 0000 [#1] SMP Modules linked in: binfmt_misc ppdev virtio_balloon parport_pc pcspkr i2c_piix4 parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi virtio_blk 8139too crc32c_intel ata_piix serio_raw libata virtio_pci 8139cp virtio_ring virtio mii floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: cap_check] CPU: 0 PID: 22573 Comm: iterate_numa_mo Tainted: P OE 4.11.0-rc2-mm1+ #2 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:follow_huge_pmd+0x143/0x190 RSP: 0018:ffffc90004bdbcd0 EFLAGS: 00010202 RAX: 0000000465003e80 RBX: ffffea0004e34d30 RCX: 00003ffffffff000 RDX: 0000000011943800 RSI: 0000000000080001 RDI: 0000000465003e80 RBP: ffffc90004bdbd18 R08: 0000000000000000 R09: ffff880138d34000 R10: ffffea0004650000 R11: 0000000000c363b0 R12: ffffea0011943800 R13: ffff8801b8d34000 R14: ffffea0000000000 R15: 000077ff80000000 FS: 00007fc977710740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea0011943820 CR3: 000000007a746000 CR4: 00000000001406f0 Call Trace: follow_page_mask+0x270/0x550 SYSC_move_pages+0x4ea/0x8f0 SyS_move_pages+0xe/0x10 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fc976e03949 RSP: 002b:00007ffe72221d88 EFLAGS: 00000246 ORIG_RAX: 0000000000000117 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc976e03949 RDX: 0000000000c22390 RSI: 0000000000001400 RDI: 0000000000005827 RBP: 00007ffe72221e00 R08: 0000000000c2c3a0 R09: 0000000000000004 R10: 0000000000c363b0 R11: 0000000000000246 R12: 0000000000400650 R13: 00007ffe72221ee0 R14: 0000000000000000 R15: 0000000000000000 Code: 81 e4 ff ff 1f 00 48 21 c2 49 c1 ec 0c 48 c1 ea 0c 4c 01 e2 49 bc 00 00 00 00 00 ea ff ff 48 c1 e2 06 49 01 d4 f6 45 bc 04 74 90 <49> 8b 7c 24 20 40 f6 c7 01 75 2b 4c 89 e7 8b 47 1c 85 c0 7e 2a RIP: follow_huge_pmd+0x143/0x190 RSP: ffffc90004bdbcd0 CR2: ffffea0011943820 ---[ end trace e4f81353a2d23232 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled This bug is triggered when pmd_present() returns true for non-present hugetlb, so fixing the present check in follow_huge_pmd() prevents it. Using pmd_present() to determine present/non-present for hugetlb is not correct, because pmd_present() checks multiple bits (not only _PAGE_PRESENT) for historical reason and it can misjudge hugetlb state. Fixes: e66f17ff7177 ("mm/hugetlb: take page table lock in follow_huge_pmd()") Link: http://lkml.kernel.org/r/1490149898-20231-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Acked-by: Hillf Danton Cc: Hugh Dickins Cc: Michal Hocko Cc: "Kirill A. 
Shutemov" Cc: Mike Kravetz Cc: Christian Borntraeger Cc: Gerald Schaefer Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3d0aab9ee80d1f..f501f14f14ce10 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4651,6 +4651,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { struct page *page = NULL; spinlock_t *ptl; + pte_t pte; retry: ptl = pmd_lockptr(mm, pmd); spin_lock(ptl); @@ -4660,12 +4661,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, */ if (!pmd_huge(*pmd)) goto out; - if (pmd_present(*pmd)) { + pte = huge_ptep_get((pte_t *)pmd); + if (pte_present(pte)) { page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); if (flags & FOLL_GET) get_page(page); } else { - if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { + if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); __migration_entry_wait(mm, (pte_t *)pmd, ptl); goto retry; From 906f2a51c941e251ca196d5128953d9899a608ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 31 Mar 2017 15:11:58 -0700 Subject: [PATCH 1866/1911] mm: fix section name for .data..ro_after_init A section name for .data..ro_after_init was added by both: commit d07a980c1b8d ("s390: add proper __ro_after_init support") and commit d7c19b066dcf ("mm: kmemleak: scan .data.ro_after_init") The latter adds incorrect wrapping around the existing s390 section, and came later. I'd prefer the s390 naming, so this moves the s390-specific name up to the asm-generic/sections.h and renames the section as used by kmemleak (and in the future, kernel/extable.c). Link: http://lkml.kernel.org/r/20170327192213.GA129375@beast Signed-off-by: Kees Cook Acked-by: Heiko Carstens [s390 parts] Acked-by: Jakub Kicinski Cc: Eddie Kovsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/sections.h | 1 - arch/s390/kernel/vmlinux.lds.S | 2 -- include/asm-generic/sections.h | 6 +++--- include/asm-generic/vmlinux.lds.h | 4 ++-- mm/kmemleak.c | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 5ce29fe100baaa..fbd9116eb17bf2 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,5 @@ #include extern char _eshared[], _ehead[]; -extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ccf9539625182..72307f108c4038 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -63,11 +63,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); __start_ro_after_init = .; - __start_data_ro_after_init = .; .data..ro_after_init : { *(.data..ro_after_init) } - __end_data_ro_after_init = .; EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 4df64a1fc09e7a..532372c6cf15c8 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,8 +14,8 @@ * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* * and/or .init.* sections. 
* [__start_rodata, __end_rodata]: contains .rodata.* sections - * [__start_data_ro_after_init, __end_data_ro_after_init]: - * contains data.ro_after_init section + * [__start_ro_after_init, __end_ro_after_init]: + * contains .data..ro_after_init section * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* * may be out of this range on some architectures. * [_sinittext, _einittext]: contains .init.text.* sections @@ -33,7 +33,7 @@ extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; -extern char __start_data_ro_after_init[], __end_data_ro_after_init[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0968d13b388591..f9f21e2c59f311 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -260,9 +260,9 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ - __start_data_ro_after_init = .; \ + __start_ro_after_init = .; \ *(.data..ro_after_init) \ - __end_data_ro_after_init = .; + __end_ro_after_init = .; #endif /* diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 26c874e90b12ef..20036d4f9f13d4 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1416,7 +1416,7 @@ static void kmemleak_scan(void) /* data/bss scanning */ scan_large_block(_sdata, _edata); scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init); + scan_large_block(__start_ro_after_init, __end_ro_after_init); #ifdef CONFIG_SMP /* per-cpu sections scanning */ From 4742a35d9de745e867405b4311e1aac412f0ace1 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 31 Mar 2017 15:12:01 -0700 Subject: [PATCH 1867/1911] hugetlbfs: initialize shared policy as part of inode allocation Any time after inode allocation, destroy_inode can be called. The hugetlbfs inode contains a shared_policy structure, and mpol_free_shared_policy is unconditionally called as part of hugetlbfs_destroy_inode. Initialize the policy as part of inode allocation so that any quick (error path) calls to destroy_inode will be handed an initialized policy. 
syzkaller fuzzer found this bug, that resulted in the following: BUG: KASAN: user-memory-access in atomic_inc include/asm-generic/atomic-instrumented.h:87 [inline] at addr 000000131730bd7a BUG: KASAN: user-memory-access in __lock_acquire+0x21a/0x3a80 kernel/locking/lockdep.c:3239 at addr 000000131730bd7a Write of size 4 by task syz-executor6/14086 CPU: 3 PID: 14086 Comm: syz-executor6 Not tainted 4.11.0-rc3+ #364 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: atomic_inc include/asm-generic/atomic-instrumented.h:87 [inline] __lock_acquire+0x21a/0x3a80 kernel/locking/lockdep.c:3239 lock_acquire+0x1ee/0x590 kernel/locking/lockdep.c:3762 __raw_write_lock include/linux/rwlock_api_smp.h:210 [inline] _raw_write_lock+0x33/0x50 kernel/locking/spinlock.c:295 mpol_free_shared_policy+0x43/0xb0 mm/mempolicy.c:2536 hugetlbfs_destroy_inode+0xca/0x120 fs/hugetlbfs/inode.c:952 alloc_inode+0x10d/0x180 fs/inode.c:216 new_inode_pseudo+0x69/0x190 fs/inode.c:889 new_inode+0x1c/0x40 fs/inode.c:918 hugetlbfs_get_inode+0x40/0x420 fs/hugetlbfs/inode.c:734 hugetlb_file_setup+0x329/0x9f0 fs/hugetlbfs/inode.c:1282 newseg+0x422/0xd30 ipc/shm.c:575 ipcget_new ipc/util.c:285 [inline] ipcget+0x21e/0x580 ipc/util.c:639 SYSC_shmget ipc/shm.c:673 [inline] SyS_shmget+0x158/0x230 ipc/shm.c:657 entry_SYSCALL_64_fastpath+0x1f/0xc2 Analysis provided by Tetsuo Handa Link: http://lkml.kernel.org/r/1490477850-7944-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Dmitry Vyukov Acked-by: Hillf Danton Cc: Tetsuo Handa Cc: Michal Hocko Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8f96461236f655..7163fe014b57f4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -695,14 +695,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode->i_mode = S_IFDIR | config->mode; inode->i_uid = config->uid; inode->i_gid = config->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); - info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy, NULL); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -733,7 +730,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, @@ -741,15 +737,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->private_data = resv_map; - info = HUGETLBFS_I(inode); - /* - * The policy is initialized here even if we are creating a - * private inode because initialization simply creates an - * an empty rb tree and calls rwlock_init(), later when we - * call mpol_free_shared_policy() it will just return because - * the rb tree will still be empty. 
- */ - mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -937,6 +924,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } + + /* + * Any time after allocation, hugetlbfs_destroy_inode can be called + * for the inode. mpol_free_shared_policy is unconditionally called + * as part of hugetlbfs_destroy_inode. So, initialize policy here + * in case of a quick call to destroy. + * + * Note that the policy is initialized even if we are creating a + * private inode. This simplifies hugetlbfs_destroy_inode. + */ + mpol_shared_policy_init(&p->policy, NULL); + return &p->vfs_inode; } From b0845ce58379d11dcad4cdb6824a6410de260216 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: [PATCH 1868/1911] kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../admin-guide/kernel-parameters.txt | 6 ++++ include/linux/kasan.h | 3 ++ lib/test_kasan.c | 10 ++++++ mm/kasan/kasan.h | 5 --- mm/kasan/report.c | 36 +++++++++++++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada3e..facc20a3f96280 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1725,6 +1725,12 @@ kernel and module base offset ASLR (Address Space Layout Randomization). + kasan_multi_shot + [KNL] Enforce KASAN (Kernel Address Sanitizer) to print + report on every invalid memory access. Without this + parameter KASAN will print report only for the first + invalid access. 
+ keepinitrd [HW,ARM] kernelcore= [KNL,X86,IA-64,PPC] diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5734480c959094..a5c7046f26b4b9 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -76,6 +76,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 0b1d3140fbb877..a25c9763fce19f 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * Note: test functions are marked noinline so that their names appear in @@ -474,6 +475,12 @@ static noinline void __init use_after_scope_test(void) static int __init kmalloc_tests_init(void) { + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. + */ + bool multishot = kasan_save_enable_multi_shot(); + kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); @@ -499,6 +506,9 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1c260e6b3b3c6a..dd2dea8eb0771a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool kasan_report_enabled(void) -{ - return !current->kasan_depth; -} - void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, diff --git a/mm/kasan/report.c b/mm/kasan/report.c index f479365530b648..ab42a0803f161c 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,7 +13,9 @@ * */ +#include #include +#include #include #include #include @@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_end_report(&flags); } +static unsigned long kasan_flags; + +#define KASAN_BIT_REPORTED 0 +#define KASAN_BIT_MULTI_SHOT 1 + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +static int __init kasan_set_multi_shot(char *str) +{ + set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); + return 1; +} +__setup("kasan_multi_shot", kasan_set_multi_shot); + +static inline bool kasan_report_enabled(void) +{ + if (current->kasan_depth) + return false; + if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + return true; + return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); +} + void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { From ff8c0c53c47530ffea82c22a0a6df6332b56c957 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 31 Mar 2017 15:12:07 -0700 Subject: [PATCH 1869/1911] mm/hugetlb.c: don't call region_abort if region_chg fails Changes to hugetlbfs reservation maps is a two step process. The first step is a call to region_chg to determine what needs to be changed, and prepare that change. 
This should be followed by a call to call to region_add to commit the change, or region_abort to abort the change. The error path in hugetlb_reserve_pages called region_abort after a failed call to region_chg. As a result, the adds_in_progress counter in the reservation map is off by 1. This is caught by a VM_BUG_ON in resv_map_release when the reservation map is freed. syzkaller fuzzer (when using an injected kmalloc failure) found this bug, that resulted in the following: kernel BUG at mm/hugetlb.c:742! Call Trace: hugetlbfs_evict_inode+0x7b/0xa0 fs/hugetlbfs/inode.c:493 evict+0x481/0x920 fs/inode.c:553 iput_final fs/inode.c:1515 [inline] iput+0x62b/0xa20 fs/inode.c:1542 hugetlb_file_setup+0x593/0x9f0 fs/hugetlbfs/inode.c:1306 newseg+0x422/0xd30 ipc/shm.c:575 ipcget_new ipc/util.c:285 [inline] ipcget+0x21e/0x580 ipc/util.c:639 SYSC_shmget ipc/shm.c:673 [inline] SyS_shmget+0x158/0x230 ipc/shm.c:657 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: resv_map_release+0x265/0x330 mm/hugetlb.c:742 Link: http://lkml.kernel.org/r/1490821682-23228-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Dmitry Vyukov Acked-by: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f501f14f14ce10..e5828875f7bbd7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4403,7 +4403,9 @@ int hugetlb_reserve_pages(struct inode *inode, return 0; out_err: if (!vma || vma->vm_flags & VM_MAYSHARE) - region_abort(resv_map, from, to); + /* Don't call region_abort if region_chg failed */ + if (chg >= 0) + region_abort(resv_map, from, to); if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_put(&resv_map->refs, resv_map_release); return ret; From 4785603bd05b0b029c647080937674d9991600f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 31 Mar 2017 15:12:10 -0700 Subject: [PATCH 1870/1911] drivers/rapidio/devices/tsi721.c: make module parameter variable name unique MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kbuild test robot reported a non-static variable name collision between a staging driver and a RapidIO driver, with a generic variable name of 'dbg_level'. Both drivers should be changed so that they don't use this generic public variable name. This patch fixes the RapidIO driver but does not change the user interface (name) for the module parameter. 
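For reference, module_param_named() is what makes the rename possible without changing the user-visible interface: its first argument is the parameter name exposed to userspace, the second is the C variable backing it. A minimal single-file sketch of the pattern (the driver itself keeps the variable non-static and declares it extern in tsi721.h, as shown in the diff below):

	static u32 tsi_dbg_level;	/* unique symbol, no link-time collision */
	module_param_named(dbg_level, tsi_dbg_level, uint, S_IWUSR | S_IRUGO);
	MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");

The parameter is therefore still set and read as "dbg_level" (for example under /sys/module/<module>/parameters/), only the backing symbol changes.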
drivers/staging/built-in.o:(.bss+0x109d0): multiple definition of `dbg_level' drivers/rapidio/built-in.o:(.bss+0x16c): first defined here Link: http://lkml.kernel.org/r/ab527fc5-aa3c-4b07-5d48-eef5de703192@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Greg Kroah-Hartman Cc: Matt Porter Cc: Alexandre Bounine Cc: Jérémy Lefaure Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721.c | 4 ++-- drivers/rapidio/devices/tsi721.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 9d19b9a62011b3..315a4be8dc1e64 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -37,8 +37,8 @@ #include "tsi721.h" #ifdef DEBUG -u32 dbg_level; -module_param(dbg_level, uint, S_IWUSR | S_IRUGO); +u32 tsi_dbg_level; +module_param_named(dbg_level, tsi_dbg_level, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); #endif diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index 5941437cbdd164..957eadc5815095 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -40,11 +40,11 @@ enum { }; #ifdef DEBUG -extern u32 dbg_level; +extern u32 tsi_dbg_level; #define tsi_debug(level, dev, fmt, arg...) \ do { \ - if (DBG_##level & dbg_level) \ + if (DBG_##level & tsi_dbg_level) \ dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg); \ } while (0) #else From 13a6798e4a03096b11bf402a063786a7be55d426 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Mar 2017 15:12:12 -0700 Subject: [PATCH 1871/1911] kasan: do not sanitize kexec purgatory Fixes this: kexec: Undefined symbol: __asan_load8_noabort kexec-bzImage64: Loading purgatory failed Link: http://lkml.kernel.org/r/1489672155.4458.7.camel@gmx.de Signed-off-by: Mike Galbraith Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 555b9fa0ad43cb..7dbdb780264df9 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KASAN_SANITIZE := n KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That From e53e597fd4c4a0b6ae58e57d76a240927fd17eaa Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 31 Mar 2017 17:05:02 -0500 Subject: [PATCH 1872/1911] tty: pl011: fix earlycon work-around for QDF2400 erratum 44 The work-around for the Qualcomm Datacenter Technologies QDF2400 erratum 44 sets the "qdf2400_e44_present" global variable if the work-around is needed. However, this check does not happen until after earlycon is initialized, which means the work-around is not used, and the console hangs as soon as it displays one character. 
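As a quick usage summary (illustrative; it restates the comment added in the hunk below):

	earlycon=pl011,<address>               traditional PL011/SBSA early console
	earlycon=pl011,<address>,qdf2400_e44   same, on QDF2400 SoCs affected by erratum 44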
Fixes: d8a4995bcea1 ("tty: pl011: Work around QDF2400 E44 stuck BUSY bit") Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 56f92d7348bf45..b0a377725d636c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2452,18 +2452,37 @@ static void pl011_early_write(struct console *con, const char *s, unsigned n) uart_console_write(&dev->port, s, n, pl011_putc); } +/* + * On non-ACPI systems, earlycon is enabled by specifying + * "earlycon=pl011,
" on the kernel command line. + * + * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table, + * by specifying only "earlycon" on the command line. Because it requires + * SPCR, the console starts after ACPI is parsed, which is later than a + * traditional early console. + * + * To get the traditional early console that starts before ACPI is parsed, + * specify the full "earlycon=pl011,
" option. + */ static int __init pl011_early_console_setup(struct earlycon_device *device, const char *opt) { if (!device->port.membase) return -ENODEV; - device->con->write = qdf2400_e44_present ? - qdf2400_e44_early_write : pl011_early_write; + /* On QDF2400 SOCs affected by Erratum 44, the "qdf2400_e44" must + * also be specified, e.g. "earlycon=pl011,
,qdf2400_e44". + */ + if (!strcmp(device->options, "qdf2400_e44")) + device->con->write = qdf2400_e44_early_write; + else + device->con->write = pl011_early_write; + return 0; } OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); +EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup); #else #define AMBA_CONSOLE NULL From b917078c1c107ce34264af893e436e6115eeb9f6 Mon Sep 17 00:00:00 2001 From: Daode Huang Date: Thu, 30 Mar 2017 16:37:41 +0100 Subject: [PATCH 1873/1911] net: hns: Add ACPI support to check SFP present The current code only supports DT to check SFP present. This patch adds ACPI support as well. Signed-off-by: Daode Huang Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 11 +++++--- .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 28 ++++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 3239d27143b935..bdd8cdd732fb58 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -82,9 +82,12 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) else *link_status = 0; - ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt); - if (!ret) - *link_status = *link_status && sfp_prsnt; + if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) { + ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, + &sfp_prsnt); + if (!ret) + *link_status = *link_status && sfp_prsnt; + } mac_cb->link = *link_status; } @@ -855,7 +858,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) of_node_put(np); np = of_parse_phandle(to_of_node(mac_cb->fw_port), - "serdes-syscon", 0); + "serdes-syscon", 0); syscon = syscon_node_to_regmap(np); of_node_put(np); if (IS_ERR_OR_NULL(syscon)) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index a2c22d084ce90c..e13aa064a8e943 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -461,6 +461,32 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) return 0; } +int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt) +{ + union acpi_object *obj; + union acpi_object obj_args, argv4; + + obj_args.integer.type = ACPI_TYPE_INTEGER; + obj_args.integer.value = mac_cb->mac_id; + + argv4.type = ACPI_TYPE_PACKAGE, + argv4.package.count = 1, + argv4.package.elements = &obj_args, + + obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), + hns_dsaf_acpi_dsm_uuid, 0, + HNS_OP_GET_SFP_STAT_FUNC, &argv4); + + if (!obj || obj->type != ACPI_TYPE_INTEGER) + return -ENODEV; + + *sfp_prsnt = obj->integer.value; + + ACPI_FREE(obj); + + return 0; +} + /** * hns_mac_config_sds_loopback - set loop back for serdes * @mac_cb: mac control block @@ -592,7 +618,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi; misc_op->get_phy_if = hns_mac_get_phy_if_acpi; - misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; + misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi; misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback_acpi; } else { From 3af887c38f1ae0db66c00c4ee2e8a0b1e99ffc29 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Mar 2017 17:00:12 +0100 
Subject: [PATCH 1874/1911] net/faraday: Explicitly include linux/of.h and linux/property.h This driver uses interfaces from linux/of.h and linux/property.h but relies on implict inclusion of those headers which means that changes in other headers could break the build, as happened in -next for arm today. Add a explicit includes. Signed-off-by: Mark Brown Acked-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 928b0df2b8e033..ade6b3e4ed1326 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include From 6f56f6186c18e3fd54122b73da68e870687b8c59 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 30 Mar 2017 12:36:03 -0700 Subject: [PATCH 1875/1911] openvswitch: Fix ovs_flow_key_update() ovs_flow_key_update() is called when the flow key is invalid, and it is used to update and revalidate the flow key. Commit 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") introduces mac_proto field to flow key and use it to determine whether the flow key is valid. However, the commit does not update the code path in ovs_flow_key_update() to revalidate the flow key which may cause BUG_ON() on execute_recirc(). This patch addresses the aforementioned issue. Fixes: 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") Signed-off-by: Yi-Hung Wei Acked-by: Jiri Benc Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f25c..3f76cb765e5bb7 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) From d5b07ccc1bf5fd2ccc6bf9da5677fc448a972e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Rebe?= Date: Tue, 28 Mar 2017 07:56:51 +0200 Subject: [PATCH 1876/1911] r8152: The Microsoft Surface docks also use R8152 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this the generic cdc_ether grabs the device, and does not really work. Signed-off-by: René Rebe Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_ether.c | 15 +++++++++++++++ drivers/net/usb/r8152.c | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f5552aaaa77a59..f3ae88fdf332e8 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -532,6 +532,7 @@ static const struct driver_info wwan_info = { #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 +#define MICROSOFT_VENDOR_ID 0x045e static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -761,6 +762,20 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Microsoft Surface 2 dock (based on Realtek RTL8152) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Microsoft Surface 3 dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c34df33c6d723e..07f788c49d573f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -517,6 +517,7 @@ enum rtl8152_flags { /* Define these values to match your device */ #define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_NVIDIA 0x0955 @@ -4521,6 +4522,8 @@ static void rtl8152_disconnect(struct usb_interface *intf) static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, From fce366a9dd0ddc47e7ce05611c266e8574a45116 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:02 +0200 Subject: [PATCH 1877/1911] bpf, verifier: fix alu ops against map_value{, _adj} register types While looking into map_value_adj, I noticed that alu operations directly on the map_value() resp. map_value_adj() register (any alu operation on a map_value() register will turn it into a map_value_adj() typed register) are not sufficiently protected against some of the operations. 
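(For orientation only: in restricted C, the first rejected pattern corresponds roughly to the following hypothetical fragment, not taken from the commit; the verifier-log examples that follow show the same thing at the instruction level.)

	long *val = bpf_map_lookup_elem(&map, &key);
	if (val) {
		val = (long *)((unsigned long)val & 8);	/* ALU op on the map value pointer itself */
		*val = 22;				/* store through a pointer the verifier can no longer track */
	}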
Two non-exhaustive examples are provided that the verifier needs to reject: i) BPF_AND on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xbf842a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (57) r0 &= 8 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns ii) BPF_ADD in 32 bit mode on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xc24eee00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (04) (u32) r0 += (u32) 0 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns Issue is, while min_value / max_value boundaries for the access are adjusted appropriately, we change the pointer value in a way that cannot be sufficiently tracked anymore from its origin. Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact, all the test cases coming with 484611357c19 ("bpf: allow access into map value arrays") perform BPF_ADD only on the destination register that is PTR_TO_MAP_VALUE_ADJ. Only for UNKNOWN_VALUE register types such operations make sense, f.e. with unknown memory content fetched initially from a constant offset from the map value memory into a register. That register is then later tested against lower / upper bounds, so that the verifier can then do the tracking of min_value / max_value, and properly check once that UNKNOWN_VALUE register is added to the destination register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the original use-case is solving. Note, tracking on what is being added is done through adjust_reg_min_max_vals() and later access to the map value enforced with these boundaries and the given offset from the insn through check_map_access_adj(). Tests will fail for non-root environment due to prohibited pointer arithmetic, in particular in check_alu_op(), we bail out on the is_pointer_value() check on the dst_reg (which is false in root case as we allow for pointer arithmetic via env->allow_ptr_leaks). Similarly to PTR_TO_PACKET, one way to fix it is to restrict the allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit mode BPF_ADD. The test_verifier suite runs fine after the patch and it also rejects mentioned test cases. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5e6202e62265fd..86deddecff2545 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1925,6 +1925,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * register as unknown. 
*/ if (env->allow_ptr_leaks && + BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD && (dst_reg->type == PTR_TO_MAP_VALUE || dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) dst_reg->type = PTR_TO_MAP_VALUE_ADJ; From 79adffcd6489ef43bda2dfded3d637d7fb4fac80 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:03 +0200 Subject: [PATCH 1878/1911] bpf, verifier: fix rejection of unaligned access checks for map_value_adj Currently, the verifier doesn't reject unaligned access for map_value_adj register types. Commit 484611357c19 ("bpf: allow access into map value arrays") added logic to check_ptr_alignment() extending it from PTR_TO_PACKET to also PTR_TO_MAP_VALUE_ADJ, but for PTR_TO_MAP_VALUE_ADJ no enforcement is in place, because reg->id for PTR_TO_MAP_VALUE_ADJ reg types is never non-zero, meaning, we can cause BPF_H/_W/_DW-based unaligned access for architectures not supporting efficient unaligned access, and thus worst case could raise exceptions on some archs that are unable to correct the unaligned access or perform a different memory access to the actual requested one and such. i) Unaligned load with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x42533a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+11 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (61) r1 = *(u32 *)(r0 +0) 8: (35) if r1 >= 0xb goto pc+9 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=10 R10=fp 9: (07) r0 += 3 10: (79) r7 = *(u64 *)(r0 +0) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R10=fp 11: (79) r7 = *(u64 *)(r0 +2) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R7=inv R10=fp [...] ii) Unaligned store with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x4df16a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+19 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (07) r0 += 3 8: (7a) *(u64 *)(r0 +0) = 42 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 9: (7a) *(u64 *)(r0 +2) = 43 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 10: (7a) *(u64 *)(r0 -2) = 44 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp [...] For the PTR_TO_PACKET type, reg->id is initially zero when skb->data was fetched, it later receives a reg->id from env->id_gen generator once another register with UNKNOWN_VALUE type was added to it via check_packet_ptr_add(). The purpose of this reg->id is twofold: i) it is used in find_good_pkt_pointers() for setting the allowed access range for regs with PTR_TO_PACKET of same id once verifier matched on data/data_end tests, and ii) for check_ptr_alignment() to determine that when not having efficient unaligned access and register with UNKNOWN_VALUE was added to PTR_TO_PACKET, that we're only allowed to access the content bytewise due to unknown unalignment. reg->id was never intended for PTR_TO_MAP_VALUE{,_ADJ} types and thus is always zero, the only marking is in PTR_TO_MAP_VALUE_OR_NULL that was added after 484611357c19 via 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers"). Above tests will fail for non-root environment due to prohibited pointer arithmetic. 
The fix splits register-type specific checks into their own helper instead of keeping them combined, so we don't run into a similar issue in future once we extend check_ptr_alignment() further and forget to add reg->type checks for some of the checks. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 58 ++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86deddecff2545..a834068a400e27 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } -static int check_ptr_alignment(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int off, int size) +static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) { - if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { - if (off % size != 0) { - verbose("misaligned access off %d size %d\n", - off, size); - return -EACCES; - } else { - return 0; - } - } - - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - /* misaligned access to packet is ok on x86,arm,arm64 */ - return 0; - if (reg->id && size != 1) { - verbose("Unknown packet alignment. Only byte-sized access allowed\n"); + verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); return -EACCES; } /* skb->data is NET_IP_ALIGN-ed */ - if (reg->type == PTR_TO_PACKET && - (NET_IP_ALIGN + reg->off + off) % size != 0) { + if ((NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; } + return 0; } +static int check_val_ptr_alignment(const struct bpf_reg_state *reg, + int size) +{ + if (size != 1) { + verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); + return -EACCES; + } + + return 0; +} + +static int check_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) +{ + switch (reg->type) { + case PTR_TO_PACKET: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_pkt_ptr_alignment(reg, off, size); + case PTR_TO_MAP_VALUE_ADJ: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_val_ptr_alignment(reg, size); + default: + if (off % size != 0) { + verbose("misaligned access off %d size %d\n", + off, size); + return -EACCES; + } + + return 0; + } +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(env, reg, off, size); + err = check_ptr_alignment(reg, off, size); if (err) return err; From 02ea80b1850e48abbce77878896229d7cc5cb230 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:04 +0200 Subject: [PATCH 1879/1911] bpf: add various verifier test cases for self-tests Add a couple of test cases, for example, probing for xadd on a spilled pointer to packet and map_value_adj register, various other map_value_adj tests including the unaligned load/store, and trying out pointer arithmetic on map_value_adj register itself. 
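The per-test marking this relies on looks roughly like the sketch below (instructions elided; flag and test names as introduced in the diff that follows):

	{
		"valid map access into an array with a variable",
		.insns  = { /* BPF instructions elided */ },
		.result = ACCEPT,
		/* On kernels without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the
		 * verifier may reject this with "Unknown alignment."; the runner
		 * then counts that rejection as expected instead of failing. */
		.flags  = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
	},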
For the unaligned load/store, we need to figure out whether the architecture has efficient unaligned access and need to mark affected tests accordingly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/include/linux/filter.h | 10 + tools/testing/selftests/bpf/Makefile | 9 +- tools/testing/selftests/bpf/test_verifier.c | 270 +++++++++++++++++++- 3 files changed, 283 insertions(+), 6 deletions(-) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 122153b16ea4ee..390d7c9685fd61 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -168,6 +168,16 @@ .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a1ad58cb66f4a..9af09e8099c0aa 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,7 +1,14 @@ LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf +APIDIR := ../../../include/uapi +GENDIR := ../../../../include/generated +GENHDR := $(GENDIR)/autoconf.h -CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) +ifneq ($(wildcard $(GENHDR)),) + GENFLAGS := -DHAVE_GENHDR +endif + +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) LDLIBS += -lcap TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 7d761d4cc75907..c848e90b642131 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -30,6 +30,14 @@ #include +#ifdef HAVE_GENHDR +# include "autoconf.h" +#else +# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) +# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +# endif +#endif + #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE @@ -39,6 +47,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 +#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -53,6 +63,7 @@ struct bpf_test { REJECT } result, result_unpriv; enum bpf_prog_type prog_type; + uint8_t flags; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -2431,6 +2442,30 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test15 (spill with xadd)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_5, 4096), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, + 
.prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -2934,6 +2969,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a variable", @@ -2957,6 +2993,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a signed variable", @@ -2984,6 +3021,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a constant", @@ -3025,6 +3063,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is outside of the array range", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a variable", @@ -3048,6 +3087,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with no floor check", @@ -3074,6 +3114,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3100,6 +3141,7 @@ static struct bpf_test tests[] = { .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3129,6 +3171,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "multiple registers share map_lookup_elem result", @@ -3252,6 +3295,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "constant register |= constant should keep constant type", @@ -3981,7 +4025,208 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, }, { - "map element value (adjusted) is preserved across register spilling", + "map element value or null is marked on register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + }, + { + "map element value store of cleared call register", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R1 !read_ok", + .errstr = "R1 !read_ok", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value with unaligned store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), + BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value with unaligned load", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value illegal alu op, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + 
.fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 4", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_3, 4096), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .errstr = "R0 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), @@ -4003,6 +4248,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result = ACCEPT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", @@ -4441,6 +4687,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid range check", @@ -4472,6 +4719,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, } }; @@ -4550,11 +4798,11 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { + int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); int prog_type = test->prog_type; int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; - int fd_prog, expected_ret; const char *expected_err; do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); @@ -4567,8 
+4815,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + + reject_from_alignment = fd_prog < 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && + strstr(bpf_vlog, "Unknown alignment."); +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (reject_from_alignment) { + printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", + strerror(errno)); + goto fail_log; + } +#endif if (expected_ret == ACCEPT) { - if (fd_prog < 0) { + if (fd_prog < 0 && !reject_from_alignment) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; @@ -4578,14 +4837,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err)) { + if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { printf("FAIL\nUnexpected error message!\n"); goto fail_log; } } (*passes)++; - printf("OK\n"); + printf("OK%s\n", reject_from_alignment ? + " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); close(fd_f1); From afe89962ee0799955b606cc7637ac86a296923a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 31 Mar 2017 17:57:28 +0800 Subject: [PATCH 1880/1911] sctp: use right in and out stream cnt Since sctp reconf was added in sctp, the real cnt of in/out stream have not been c.sinit_max_instreams and c.sinit_num_ostreams any more. This patch is to replace them with stream->in/outcnt. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +-- net/sctp/proc.c | 4 ++-- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 10 +++++----- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff670720b..8081476ed313cc 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1026,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. 
*/ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec4d..a0b29d43627f48 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03bb1aab4d095..24c6ccce753909 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index baa269a0d52ee3..12fbae2c1002e3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. 
*/ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, From 61b9a047729bb230978178bca6729689d0c50ca2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:25 +0200 Subject: [PATCH 1881/1911] l2tp: fix race in l2tp_recv_common() Taking a reference on sessions in l2tp_recv_common() is racy; this has to be done by the callers. To this end, a new function is required (l2tp_session_get()) to atomically lookup a session and take a reference on it. Callers then have to manually drop this reference. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 73 ++++++++++++++++++++++++++++++++++++-------- net/l2tp/l2tp_core.h | 3 ++ net/l2tp/l2tp_ip.c | 17 ++++++++--- net/l2tp/l2tp_ip6.c | 18 ++++++++--- 4 files changed, 88 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced9f..8a067536d15c4e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -278,6 +278,55 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); +/* Like l2tp_session_find() but takes a reference on the returned session. + * Optionally calls session->ref() too if do_ref is true. 
+ */ +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } + + session_list = l2tp_session_id_hash(tunnel, session_id); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; @@ -633,6 +682,9 @@ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +694,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +846,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +854,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +960,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). 
Passing up.\n", @@ -930,6 +976,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09eeb3..4544e81a3d2730 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,6 +230,9 @@ static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk) return tunnel; } +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7208fbe5856bba..4d322c1b7233e5 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -203,6 +204,12 @@ static int l2tp_ip_recv(struct sk_buff *skb) return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 516d7ce24ba7b4..88b397c30d86af 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -216,6 +218,12 @@ static int l2tp_ip6_recv(struct sk_buff *skb) return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); From 57377d63547861919ee634b845c7caa38de4a452 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:26 +0200 Subject: [PATCH 1882/1911] l2tp: ensure session can't get removed during pppol2tp_session_ioctl() Holding a reference on session is required before calling pppol2tp_session_ioctl(). The session could get freed while processing the ioctl otherwise. 
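
To make the race concrete, here is a minimal userspace model of why the ioctl path must own its own reference; it is an illustration only, not the kernel code, and every struct and function name in it is invented. Without the get/put pair around the call, a concurrent teardown could drop the last reference and the handler would touch freed memory.

/* Illustrative model only -- not the l2tp implementation. */
#include <stdio.h>
#include <stdlib.h>

struct session {
	int refcount;
	int stats;
};

/* Lookup that hands back a referenced session, in the spirit of
 * l2tp_session_get(): the caller now owns one reference. */
static struct session *session_get(struct session *s)
{
	if (s)
		s->refcount++;
	return s;
}

static void session_put(struct session *s)
{
	if (s && --s->refcount == 0) {
		printf("session freed\n");
		free(s);
	}
}

/* Stand-in for pppol2tp_session_ioctl(): it dereferences the session,
 * so it must be covered by the caller's reference. */
static int session_ioctl(struct session *s)
{
	return s->stats;
}

int main(void)
{
	struct session *s = calloc(1, sizeof(*s));
	struct session *ref;

	s->refcount = 1;	/* reference owned by the lookup table */
	s->stats = 42;

	ref = session_get(s);			/* take our own reference */
	printf("stats=%d\n", session_ioctl(ref));
	session_put(ref);			/* drop it once we are done */

	session_put(s);				/* teardown drops the last reference */
	return 0;
}
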
Since pppol2tp_session_ioctl() uses the session's socket, we also need to take a reference on it in l2tp_session_get(). Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 123b6a2411a038..827e55c41ba29a 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1141,11 +1141,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM From dbdbc73b44782e22b3b4b6e8b51e7a3d245f3086 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:27 +0200 Subject: [PATCH 1883/1911] l2tp: fix duplicate session creation l2tp_session_create() relies on its caller for checking for duplicate sessions. This is racy since a session can be concurrently inserted after the caller's verification. Fix this by letting l2tp_session_create() verify sessions uniqueness upon insertion. Callers need to be adapted to check for l2tp_session_create()'s return code instead of calling l2tp_session_find(). pppol2tp_connect() is a bit special because it has to work on existing sessions (if they're not connected) or to create a new session if none is found. When acting on a preexisting session, a reference must be held or it could go away on us. So we have to use l2tp_session_get() instead of l2tp_session_find() and drop the reference before exiting. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 70 +++++++++++++++++++++++++++++++++----------- net/l2tp/l2tp_eth.c | 10 ++----- net/l2tp/l2tp_ppp.c | 60 ++++++++++++++++++------------------- 3 files changed, 84 insertions(+), 56 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8a067536d15c4e..46b450a1bc211b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,6 +374,48 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) } EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} + /* Lookup a tunnel by id */ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) @@ -1785,6 +1827,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1840,6 +1883,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. 
*/ @@ -1849,28 +1899,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0c4..6fd41d7afe1ef2 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 827e55c41ba29a..26501902d1a7fe 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -583,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -684,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); + + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (ps->sock) { + error = -EEXIST; + goto end; + } - /* Allocate and initialize a new session context. 
*/ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -778,6 +779,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -805,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -818,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; From 5e6a9e5a3554a5b3db09cdc22253af1849c65dff Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:29 +0200 Subject: [PATCH 1884/1911] l2tp: hold session while sending creation notifications l2tp_session_find() doesn't take any reference on the returned session. Therefore, the session may disappear while sending the notification. Use l2tp_session_get() instead and decrement session's refcount once the notification is sent. Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba317863d..f1b68effb0770d 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -642,10 +642,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: From 2777e2ab5a9cf2b4524486c6db1517a6ded25261 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:30 +0200 Subject: [PATCH 1885/1911] l2tp: take a reference on sessions used in genetlink handlers Callers of l2tp_nl_session_find() need to hold a reference on the returned session since there's no guarantee that it isn't going to disappear from under them. 
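
The lookup helpers added in this series all follow the same contract: bump the session refcount, and additionally invoke the pseudowire's optional ->ref() hook when do_ref is true, with the caller later undoing both. Below is a compact userspace sketch of that contract under simplified, invented types; it is not the l2tp code itself.

/* Illustrative model of the get(..., do_ref) contract -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct session {
	int refcount;			/* protects the struct itself */
	void (*ref)(struct session *);	/* optional, protects e.g. the PPP socket */
	void (*deref)(struct session *);
};

static struct session *session_get(struct session *s, bool do_ref)
{
	s->refcount++;
	if (do_ref && s->ref)		/* mirrors: if (do_ref && session->ref) */
		s->ref(s);
	return s;
}

static void session_put(struct session *s, bool did_ref)
{
	if (did_ref && s->deref)
		s->deref(s);
	s->refcount--;
}

static void hold_socket(struct session *s) { printf("hold socket %p\n", (void *)s); }
static void put_socket(struct session *s)  { printf("put socket %p\n", (void *)s); }

int main(void)
{
	struct session s = { .refcount = 1,
			     .ref = hold_socket, .deref = put_socket };

	/* Handlers that also touch the socket pass do_ref=true and undo both. */
	session_get(&s, true);
	/* ... use the session and its socket ... */
	session_put(&s, true);

	return 0;
}
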
Relying on the fact that no l2tp netlink message may be processed concurrently isn't enough: sessions can be deleted by other means (e.g. by closing the PPPOL2TP socket of a ppp pseudowire). l2tp_nl_cmd_session_delete() is a bit special: it runs a callback function that may require a previous call to session->ref(). In particular, for ppp pseudowires, the callback is l2tp_session_delete(), which then calls pppol2tp_session_close() and dereferences the PPPOL2TP socket. The socket might already be gone at the moment l2tp_session_delete() calls session->ref(), so we need to take a reference during the session lookup. So we need to pass the do_ref variable down to l2tp_session_get() and l2tp_session_get_by_ifname(). Since all callers have to be updated, l2tp_session_find_by_ifname() and l2tp_nl_session_find() are renamed to reflect their new behaviour. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 +++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_netlink.c | 39 ++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 46b450a1bc211b..e927422d8c585f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -352,7 +352,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_find_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -362,7 +363,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -372,7 +377,7 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, struct l2tp_session *session) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 4544e81a3d2730..3b9b704a84e44d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -237,7 +237,8 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f1b68effb0770d..93e317377c6655 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info 
*info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -660,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -674,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -683,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -718,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -813,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: - nlmsg_free(msg); + l2tp_session_dec_refcount(session); -out: + return ret; + +err_ref_msg: + nlmsg_free(msg); +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } From a71c9a1c779f2499fb2afc0553e543f18aff6edf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2017 17:23:54 -0700 Subject: [PATCH 1886/1911] Linux 4.11-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 231e6a7749bd89..e11989d36c8752 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* From 75514b6654859e0130b512396dc964d2a9e84967 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 31 Mar 2017 18:41:23 -0500 Subject: [PATCH 1887/1911] net: ethernet: ti: cpsw: wake tx queues on ndo_tx_timeout In case, if TX 
watchdog is fired some or all netdev TX queues will be stopped and as part of recovery it is required not only to drain and reinitailize CPSW TX channeles, but also wake up stoppted TX queues what doesn't happen now and netdevice will stop transmiting data until reopenned. Hence, add netif_tx_wake_all_queues() call in .ndo_tx_timeout() to complete recovery and restore TX path. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 9f3d9c67e3fe0f..58cdc066ef2c4e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1817,6 +1817,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) } cpsw_intr_enable(cpsw); + netif_trans_update(ndev); + netif_tx_wake_all_queues(ndev); } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) From 921d701e6f31e1ffaca3560416af1aa04edb4c4f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sun, 2 Apr 2017 20:08:04 -0700 Subject: [PATCH 1888/1911] nios2: reserve boot memory for device tree Make sure to reserve the boot memory for the flattened device tree. Otherwise it might get overwritten, e.g. when initial_boot_params is copied, leading to a corrupted FDT and a boot hang/crash: bootconsole [early0] enabled Early console on uart16650 initialized at 0xf8001600 OF: fdt: Error -11 processing FDT Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! ---[ end Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! Guenter Roeck says: > I think I found the problem. In unflatten_and_copy_device_tree(), with added > debug information: > > OF: fdt: initial_boot_params=c861e400, dt=c861f000 size=28874 (0x70ca) > > ... and then initial_boot_params is copied to dt, which results in corrupted > fdt since the memory overlaps. Looks like the initial_boot_params memory > is not reserved and (re-)allocated by early_init_dt_alloc_memory_arch(). Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Reference: http://lkml.kernel.org/r/20170226210338.GA19476@roeck-us.net Tested-by: Guenter Roeck Signed-off-by: Tobias Klauser Acked-by: Ley Foon Tan --- arch/nios2/kernel/prom.c | 7 +++++++ arch/nios2/kernel/setup.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 367c5426157ba1..3901b80d442021 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -48,6 +48,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) return alloc_bootmem_align(size, align); } +int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, + bool nomap) +{ + reserve_bootmem(base, size, BOOTMEM_DEFAULT); + return 0; +} + void __init early_init_devtree(void *params) { __be32 *dtb = (u32 *)__dtb_start; diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 6e57ffa5db2769..6044d9be28b449 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -201,6 +201,9 @@ void __init setup_arch(char **cmdline_p) } #endif /* CONFIG_BLK_DEV_INITRD */ + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + unflatten_and_copy_device_tree(); setup_cpuinfo(); From 78420281a9d74014af7616958806c3aba056319e Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Mon, 3 Apr 2017 12:22:20 -0700 Subject: [PATCH 1889/1911] xfs: rework the inline directory verifiers The inline directory verifiers should be called on the inode fork data, which means after iformat_local on the read side, and prior to ifork_flush on the write side. This makes the fork verifier more consistent with the way buffer verifiers work -- i.e. they will operate on the memory buffer that the code will be reading and writing directly. Furthermore, revise the verifier function to return -EFSCORRUPTED so that we don't flood the logs with corruption messages and assert notices. This has been a particular problem with xfs/348, which triggers the XFS_WANT_CORRUPTED_RETURN assertions, which halts the kernel when CONFIG_XFS_DEBUG=y. Disk corruption isn't supposed to do that, at least not in a verifier. Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_dir2_priv.h | 3 +- fs/xfs/libxfs/xfs_dir2_sf.c | 63 ++++++++++++++++++++++------------ fs/xfs/libxfs/xfs_inode_fork.c | 35 +++++++------------ fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/xfs_inode.c | 19 +++++----- 5 files changed, 66 insertions(+), 56 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index eb00bc133bca67..39f8604f764e13 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,8 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, - int size); +extern int xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 96b45cd6c63f06..e84af093b2ab99 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -632,36 +632,49 @@ xfs_dir2_sf_check( /* Verify the consistency of an inline directory. */ int xfs_dir2_sf_verify( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *sfp, - int size) + struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; const struct xfs_dir_ops *dops; + struct xfs_ifork *ifp; xfs_ino_t ino; int i; int i8count; int offset; + int size; + int error; __uint8_t filetype; + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + /* + * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, + * so we can only trust the mountpoint to have the right pointer. + */ dops = xfs_dir_get_ops(mp, NULL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + size = ifp->if_bytes; + /* * Give up if the directory is way too short. */ - XFS_WANT_CORRUPTED_RETURN(mp, size > - offsetof(struct xfs_dir2_sf_hdr, parent)); - XFS_WANT_CORRUPTED_RETURN(mp, size >= - xfs_dir2_sf_hdr_size(sfp->i8count)); + if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || + size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; endp = (char *)sfp + size; /* Check .. 
entry */ ino = dops->sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; offset = dops->data_first_offset; /* Check all reported entries */ @@ -672,12 +685,12 @@ xfs_dir2_sf_verify( * Check the fixed-offset parts of the structure are * within the data buffer. */ - XFS_WANT_CORRUPTED_RETURN(mp, - ((char *)sfep + sizeof(*sfep)) < endp); + if (((char *)sfep + sizeof(*sfep)) >= endp) + return -EFSCORRUPTED; /* Don't allow names with known bad length. */ - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + if (sfep->namelen == 0) + return -EFSCORRUPTED; /* * Check that the variable-length part of the structure is @@ -685,33 +698,39 @@ xfs_dir2_sf_verify( * name component, so nextentry is an acceptable test. */ next_sfep = dops->sf_nextentry(sfp, sfep); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + if (endp < (char *)next_sfep) + return -EFSCORRUPTED; /* Check that the offsets always increase. */ - XFS_WANT_CORRUPTED_RETURN(mp, - xfs_dir2_sf_get_offset(sfep) >= offset); + if (xfs_dir2_sf_get_offset(sfep) < offset) + return -EFSCORRUPTED; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); - XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + if (filetype >= XFS_DIR3_FT_MAX) + return -EFSCORRUPTED; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); sfep = next_sfep; } - XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); - XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + if (i8count != sfp->i8count) + return -EFSCORRUPTED; + if ((void *)sfep != (void *)endp) + return -EFSCORRUPTED; /* Make sure this whole thing ought to be in local format. */ - XFS_WANT_CORRUPTED_RETURN(mp, offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) + return -EFSCORRUPTED; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9653e964eda4f9..8a37efe04de323 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -212,6 +212,16 @@ xfs_iformat_fork( if (error) return error; + /* Check inline dir contents. 
*/ + if (S_ISDIR(VFS_I(ip)->i_mode) && + dip->di_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_verify(ip); + if (error) { + xfs_idestroy_fork(ip, XFS_DATA_FORK); + return error; + } + } + if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -322,8 +332,6 @@ xfs_iformat_local( int whichfork, int size) { - int error; - /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in @@ -339,14 +347,6 @@ xfs_iformat_local( return -EFSCORRUPTED; } - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(ip->i_mount, - (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), - size); - if (error) - return error; - } - xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -867,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -int +void xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -877,7 +877,6 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; - int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -886,7 +885,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return 0; + return; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -894,19 +893,12 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return 0; + return; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(mp, - (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, - ifp->if_bytes); - if (error) - return error; - } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -959,7 +951,6 @@ xfs_iflush_fork( ASSERT(0); break; } - return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 132dc59fdde694..7fb8365326d1a7 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c7fe2c2123ab83..7605d839659635 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -50,6 +50,7 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -3475,7 +3476,6 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; - int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3547,6 +3547,12 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; + /* Check the inline directory data. 
*/ + if (S_ISDIR(VFS_I(ip)->i_mode) && + ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && + xfs_dir2_sf_verify(ip)) + goto corrupt_out; + /* * Copy the dirty parts of the inode into the on-disk inode. We always * copy out the core of the inode, because if the inode is dirty at all @@ -3558,14 +3564,9 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (error) - return error; - if (XFS_IFORK_Q(ip)) { - error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); - if (error) - return error; - } + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (XFS_IFORK_Q(ip)) + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* From 3dd09d5a8589c640abb49cfcf92b4ed669eafad1 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Mon, 3 Apr 2017 12:22:29 -0700 Subject: [PATCH 1890/1911] xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of files When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will round the file size up to the nearest multiple of PAGE_SIZE: calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1 calvinow@vm-disks/generic-xfs-1 ~$ stat test Size: 2048 Blocks: 8 IO Block: 4096 regular file calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test calvinow@vm-disks/generic-xfs-1 ~$ stat test Size: 4096 Blocks: 8 IO Block: 4096 regular file Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers don't enforce that [pos,offset) lies strictly on [0,i_size) when being called from xfs_free_file_space(), so by "leaking" these ranges into xfs_zero_range() we get this buggy behavior. Fix this by reintroducing the checks xfs_zero_remaining_bytes() did against i_size at the bottom of xfs_free_file_space(). Reported-by: Aaron Gao Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") Cc: Christoph Hellwig Cc: Brian Foster Cc: # 4.8+ Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 8b75dcea596680..828532ce0adca8 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1311,8 +1311,16 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is - * smart enough to skip any holes, including those we just created. + * smart enough to skip any holes, including those we just created, + * but we must take care not to zero beyond EOF and enlarge i_size. */ + + if (offset >= XFS_ISIZE(ip)) + return 0; + + if (offset + len > XFS_ISIZE(ip)) + len = XFS_ISIZE(ip) - offset; + return xfs_zero_range(ip, offset, len, NULL); } From bf9216f922612d2db7666aae01e65064da2ffb3a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Apr 2017 12:22:39 -0700 Subject: [PATCH 1891/1911] xfs: fix kernel memory exposure problems Fix a memory exposure problems in inumbers where we allocate an array of structures with holes, fail to zero the holes, then blindly copy the kernel memory contents (junk and all) into userspace. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_itable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2a6d9b1558e00d..26d67ce3c18d90 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -583,7 +583,7 @@ xfs_inumbers( return error; bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); + buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); do { struct xfs_inobt_rec_incore r; int stat; From 280489daa68bd20364b322c11e3f429a0212c611 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 17:43:48 +0100 Subject: [PATCH 1892/1911] drm/msm: adreno: fix build error without debugfs The newly added a5xx support fails to build when debugfs is diabled: drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:4: error: 'struct msm_gpu_funcs' has no member named 'show' drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:11: error: 'a5xx_show' undeclared here (not in a function); did you mean 'a5xx_irq'? This adds a missing #ifdef. Fixes: b5f103ab98c7 ("drm/msm: gpu: Add A5XX target support") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 4414cf73735d26..f0c8bd74ca91fc 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -860,7 +860,9 @@ static const struct adreno_gpu_funcs funcs = { .idle = a5xx_idle, .irq = a5xx_irq, .destroy = a5xx_destroy, +#ifdef CONFIG_DEBUG_FS .show = a5xx_show, +#endif }, .get_timestamp = a5xx_get_timestamp, }; From f456d348b6320a2597a3f1fff3ad0011c5678603 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 7 Mar 2017 09:50:27 -0700 Subject: [PATCH 1893/1911] drm/msm: Fix wrong pointer check in a5xx_destroy Instead of checking for a5xx_gpu->gpmu_iova during destroy we accidently check a5xx_gpu->gpmu_bo. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index f0c8bd74ca91fc..36602ac7e24835 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -534,7 +534,7 @@ static void a5xx_destroy(struct msm_gpu *gpu) } if (a5xx_gpu->gpmu_bo) { - if (a5xx_gpu->gpmu_bo) + if (a5xx_gpu->gpmu_iova) msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); } From 1a5dff5d74e55608a9632a1d030bb79196e0755c Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 7 Mar 2017 10:02:51 -0700 Subject: [PATCH 1894/1911] drm/msm: Don't allow zero sized buffer objects Zero sized buffer objects tend to make various bits of the GEM infrastructure complain: WARNING: CPU: 1 PID: 2323 at drivers/gpu/drm/drm_mm.c:389 drm_mm_insert_node_generic+0x258/0x2f0 Modules linked in: CPU: 1 PID: 2323 Comm: drm-api-test Tainted: G W 4.9.0-rc4-00906-g693af44 #213 Hardware name: Qualcomm Technologies, Inc. 
DB820c (DT) task: ffff8000d7353400 task.stack: ffff8000d7720000 PC is at drm_mm_insert_node_generic+0x258/0x2f0 LR is at drm_vma_offset_add+0x4c/0x70 Zero sized buffers serve no appreciable value to the user so disallow them at create time. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 59811f29607de6..68e509b3b9e4d0 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -812,6 +812,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, size = PAGE_ALIGN(size); + /* Disallow zero sized objects as they make the underlying + * infrastructure grumpy + */ + if (size == 0) + return ERR_PTR(-EINVAL); + ret = msm_gem_new_impl(dev, size, flags, NULL, &obj); if (ret) goto fail; From a5fef535c529b64b622f9d7eafb3ab86850f5f8f Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 16 Feb 2017 16:29:04 +0530 Subject: [PATCH 1895/1911] drm/msm/dsi: Fix bug in dsi_mgr_phy_enable A recent commit introduces a bug in dsi_mgr_phy_enable. In the non dual DSI mode, we reset the mdsi (master DSI) PHY. This isn't right since master and slave DSI exist only in dual DSI mode. For the normal mode of operation, we should simply reset the PHY of the DSI device (i.e. msm_dsi) corresponding to the current bridge. Usage of the wrong DSI pointer also resulted in a static checker warning. That too is resolved with this fix. Fixes: b62aa70a98c5 (drm/msm/dsi: Move PHY operations out of host) Reported-by: Dan Carpenter Signed-off-by: Archit Taneja Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 921270ea6059de..a879ffa534b4d8 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -171,7 +171,7 @@ dsi_mgr_phy_enable(int id, } } } else { - msm_dsi_host_reset_phy(mdsi->host); + msm_dsi_host_reset_phy(msm_dsi->host); ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]); if (ret) return ret; From 30512040ed8e1982e5ba16bb3e3a7f000ff65427 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 17 Mar 2017 09:09:48 +0530 Subject: [PATCH 1896/1911] drm/msm/mdp5: Update SSPP_MAX value 'SSPP_MAX + 1' is the max number of hwpipes that can be present on a MDP5 platform. Recently, 2 new cursor hwpipes were added, which caused overflows in arrays that used SSPP_MAX to represent the number of elements. Update the SSPP_MAX value to incorporate the extra hwpipes. 
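
The general idiom behind this fix: when a MAX constant is defined as "last enum member + 1" and used to size per-pipe arrays, adding enum members without bumping the define silently overflows every such array. A small stand-alone C illustration follows; the enum values are invented and only the sizing pattern mirrors mdp5_pipe.h.

#include <assert.h>
#include <stdio.h>

/* Hypothetical hardware-pipe IDs; only the sizing idiom matches mdp5. */
enum sspp {
	SSPP_VIG0, SSPP_RGB0, SSPP_DMA0, SSPP_RGB3,
	SSPP_CURSOR0, SSPP_CURSOR1,		/* newly added members */
};

/* Must track the last member, as in: #define SSPP_MAX (SSPP_CURSOR1 + 1).
 * Leaving it at (SSPP_RGB3 + 1) is exactly the overflow the patch fixes. */
#define SSPP_MAX (SSPP_CURSOR1 + 1)

static const char *sspp_name[SSPP_MAX] = {
	[SSPP_VIG0] = "vig0",       [SSPP_RGB0] = "rgb0",
	[SSPP_DMA0] = "dma0",       [SSPP_RGB3] = "rgb3",
	[SSPP_CURSOR0] = "cursor0", [SSPP_CURSOR1] = "cursor1",
};

/* Guard against the define going stale again. */
static_assert(SSPP_MAX > SSPP_CURSOR1, "SSPP_MAX no longer covers every pipe");

int main(void)
{
	for (int i = 0; i < SSPP_MAX; i++)
		printf("%d -> %s\n", i, sspp_name[i]);
	return 0;
}
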
Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h index 611da7a660c942..238901987e00b0 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h @@ -18,7 +18,8 @@ #ifndef __MDP5_PIPE_H__ #define __MDP5_PIPE_H__ -#define SSPP_MAX (SSPP_RGB3 + 1) /* TODO: Add SSPP_MAX in mdp5.xml.h */ +/* TODO: Add SSPP_MAX in mdp5.xml.h */ +#define SSPP_MAX (SSPP_CURSOR1 + 1) /* represents a hw pipe, which is dynamically assigned to a plane */ struct mdp5_hw_pipe { From d322a693f585832130c3d09f2175b8f2b3ae99e1 Mon Sep 17 00:00:00 2001 From: Vinay Simha BN Date: Tue, 14 Mar 2017 10:55:56 +0530 Subject: [PATCH 1897/1911] drm/msm/hdmi: redefinitions of macros not required 4 macros already defined in hdmi.h, which is not required to redefine in hdmi_audio.c Signed-off-by: Vinay Simha BN Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/hdmi/hdmi_audio.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c index a54d3bb5baad9c..8177e8511afd8c 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -18,13 +18,6 @@ #include #include "hdmi.h" - -/* Supported HDMI Audio channels */ -#define MSM_HDMI_AUDIO_CHANNEL_2 0 -#define MSM_HDMI_AUDIO_CHANNEL_4 1 -#define MSM_HDMI_AUDIO_CHANNEL_6 2 -#define MSM_HDMI_AUDIO_CHANNEL_8 3 - /* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */ static int nchannels[] = { 2, 4, 6, 8 }; From 028402d4bcfd3e99421504674cc41b0cd32768c8 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 6 Feb 2017 10:39:29 -0700 Subject: [PATCH 1898/1911] drm/msm: Make sure to detach the MMU during GPU cleanup We should be detaching the MMU before destroying the address space. To do this cleanly, the detach has to happen in adreno_gpu_cleanup() because it needs access to structs in adreno_gpu.c. Plus it is better symmetry to have the attach and detach at the same code level. 
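
A hedged sketch of the symmetry being argued for, using invented names rather than the msm driver API: whatever the init path attaches, the cleanup path should detach at the same level, and only afterwards destroy the object that owns the mapping.

/* Illustrative init/cleanup symmetry -- not the actual msm driver code. */
#include <stdio.h>
#include <stdlib.h>

struct mmu {
	int attached;
};

struct aspace {
	struct mmu *mmu;
};

static void mmu_attach(struct mmu *m) { m->attached = 1; printf("attach\n"); }
static void mmu_detach(struct mmu *m) { m->attached = 0; printf("detach\n"); }

static struct aspace *gpu_init(struct mmu *m)
{
	struct aspace *as = calloc(1, sizeof(*as));

	as->mmu = m;
	mmu_attach(as->mmu);	/* attach happens here ... */
	return as;
}

static void gpu_cleanup(struct aspace *as)
{
	/* ... so the matching detach belongs here, before the address
	 * space that owns the MMU pointer is torn down. */
	mmu_detach(as->mmu);
	free(as);		/* stand-in for the address-space destroy */
}

int main(void)
{
	struct mmu m = { 0 };
	struct aspace *as = gpu_init(&m);

	gpu_cleanup(as);
	return 0;
}
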
Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 29 ++++++++++++++++--------- drivers/gpu/drm/msm/msm_gpu.c | 3 --- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c9bd1e6225f4f9..5ae65426b4e559 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -418,18 +418,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return 0; } -void adreno_gpu_cleanup(struct adreno_gpu *gpu) +void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - if (gpu->memptrs_bo) { - if (gpu->memptrs) - msm_gem_put_vaddr(gpu->memptrs_bo); + struct msm_gpu *gpu = &adreno_gpu->base; + + if (adreno_gpu->memptrs_bo) { + if (adreno_gpu->memptrs) + msm_gem_put_vaddr(adreno_gpu->memptrs_bo); + + if (adreno_gpu->memptrs_iova) + msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id); + + drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); + } + release_firmware(adreno_gpu->pm4); + release_firmware(adreno_gpu->pfp); - if (gpu->memptrs_iova) - msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); + msm_gpu_cleanup(gpu); - drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + if (gpu->aspace) { + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, + iommu_ports, ARRAY_SIZE(iommu_ports)); + msm_gem_address_space_destroy(gpu->aspace); } - release_firmware(gpu->pm4); - release_firmware(gpu->pfp); - msm_gpu_cleanup(&gpu->base); } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 99e05aacbee181..af5b6ba4095b06 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -706,9 +706,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_ringbuffer_destroy(gpu->rb); } - if (gpu->aspace) - msm_gem_address_space_destroy(gpu->aspace); - if (gpu->fctx) msm_fence_context_free(gpu->fctx); } From ac6a3722fed67c658a435187d0254ae119d845d3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 3 Apr 2017 15:42:58 -0400 Subject: [PATCH 1899/1911] flow dissector: correct size of storage for ARP The last argument to __skb_header_pointer() should be a buffer large enough to store struct arphdr. This can be a pointer to a struct arphdr structure. The code was previously using a pointer to a pointer to struct arphdr. By my counting the storage available both before and after is 8 bytes on x86_64. Fixes: 55733350e5e8 ("flow disector: ARP support") Reported-by: Nicolas Iooss Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c35aae13c8d226..d98d4998213da6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -390,7 +390,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb, unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; - struct arphdr *_arp; + struct arphdr _arp; arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, hlen, &_arp); From df2729c3238ed89fb8ccf850d38c732858a5bade Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2017 17:15:59 +0800 Subject: [PATCH 1900/1911] sctp: check for dst and pathmtu update in sctp_packet_config This patch is to move sctp_transport_dst_check into sctp_packet_config from sctp_packet_transmit and add pathmtu check in sctp_packet_config. 
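
A simplified model of the resulting control flow, with invented names and no claim to match the sctp code: the route and path-MTU checks run once per flush, while the packet is still empty, so every chunk appended afterwards is sized against an up-to-date max_size.

/* Simplified "validate once per flush, before appending" model --
 * not the sctp implementation; types and names are illustrative. */
#include <stdbool.h>
#include <stdio.h>

struct transport {
	bool dst_valid;		/* stands in for dst_check() on the cached route */
	unsigned int pathmtu;
};

struct packet {
	unsigned int size;	/* bytes already queued in this packet */
	unsigned int max_size;
};

static void refresh_route(struct transport *t)
{
	t->dst_valid = true;
	t->pathmtu = 1400;	/* pretend the fresh route reports a smaller MTU */
}

static void packet_config(struct packet *p, struct transport *t)
{
	if (p->size)		/* only the first chunk of a flush pays the cost */
		return;

	if (!t->dst_valid)	/* route went stale: re-route before sizing */
		refresh_route(t);

	p->max_size = t->pathmtu;
}

int main(void)
{
	struct transport t = { .dst_valid = false, .pathmtu = 1500 };
	struct packet p = { 0 };

	packet_config(&p, &t);	/* chunks appended after this see max_size=1400 */
	printf("max_size=%u\n", p.max_size);
	return 0;
}
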
With this fix, sctp can update dst or pathmtu before appending chunks, which can void dropping packets in sctp_packet_transmit when dst is obsolete or dst's mtu is changed. This patch is also to improve some other codes in sctp_packet_config. It updates packet max_size with gso_max_size, checks for dst and pathmtu, and appends ecne chunk only when packet is empty and asoc is not NULL. It makes sctp flush work better, as we only need to set up them once for one flush schedule. It's also safe, since asoc is NULL only when the packet is created by sctp_ootb_pkt_new in which it just gets the new dst, no need to do more things for it other than set packet with transport's pathmtu. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 17 +++++++++-- net/sctp/output.c | 67 ++++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1f71ee5ab51841..d75caa7a629be4 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -596,12 +596,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && (!dst_check(t->dst, t->dst_cookie) || - t->pathmtu != max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), - SCTP_DEFAULT_MINSEGMENT))) + if (t->dst && !dst_check(t->dst, t->dst_cookie)) sctp_transport_dst_release(t); return t->dst; } +static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +{ + __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), + SCTP_DEFAULT_MINSEGMENT); + + if (t->pathmtu == pmtu) + return true; + + t->pathmtu = pmtu; + + return false; +} + #endif /* __net_sctp_h__ */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 73fd178007a37e..ec4d50a3871378 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; - - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. 
+ */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); From 0b9aefea860063bb39e36bd7fe6c7087fed0ba87 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 1 Apr 2017 11:00:21 -0300 Subject: [PATCH 1901/1911] tcp: minimize false-positives on TCP/GRO check Markus Trippelsdorf reported that after commit dcb17d22e1c2 ("tcp: warn on bogus MSS and try to amend it") the kernel started logging the warning for a NIC driver that doesn't even support GRO. It was diagnosed that it was possibly caused on connections that were using TCP Timestamps but some packets lacked the Timestamps option. As we reduce rcv_mss when timestamps are used, the lack of them would cause the packets to be bigger than expected, although this is a valid case. As this warning is more as a hint, getting a clean-cut on the threshold is probably not worth the execution time spent on it. This patch thus alleviates the false-positives with 2 quick checks: by accounting for the entire TCP option space and also checking against the interface MTU if it's available. These changes, specially the MTU one, might mask some real positives, though if they are really happening, it's possible that sooner or later it will be triggered anyway. Reported-by: Markus Trippelsdorf Cc: Eric Dumazet Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c43119726a62e4..97ac6776e47d36 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? 
dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. From e08293a4ccbcc993ded0fdc46f1e57926b833d63 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 12:03:13 +0200 Subject: [PATCH 1902/1911] l2tp: take reference on sessions being dumped Take a reference on the sessions returned by l2tp_session_find_nth() (and rename it l2tp_session_get_nth() to reflect this change), so that caller is assured that the session isn't going to disappear while processing it. For procfs and debugfs handlers, the session is held in the .start() callback and dropped in .show(). Given that pppol2tp_seq_session_show() dereferences the associated PPPoL2TP socket and that l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to call the session's .ref() callback to prevent the socket from going away from under us. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_debugfs.c | 10 +++++++--- net/l2tp/l2tp_netlink.c | 7 +++++-- net/l2tp/l2tp_ppp.c | 10 +++++++--- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e927422d8c585f..e37d9554da7b47 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -327,7 +327,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -337,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -347,7 +351,7 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. 
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3b9b704a84e44d..8ce7818c7a9d05 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -236,7 +236,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae347b..d100aed3d06fb6 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 93e317377c6655..7e3e669baac42d 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -867,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -877,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 26501902d1a7fe..7bf73091baa224 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1560,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1687,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. 
*/ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; From a8919661d78b90d747b3514197e49ecf8727d08c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Apr 2017 11:19:10 +0100 Subject: [PATCH 1903/1911] bnx2x: fix spelling mistake in macros HW_INTERRUT_ASSERT_SET_* Trival fix, rename HW_INTERRUT_ASSERT_SET_* to HW_INTERRUPT_ASSERT_SET_* Signed-off-by: Colin Ian King Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 6 +++--- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0a23034bbe3ff8..352beff796ae5b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2277,7 +2277,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC)) -#define HW_INTERRUT_ASSERT_SET_0 \ +#define HW_INTERRUPT_ASSERT_SET_0 \ (AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT | \ @@ -2290,7 +2290,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_1 \ +#define HW_INTERRUPT_ASSERT_SET_1 \ (AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_XSDM_HW_INTERRUPT | \ @@ -2318,7 +2318,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR | \ AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_2 \ +#define HW_INTERRUPT_ASSERT_SET_2 \ (AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT | \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index ac76fc251d268e..a851f95c307a33 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4166,14 +4166,14 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn) bnx2x_release_phy_lock(bp); } - if (attn & HW_INTERRUT_ASSERT_SET_0) { + if (attn & HW_INTERRUPT_ASSERT_SET_0) { val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_0); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set0 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_0)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_0)); bnx2x_panic(); } } @@ -4191,7 +4191,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) BNX2X_ERR("FATAL error from DORQ\n"); } - if (attn & HW_INTERRUT_ASSERT_SET_1) { + if (attn & HW_INTERRUPT_ASSERT_SET_1) { int port = BP_PORT(bp); int reg_offset; @@ -4200,11 +4200,11 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); + val &= ~(attn & 
HW_INTERRUPT_ASSERT_SET_1); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set1 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_1)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_1)); bnx2x_panic(); } } @@ -4235,7 +4235,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) } } - if (attn & HW_INTERRUT_ASSERT_SET_2) { + if (attn & HW_INTERRUPT_ASSERT_SET_2) { int port = BP_PORT(bp); int reg_offset; @@ -4244,11 +4244,11 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_2); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set2 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_2)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_2)); bnx2x_panic(); } } From 249ee819e24c180909f43c1173c8ef6724d21faf Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 13:23:15 +0200 Subject: [PATCH 1904/1911] l2tp: fix PPP pseudo-wire auto-loading PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP). Fixes: f1f39f911027 ("l2tp: auto load type modules") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7bf73091baa224..861b255a2d5195 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1853,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); From 30c57f073449e09ae42f908bfff56c08c8751a6f Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Mon, 3 Apr 2017 17:34:28 +0530 Subject: [PATCH 1905/1911] net: ethernet: ti: cpsw: fix race condition during open() TI's cpsw driver handles both OF and non-OF case for phy connect. Unfortunately of_phy_connect() returns NULL on error while phy_connect() returns ERR_PTR(). To handle this, cpsw_slave_open() overrides the return value from phy_connect() to make it NULL or error. This leaves a small window, where cpsw_adjust_link() may be invoked for a slave while slave->phy pointer is temporarily set to -ENODEV (or some other error) before it is finally set to NULL. _cpsw_adjust_link() only handles the NULL case, and an oops results when ERR_PTR() is seen by it. Note that cpsw_adjust_link() checks PHY status for each slave whenever it is invoked. It can so happen that even though phy_connect() for a given slave returns error, _cpsw_adjust_link() is still called for that slave because the link status of another slave changed. Fix this by using a temporary pointer to store return value of {of_}phy_connect() and do a one-time write to slave->phy. Reviewed-by: Grygorii Strashko Reported-by: Yan Liu Signed-off-by: Sekhar Nori Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 58cdc066ef2c4e..fa674a8bda0c8f 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1267,6 +1267,7 @@ static void soft_reset_slave(struct cpsw_slave *slave) static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; + struct phy_device *phy; struct cpsw_common *cpsw = priv->cpsw; soft_reset_slave(slave); @@ -1300,27 +1301,28 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); if (slave->data->phy_node) { - slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, + phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); - if (!slave->phy) { + if (!phy) { dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", slave->data->phy_node->full_name, slave->slave_num); return; } } else { - slave->phy = phy_connect(priv->ndev, slave->data->phy_id, + phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); - if (IS_ERR(slave->phy)) { + if (IS_ERR(phy)) { dev_err(priv->dev, "phy \"%s\" not found on slave %d, err %ld\n", slave->data->phy_id, slave->slave_num, - PTR_ERR(slave->phy)); - slave->phy = NULL; + PTR_ERR(phy)); return; } } + slave->phy = phy; + phy_attached_info(slave->phy); phy_start(slave->phy); From b2376407f98920c9b0c411948675f58a9640be35 Mon Sep 17 00:00:00 2001 From: Vic Yang Date: Fri, 24 Mar 2017 18:44:01 +0100 Subject: [PATCH 1906/1911] mfd: cros-ec: Fix host command buffer size For SPI, we can get up to 32 additional bytes for response preamble. The current overhead (2 bytes) may cause problems when we try to receive a big response. Update it to 32 bytes. Without this fix we could see a kernel BUG when we receive a big response from the Chrome EC when is connected via SPI. Signed-off-by: Vic Yang Tested-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7a01c94496f14e..3eef9fb9968ae7 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,10 +35,11 @@ * Max bus-specific overhead incurred by request/responses. * I2C requires 1 additional byte for requests. * I2C requires 2 additional bytes for responses. + * SPI requires up to 32 additional bytes for responses. * */ #define EC_PROTO_VERSION_UNKNOWN 0 #define EC_MAX_REQUEST_OVERHEAD 1 -#define EC_MAX_RESPONSE_OVERHEAD 2 +#define EC_MAX_RESPONSE_OVERHEAD 32 /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. From 62277de758b155dc04b78f195a1cb5208c37b2df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:33:59 +0000 Subject: [PATCH 1907/1911] ring-buffer: Fix return value check in test_ringbuffer() In case of error, the function kthread_run() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). 
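Note (aside, not part of the commit message): kthread_create() and kthread_run() report failure via ERR_PTR() and never return NULL, so the test has to use IS_ERR()/PTR_ERR(). A minimal sketch of the idiom; start_worker() and worker_fn() are hypothetical names, not taken from the patch:

    #include <linux/err.h>
    #include <linux/kthread.h>

    static int start_worker(void)
    {
            struct task_struct *worker;

            /* kthread_run() returns ERR_PTR(-ENOMEM) etc. on failure, never NULL. */
            worker = kthread_run(worker_fn, NULL, "worker");
            if (IS_ERR(worker))
                    return PTR_ERR(worker);         /* a NULL check would never fire */
            return 0;
    }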
Link: http://lkml.kernel.org/r/1466184839-14927-1-git-send-email-weiyj_lk@163.com Cc: stable@vger.kernel.org Fixes: 6c43e554a ("ring-buffer: Add ring buffer startup selftest") Signed-off-by: Wei Yongjun Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 96fc3c043ad654..54e7a90db848df 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4826,9 +4826,9 @@ static __init int test_ringbuffer(void) rb_data[cpu].cnt = cpu; rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], "rbtester/%d", cpu); - if (WARN_ON(!rb_threads[cpu])) { + if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_threads[cpu]); goto out_free; } @@ -4838,9 +4838,9 @@ static __init int test_ringbuffer(void) /* Now create the rb hammer! */ rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); - if (WARN_ON(!rb_hammer)) { + if (WARN_ON(IS_ERR(rb_hammer))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_hammer); goto out_free; } From 3ebfdf082184d04f6e73b30cd9446613dc7f8c02 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 4 Apr 2017 13:39:55 +0800 Subject: [PATCH 1908/1911] sctp: get sock from transport in sctp_transport_update_pmtu This patch is almost to revert commit 02f3d4ce9e81 ("sctp: Adjust PMTU updates to accomodate route invalidation."). As t->asoc can't be NULL in sctp_transport_update_pmtu, it could get sk from asoc, and no need to pass sk into that function. It is also to remove some duplicated codes from that function. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 5 ++--- include/net/sctp/structs.h | 6 +++--- net/sctp/associola.c | 6 +++--- net/sctp/input.c | 4 ++-- net/sctp/output.c | 4 ++-- net/sctp/socket.c | 6 +++--- net/sctp/transport.c | 19 +++++++------------ 7 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d75caa7a629be4..069582ee5d7fd5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -448,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) { - - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a127b7c2c3c932..138f8615acf099 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -952,8 +952,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); +void sctp_transport_reset(struct sctp_transport *t); +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); @@ -1954,7 +1954,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 
sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b26df5f618856..a9708da28eb53f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1412,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1424,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f03..0e06a278d2a911 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index ec4d50a3871378..1409a875ad8e22 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -105,10 +105,10 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pmtu_check(tp)) { if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* If there a is a prepend chunk stick it on the list before diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 12fbae2c1002e3..c1401f43d40fc5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. 
The I-D @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af3686d..721eeebfcd8a50 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination From ecde8f36f8a05a023b9d026e9094571aab421d36 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:39 -0700 Subject: [PATCH 1909/1911] tcp: fix lost retransmit SNMP under-counting The lost retransmit SNMP stat is under-counting retransmission that uses segment offloading. This patch fixes that so all retransmission related SNMP counters are consistent. Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. 
Miller --- net/ipv4/tcp_recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae85042d..d8acbd9f477a2a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } From 2d2517ee314ef1de0517f74d06c2825fbf597ba3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:40 -0700 Subject: [PATCH 1910/1911] tcp: fix reordering SNMP under-counting Currently the reordering SNMP counters only increase if a connection sees a higher degree then it has previously seen. It ignores if the reordering degree is not greater than the default system threshold. This significantly under-counts the number of reordering events and falsely convey that reordering is rare on the network. This patch properly and faithfully records the number of reordering events detected by the TCP stack, just like the comment says "this exciting event is worth to be remembered". Note that even so TCP still under-estimate the actual reordering events because TCP requires TS options or certain packet sequences to detect reordering (i.e. ACKing never-retransmitted sequence in recovery or disordered state). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97ac6776e47d36..2c1f59386a7bac 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -878,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -906,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ From c383bdd14f91562babd269aa7c36b46fee7b6c75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Apr 2017 15:56:55 -0700 Subject: [PATCH 1911/1911] nfp: fix potential use after free on xdp prog We should unregister the net_device first, before we give back our reference on xdp_prog. 
Otherwise xdp_prog may be freed before .ndo_stop() has disabled the datapath.

Found by code inspection.

Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver")
Signed-off-by: Jakub Kicinski
Reviewed-by: Simon Horman
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 9179a99563afa8..a41377e26c07d0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3275,9 +3275,10 @@ void nfp_net_netdev_clean(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
+	unregister_netdev(nn->netdev);
+
 	if (nn->xdp_prog)
 		bpf_prog_put(nn->xdp_prog);
 	if (nn->bpf_offload_xdp)
 		nfp_net_xdp_offload(nn, NULL);
-	unregister_netdev(nn->netdev);
 }
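Note: the ordering rule behind this fix generalises: quiesce everything that can still reach a resource before dropping the last reference to it. unregister_netdev() runs .ndo_stop() and stops the datapath; only then is it safe to release the XDP program. A minimal sketch of the same shape, using the real unregister_netdev()/bpf_prog_put() APIs but a hypothetical driver-private struct:

    /* Hypothetical teardown helper illustrating the ordering. */
    static void my_netdev_clean(struct net_device *netdev)
    {
            struct my_priv *priv = netdev_priv(netdev);     /* my_priv is hypothetical */

            /* 1. Stop all users of the program: .ndo_stop() runs from here. */
            unregister_netdev(netdev);

            /* 2. Nothing can run the program any more; releasing it is safe. */
            if (priv->xdp_prog)
                    bpf_prog_put(priv->xdp_prog);
    }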