diff options
Diffstat (limited to 'net')
69 files changed, 1300 insertions, 1522 deletions
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 2154d004d3dc..db66e11e7fe8 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -159,7 +159,8 @@ int ax25_register_dev_sysctl(ax25_dev *ax25_dev) table[k].data = &ax25_dev->values[k]; snprintf(path, sizeof(path), "net/ax25/%s", ax25_dev->dev->name); - ax25_dev->sysheader = register_net_sysctl(&init_net, path, table); + ax25_dev->sysheader = register_net_sysctl_sz(&init_net, path, table, + ARRAY_SIZE(ax25_param_table)); if (!ax25_dev->sysheader) { kfree(table); return -ENOMEM; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 5697df9d4394..94ec913dfb76 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -771,7 +771,7 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->port.count); @@ -779,17 +779,18 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) tty_port_close(&dev->port, tty, filp); } -static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) +static ssize_t rfcomm_tty_write(struct tty_struct *tty, const u8 *buf, + size_t count) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; struct rfcomm_dlc *dlc = dev->dlc; struct sk_buff *skb; - int sent = 0, size; + size_t sent = 0, size; - BT_DBG("tty %p count %d", tty, count); + BT_DBG("tty %p count %zu", tty, count); while (count) { - size = min_t(uint, count, dlc->mtu); + size = min_t(size_t, count, dlc->mtu); skb = rfcomm_wmalloc(dev, size + RFCOMM_SKB_RESERVE, GFP_ATOMIC); if (!skb) @@ -810,7 +811,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in static unsigned int rfcomm_tty_write_room(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; int room = 0; if (dev && dev->dlc) @@ -864,7 +865,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, u8 baud, data_bits, stop_bits, parity, x_on, x_off; u16 changes = 0; - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p termios %p", tty, old); @@ -996,7 +997,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, static void rfcomm_tty_throttle(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1005,7 +1006,7 @@ static void rfcomm_tty_throttle(struct tty_struct *tty) static void rfcomm_tty_unthrottle(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1014,7 +1015,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty) static unsigned int rfcomm_tty_chars_in_buffer(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1029,7 +1030,7 @@ static unsigned int rfcomm_tty_chars_in_buffer(struct tty_struct *tty) static void rfcomm_tty_flush_buffer(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1052,7 +1053,7 @@ static void rfcomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) static void rfcomm_tty_hangup(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1061,7 +1062,7 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) static int rfcomm_tty_tiocmget(struct tty_struct *tty) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; BT_DBG("tty %p dev %p", tty, dev); @@ -1070,7 +1071,7 @@ static int rfcomm_tty_tiocmget(struct tty_struct *tty) static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { - struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; + struct rfcomm_dev *dev = tty->driver_data; struct rfcomm_dlc *dlc = dev->dlc; u8 v24_sig; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 1a801fab9543..15186247b59a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1135,7 +1135,8 @@ static int br_netfilter_sysctl_init_net(struct net *net) br_netfilter_sysctl_default(brnet); - brnet->ctl_hdr = register_net_sysctl(net, "net/bridge", table); + brnet->ctl_hdr = register_net_sysctl_sz(net, "net/bridge", table, + ARRAY_SIZE(brnf_table)); if (!brnet->ctl_hdr) { if (!net_eq(net, &init_net)) kfree(table); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5eb4898cccd4..10a41cd9c523 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, return true; } +static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor, + size_t length) +{ + struct ceph_msg_data *data = cursor->data; + + cursor->iov_iter = data->iter; + cursor->lastlen = 0; + iov_iter_truncate(&cursor->iov_iter, length); + cursor->resid = iov_iter_count(&cursor->iov_iter); +} + +static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor, + size_t *page_offset, size_t *length) +{ + struct page *page; + ssize_t len; + + if (cursor->lastlen) + iov_iter_revert(&cursor->iov_iter, cursor->lastlen); + + len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE, + 1, page_offset); + BUG_ON(len < 0); + + cursor->lastlen = len; + + /* + * FIXME: The assumption is that the pages represented by the iov_iter + * are pinned, with the references held by the upper-level + * callers, or by virtue of being under writeback. Eventually, + * we'll get an iov_iter_get_pages2 variant that doesn't take + * page refs. Until then, just put the page ref. + */ + VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page); + put_page(page); + + *length = min_t(size_t, len, cursor->resid); + return page; +} + +static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor, + size_t bytes) +{ + BUG_ON(bytes > cursor->resid); + cursor->resid -= bytes; + + if (bytes < cursor->lastlen) { + cursor->lastlen -= bytes; + } else { + iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen); + cursor->lastlen = 0; + } + + return cursor->resid; +} + /* * Message data is handled (sent or received) in pieces, where each * piece resides on a single page. The network layer might not @@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) case CEPH_MSG_DATA_BVECS: ceph_msg_data_bvecs_cursor_init(cursor, length); break; + case CEPH_MSG_DATA_ITER: + ceph_msg_data_iter_cursor_init(cursor, length); + break; case CEPH_MSG_DATA_NONE: default: /* BUG(); */ @@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, cursor->total_resid = length; cursor->data = msg->data; + cursor->sr_resid = 0; __ceph_msg_data_cursor_init(cursor); } @@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, case CEPH_MSG_DATA_BVECS: page = ceph_msg_data_bvecs_next(cursor, page_offset, length); break; + case CEPH_MSG_DATA_ITER: + page = ceph_msg_data_iter_next(cursor, page_offset, length); + break; case CEPH_MSG_DATA_NONE: default: page = NULL; @@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) case CEPH_MSG_DATA_BVECS: new_piece = ceph_msg_data_bvecs_advance(cursor, bytes); break; + case CEPH_MSG_DATA_ITER: + new_piece = ceph_msg_data_iter_advance(cursor, bytes); + break; case CEPH_MSG_DATA_NONE: default: BUG(); @@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, } EXPORT_SYMBOL(ceph_msg_data_add_bvecs); +void ceph_msg_data_add_iter(struct ceph_msg *msg, + struct iov_iter *iter) +{ + struct ceph_msg_data *data; + + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_ITER; + data->iter = *iter; + + msg->data_length += iov_iter_count(&data->iter); +} + /* * construct a new message with given type, size * the new msg has a ref count of 1. diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 3d57bb48a2b4..f9a50d7f0d20 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { - /* Initialize data cursor */ - - ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); + /* Initialize data cursor if it's not a sparse read */ + if (!msg->sparse_read) + ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); } /* @@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con) prepare_read_tag(con); } -static int read_partial_message_section(struct ceph_connection *con, - struct kvec *section, - unsigned int sec_len, u32 *crc) +static int read_partial_message_chunk(struct ceph_connection *con, + struct kvec *section, + unsigned int sec_len, u32 *crc) { int ret, left; @@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con, section->iov_len += ret; } if (section->iov_len == sec_len) - *crc = crc32c(0, section->iov_base, section->iov_len); + *crc = crc32c(*crc, section->iov_base, section->iov_len); return 1; } +static inline int read_partial_message_section(struct ceph_connection *con, + struct kvec *section, + unsigned int sec_len, u32 *crc) +{ + *crc = 0; + return read_partial_message_chunk(con, section, sec_len, crc); +} + +static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); + + if (do_bounce && unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + while (cursor->sr_resid > 0) { + struct page *page, *rpage; + size_t off, len; + int ret; + + page = ceph_msg_data_next(cursor, &off, &len); + rpage = do_bounce ? con->bounce_page : page; + + /* clamp to what remains in extent */ + len = min_t(int, len, cursor->sr_resid); + ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); + if (ret <= 0) + return ret; + *crc = ceph_crc32c_page(*crc, rpage, off, ret); + ceph_msg_data_advance(cursor, (size_t)ret); + cursor->sr_resid -= ret; + if (do_bounce) + memcpy_page(page, off, rpage, off, ret); + } + return 1; +} + +static int read_sparse_msg_data(struct ceph_connection *con) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); + u32 crc = 0; + int ret = 1; + + if (do_datacrc) + crc = con->in_data_crc; + + do { + if (con->v1.in_sr_kvec.iov_base) + ret = read_partial_message_chunk(con, + &con->v1.in_sr_kvec, + con->v1.in_sr_len, + &crc); + else if (cursor->sr_resid > 0) + ret = read_sparse_msg_extent(con, &crc); + + if (ret <= 0) { + if (do_datacrc) + con->in_data_crc = crc; + return ret; + } + + memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); + ret = con->ops->sparse_read(con, cursor, + (char **)&con->v1.in_sr_kvec.iov_base); + con->v1.in_sr_len = ret; + } while (ret > 0); + + if (do_datacrc) + con->in_data_crc = crc; + + return ret < 0 ? ret : 1; /* must return > 0 to indicate success */ +} + static int read_partial_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; @@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con) if (!m->num_data_items) return -EIO; - if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + if (m->sparse_read) + ret = read_sparse_msg_data(con); + else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else ret = read_partial_msg_data(con); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 1df1d29dee92..d09a39ff2cf0 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -8,9 +8,9 @@ #include <linux/ceph/ceph_debug.h> #include <crypto/aead.h> -#include <crypto/algapi.h> /* for crypto_memneq() */ #include <crypto/hash.h> #include <crypto/sha2.h> +#include <crypto/utils.h> #include <linux/bvec.h> #include <linux/crc32c.h> #include <linux/net.h> @@ -52,14 +52,16 @@ #define FRAME_LATE_STATUS_COMPLETE 0xe #define FRAME_LATE_STATUS_ABORTED_MASK 0xf -#define IN_S_HANDLE_PREAMBLE 1 -#define IN_S_HANDLE_CONTROL 2 -#define IN_S_HANDLE_CONTROL_REMAINDER 3 -#define IN_S_PREPARE_READ_DATA 4 -#define IN_S_PREPARE_READ_DATA_CONT 5 -#define IN_S_PREPARE_READ_ENC_PAGE 6 -#define IN_S_HANDLE_EPILOGUE 7 -#define IN_S_FINISH_SKIP 8 +#define IN_S_HANDLE_PREAMBLE 1 +#define IN_S_HANDLE_CONTROL 2 +#define IN_S_HANDLE_CONTROL_REMAINDER 3 +#define IN_S_PREPARE_READ_DATA 4 +#define IN_S_PREPARE_READ_DATA_CONT 5 +#define IN_S_PREPARE_READ_ENC_PAGE 6 +#define IN_S_PREPARE_SPARSE_DATA 7 +#define IN_S_PREPARE_SPARSE_DATA_CONT 8 +#define IN_S_HANDLE_EPILOGUE 9 +#define IN_S_FINISH_SKIP 10 #define OUT_S_QUEUE_DATA 1 #define OUT_S_QUEUE_DATA_CONT 2 @@ -967,12 +969,48 @@ static void init_sgs_cursor(struct scatterlist **sg, } } +/** + * init_sgs_pages: set up scatterlist on an array of page pointers + * @sg: scatterlist to populate + * @pages: pointer to page array + * @dpos: position in the array to start (bytes) + * @dlen: len to add to sg (bytes) + * @pad: pointer to pad destination (if any) + * + * Populate the scatterlist from the page array, starting at an arbitrary + * byte in the array and running for a specified length. + */ +static void init_sgs_pages(struct scatterlist **sg, struct page **pages, + int dpos, int dlen, u8 *pad) +{ + int idx = dpos >> PAGE_SHIFT; + int off = offset_in_page(dpos); + int resid = dlen; + + do { + int len = min(resid, (int)PAGE_SIZE - off); + + sg_set_page(*sg, pages[idx], len, off); + *sg = sg_next(*sg); + off = 0; + ++idx; + resid -= len; + } while (resid); + + if (need_padding(dlen)) { + sg_set_buf(*sg, pad, padding_len(dlen)); + *sg = sg_next(*sg); + } +} + static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg, u8 *front_pad, u8 *middle_pad, u8 *data_pad, - void *epilogue, bool add_tag) + void *epilogue, struct page **pages, int dpos, + bool add_tag) { struct ceph_msg_data_cursor cursor; struct scatterlist *cur_sg; + int dlen = data_len(msg); int sg_cnt; int ret; @@ -986,9 +1024,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg, if (middle_len(msg)) sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base, middle_len(msg)); - if (data_len(msg)) { - ceph_msg_data_cursor_init(&cursor, msg, data_len(msg)); - sg_cnt += calc_sg_cnt_cursor(&cursor); + if (dlen) { + if (pages) { + sg_cnt += calc_pages_for(dpos, dlen); + if (need_padding(dlen)) + sg_cnt++; + } else { + ceph_msg_data_cursor_init(&cursor, msg, dlen); + sg_cnt += calc_sg_cnt_cursor(&cursor); + } } ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO); @@ -1002,9 +1046,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg, if (middle_len(msg)) init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg), middle_pad); - if (data_len(msg)) { - ceph_msg_data_cursor_init(&cursor, msg, data_len(msg)); - init_sgs_cursor(&cur_sg, &cursor, data_pad); + if (dlen) { + if (pages) { + init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad); + } else { + ceph_msg_data_cursor_init(&cursor, msg, dlen); + init_sgs_cursor(&cur_sg, &cursor, data_pad); + } } WARN_ON(!sg_is_last(cur_sg)); @@ -1039,10 +1087,53 @@ static int decrypt_control_remainder(struct ceph_connection *con) padded_len(rem_len) + CEPH_GCM_TAG_LEN); } +/* Process sparse read data that lives in a buffer */ +static int process_v2_sparse_read(struct ceph_connection *con, + struct page **pages, int spos) +{ + struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor; + int ret; + + for (;;) { + char *buf = NULL; + + ret = con->ops->sparse_read(con, cursor, &buf); + if (ret <= 0) + return ret; + + dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf); + + do { + int idx = spos >> PAGE_SHIFT; + int soff = offset_in_page(spos); + struct page *spage = con->v2.in_enc_pages[idx]; + int len = min_t(int, ret, PAGE_SIZE - soff); + + if (buf) { + memcpy_from_page(buf, spage, soff, len); + buf += len; + } else { + struct bio_vec bv; + + get_bvec_at(cursor, &bv); + len = min_t(int, len, bv.bv_len); + memcpy_page(bv.bv_page, bv.bv_offset, + spage, soff, len); + ceph_msg_data_advance(cursor, len); + } + spos += len; + ret -= len; + } while (ret); + } +} + static int decrypt_tail(struct ceph_connection *con) { struct sg_table enc_sgt = {}; struct sg_table sgt = {}; + struct page **pages = NULL; + bool sparse = con->in_msg->sparse_read; + int dpos = 0; int tail_len; int ret; @@ -1053,9 +1144,14 @@ static int decrypt_tail(struct ceph_connection *con) if (ret) goto out; + if (sparse) { + dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg))); + pages = con->v2.in_enc_pages; + } + ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf), - MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf), - con->v2.in_buf, true); + MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf), + con->v2.in_buf, pages, dpos, true); if (ret) goto out; @@ -1065,6 +1161,12 @@ static int decrypt_tail(struct ceph_connection *con) if (ret) goto out; + if (sparse && data_len(con->in_msg)) { + ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos); + if (ret) + goto out; + } + WARN_ON(!con->v2.in_enc_page_cnt); ceph_release_page_vector(con->v2.in_enc_pages, con->v2.in_enc_page_cnt); @@ -1588,7 +1690,7 @@ static int prepare_message_secure(struct ceph_connection *con) encode_epilogue_secure(con, false); ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop, - &con->v2.out_epil, false); + &con->v2.out_epil, NULL, 0, false); if (ret) goto out; @@ -1825,6 +1927,123 @@ static void prepare_read_data_cont(struct ceph_connection *con) con->v2.in_state = IN_S_HANDLE_EPILOGUE; } +static int prepare_sparse_read_cont(struct ceph_connection *con) +{ + int ret; + struct bio_vec bv; + char *buf = NULL; + struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor; + + WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT); + + if (iov_iter_is_bvec(&con->v2.in_iter)) { + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + con->in_data_crc = crc32c(con->in_data_crc, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + get_bvec_at(cursor, &bv); + memcpy_to_page(bv.bv_page, bv.bv_offset, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + } else { + con->in_data_crc = ceph_crc32c_page(con->in_data_crc, + con->v2.in_bvec.bv_page, + con->v2.in_bvec.bv_offset, + con->v2.in_bvec.bv_len); + } + + ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len); + cursor->sr_resid -= con->v2.in_bvec.bv_len; + dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__, + con->v2.in_bvec.bv_len, cursor->sr_resid); + WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid); + if (cursor->sr_resid) { + get_bvec_at(cursor, &bv); + if (bv.bv_len > cursor->sr_resid) + bv.bv_len = cursor->sr_resid; + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + } + set_in_bvec(con, &bv); + con->v2.data_len_remain -= bv.bv_len; + return 0; + } + } else if (iov_iter_is_kvec(&con->v2.in_iter)) { + /* On first call, we have no kvec so don't compute crc */ + if (con->v2.in_kvec_cnt) { + WARN_ON_ONCE(con->v2.in_kvec_cnt > 1); + con->in_data_crc = crc32c(con->in_data_crc, + con->v2.in_kvecs[0].iov_base, + con->v2.in_kvecs[0].iov_len); + } + } else { + return -EIO; + } + + /* get next extent */ + ret = con->ops->sparse_read(con, cursor, &buf); + if (ret <= 0) { + if (ret < 0) + return ret; + + reset_in_kvecs(con); + add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; + return 0; + } + + if (buf) { + /* receive into buffer */ + reset_in_kvecs(con); + add_in_kvec(con, buf, ret); + con->v2.data_len_remain -= ret; + return 0; + } + + if (ret > cursor->total_resid) { + pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n", + __func__, ret, cursor->total_resid, cursor->resid); + return -EIO; + } + get_bvec_at(cursor, &bv); + if (bv.bv_len > cursor->sr_resid) + bv.bv_len = cursor->sr_resid; + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + } + set_in_bvec(con, &bv); + con->v2.data_len_remain -= ret; + return ret; +} + +static int prepare_sparse_read_data(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->in_msg; + + dout("%s: starting sparse read\n", __func__); + + if (WARN_ON_ONCE(!con->ops->sparse_read)) + return -EOPNOTSUPP; + + if (!con_secure(con)) + con->in_data_crc = -1; + + reset_in_kvecs(con); + con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT; + con->v2.data_len_remain = data_len(msg); + return prepare_sparse_read_cont(con); +} + static int prepare_read_tail_plain(struct ceph_connection *con) { struct ceph_msg *msg = con->in_msg; @@ -1845,7 +2064,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con) } if (data_len(msg)) { - con->v2.in_state = IN_S_PREPARE_READ_DATA; + if (msg->sparse_read) + con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; + else + con->v2.in_state = IN_S_PREPARE_READ_DATA; } else { add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); con->v2.in_state = IN_S_HANDLE_EPILOGUE; @@ -2898,6 +3120,12 @@ static int populate_in_iter(struct ceph_connection *con) prepare_read_enc_page(con); ret = 0; break; + case IN_S_PREPARE_SPARSE_DATA: + ret = prepare_sparse_read_data(con); + break; + case IN_S_PREPARE_SPARSE_DATA_CONT: + ret = prepare_sparse_read_cont(con); + break; case IN_S_HANDLE_EPILOGUE: ret = handle_epilogue(con); break; @@ -3489,6 +3717,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) con->v2.in_state = IN_S_FINISH_SKIP; } +static void revoke_at_prepare_sparse_data(struct ceph_connection *con) +{ + int resid; /* current piece of data */ + int remaining; + + WARN_ON(con_secure(con)); + WARN_ON(!data_len(con->in_msg)); + WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); + resid = iov_iter_count(&con->v2.in_iter); + dout("%s con %p resid %d\n", __func__, con, resid); + + remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain; + con->v2.in_iter.count -= resid; + set_in_skip(con, resid + remaining); + con->v2.in_state = IN_S_FINISH_SKIP; +} + static void revoke_at_handle_epilogue(struct ceph_connection *con) { int resid; @@ -3505,6 +3750,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con) void ceph_con_v2_revoke_incoming(struct ceph_connection *con) { switch (con->v2.in_state) { + case IN_S_PREPARE_SPARSE_DATA: case IN_S_PREPARE_READ_DATA: revoke_at_prepare_read_data(con); break; @@ -3514,6 +3760,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con) case IN_S_PREPARE_READ_ENC_PAGE: revoke_at_prepare_read_enc_page(con); break; + case IN_S_PREPARE_SPARSE_DATA_CONT: + revoke_at_prepare_sparse_data(con); + break; case IN_S_HANDLE_EPILOGUE: revoke_at_handle_epilogue(con); break; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 658a6f2320cf..d3a759e052c8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, osd_data->num_bvecs = num_bvecs; } +static void ceph_osd_iter_init(struct ceph_osd_data *osd_data, + struct iov_iter *iter) +{ + osd_data->type = CEPH_OSD_DATA_TYPE_ITER; + osd_data->iter = *iter; +} + static struct ceph_osd_data * osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which) { @@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); +/** + * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer + * @osd_req: The request to set up + * @which: Index of the operation in which to set the iter + * @iter: The buffer iterator + */ +void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, + unsigned int which, struct iov_iter *iter) +{ + struct ceph_osd_data *osd_data; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_iter_init(osd_data, iter); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_iter); + static void osd_req_op_cls_request_info_pagelist( struct ceph_osd_request *osd_req, unsigned int which, struct ceph_pagelist *pagelist) @@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data) #endif /* CONFIG_BLOCK */ case CEPH_OSD_DATA_TYPE_BVECS: return osd_data->bvec_pos.iter.bi_size; + case CEPH_OSD_DATA_TYPE_ITER: + return iov_iter_count(&osd_data->iter); default: WARN(true, "unrecognized data type %d\n", (int)osd_data->type); return 0; @@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, switch (op->op) { case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: + kfree(op->extent.sparse_ext); ceph_osd_data_release(&op->extent.osd_data); break; case CEPH_OSD_OP_CALL: @@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req, /* reply */ case CEPH_OSD_OP_STAT: case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_LIST_WATCHERS: *num_reply_data_items += 1; break; @@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req, BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO && - opcode != CEPH_OSD_OP_TRUNCATE); + opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ); op->extent.offset = offset; op->extent.length = length; @@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg, #endif } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) { ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos); + } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) { + ceph_msg_data_add_iter(msg, &osd_data->iter); } else { BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE); } @@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_STAT: break; case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: case CEPH_OSD_OP_WRITE: case CEPH_OSD_OP_WRITEFULL: case CEPH_OSD_OP_ZERO: @@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, dst->copy_from.src_fadvise_flags = cpu_to_le32(src->copy_from.src_fadvise_flags); break; + case CEPH_OSD_OP_ASSERT_VER: + dst->assert_ver.unused = cpu_to_le64(0); + dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE && opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE && - opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE); + opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE && + opcode != CEPH_OSD_OP_SPARSE_READ); req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool, GFP_NOFS); @@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (flags & CEPH_OSD_FLAG_WRITE) req->r_data_offset = off; - if (num_ops > 1) + if (num_ops > 1) { + int num_req_ops, num_rep_ops; + /* - * This is a special case for ceph_writepages_start(), but it - * also covers ceph_uninline_data(). If more multi-op request - * use cases emerge, we will need a separate helper. + * If this is a multi-op write request, assume that we'll need + * request ops. If it's a multi-op read then assume we'll need + * reply ops. Anything else and call it -EINVAL. */ - r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0); - else + if (flags & CEPH_OSD_FLAG_WRITE) { + num_req_ops = num_ops; + num_rep_ops = 0; + } else if (flags & CEPH_OSD_FLAG_READ) { + num_req_ops = 0; + num_rep_ops = num_ops; + } else { + r = -EINVAL; + goto fail; + } + + r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops, + num_rep_ops); + } else { r = ceph_osdc_alloc_messages(req, GFP_NOFS); + } if (r) goto fail; @@ -1120,6 +1171,18 @@ fail: } EXPORT_SYMBOL(ceph_osdc_new_request); +int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt) +{ + op->extent.sparse_ext_cnt = cnt; + op->extent.sparse_ext = kmalloc_array(cnt, + sizeof(*op->extent.sparse_ext), + GFP_NOFS); + if (!op->extent.sparse_ext) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map); + /* * We keep osd requests in an rbtree, sorted by ->r_tid. */ @@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd) { refcount_set(&osd->o_ref, 1); RB_CLEAR_NODE(&osd->o_node); + spin_lock_init(&osd->o_requests_lock); osd->o_requests = RB_ROOT; osd->o_linger_requests = RB_ROOT; osd->o_backoff_mappings = RB_ROOT; @@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd) mutex_init(&osd->lock); } +static void ceph_init_sparse_read(struct ceph_sparse_read *sr) +{ + kfree(sr->sr_extent); + memset(sr, '\0', sizeof(*sr)); + sr->sr_state = CEPH_SPARSE_READ_HDR; +} + static void osd_cleanup(struct ceph_osd *osd) { WARN_ON(!RB_EMPTY_NODE(&osd->o_node)); @@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd) WARN_ON(!list_empty(&osd->o_osd_lru)); WARN_ON(!list_empty(&osd->o_keepalive_item)); + ceph_init_sparse_read(&osd->o_sparse_read); + if (osd->o_auth.authorizer) { WARN_ON(osd_homeless(osd)); ceph_auth_destroy_authorizer(osd->o_auth.authorizer); @@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) osd_init(osd); osd->o_osdc = osdc; osd->o_osd = onum; + osd->o_sparse_op_idx = -1; + + ceph_init_sparse_read(&osd->o_sparse_read); ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); @@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req) atomic_inc(&osd->o_osdc->num_homeless); get_osd(osd); + spin_lock(&osd->o_requests_lock); insert_request(&osd->o_requests, req); + spin_unlock(&osd->o_requests_lock); req->r_osd = osd; } @@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req) req, req->r_tid); req->r_osd = NULL; + spin_lock(&osd->o_requests_lock); erase_request(&osd->o_requests, req); + spin_unlock(&osd->o_requests_lock); put_osd(osd); if (!osd_homeless(osd)) @@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req) &op->raw_data_in); break; case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_SPARSE_READ: ceph_osdc_msg_data_add(reply_msg, &op->extent.osd_data); break; @@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req) req->r_end_latency = ktime_get(); - if (req->r_osd) + if (req->r_osd) { + ceph_init_sparse_read(&req->r_osd->o_sparse_read); unlink_request(req->r_osd, req); + } atomic_dec(&osdc->num_requests); /* @@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) * one (type of) reply back. */ WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); + req->r_version = m.user_version; req->r_result = m.result ?: data_len; finish_request(req); mutex_unlock(&osd->lock); @@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg) ceph_msg_put(msg); } +/* How much sparse data was requested? */ +static u64 sparse_data_requested(struct ceph_osd_request *req) +{ + u64 len = 0; + + if (req->r_flags & CEPH_OSD_FLAG_READ) { + int i; + + for (i = 0; i < req->r_num_ops; ++i) { + struct ceph_osd_req_op *op = &req->r_ops[i]; + + if (op->op == CEPH_OSD_OP_SPARSE_READ) + len += op->extent.length; + } + } + return len; +} + /* * Lookup and return message for incoming reply. Don't try to do * anything about a larger than preallocated data portion of the @@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int front_len = le32_to_cpu(hdr->front_len); int data_len = le32_to_cpu(hdr->data_len); u64 tid = le64_to_cpu(hdr->tid); + u64 srlen; down_read(&osdc->lock); if (!osd_registered(osd)) { @@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, req->r_reply = m; } - if (data_len > req->r_reply->data_length) { + srlen = sparse_data_requested(req); + if (!srlen && data_len > req->r_reply->data_length) { pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n", __func__, osd->o_osd, req->r_tid, data_len, req->r_reply->data_length); @@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m = ceph_msg_get(req->r_reply); + m->sparse_read = (bool)srlen; + dout("get_reply tid %lld %p\n", tid, m); out_unlock_session: @@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg) return ceph_auth_check_message_signature(auth, msg); } +static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len, + bool zero) +{ + while (len) { + struct page *page; + size_t poff, plen; + + page = ceph_msg_data_next(cursor, &poff, &plen); + if (plen > len) + plen = len; + if (zero) + zero_user_segment(page, poff, poff + plen); + len -= plen; + ceph_msg_data_advance(cursor, plen); + } +} + +static int prep_next_sparse_read(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor) +{ + struct ceph_osd *o = con->private; + struct ceph_sparse_read *sr = &o->o_sparse_read; + struct ceph_osd_request *req; + struct ceph_osd_req_op *op; + + spin_lock(&o->o_requests_lock); + req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid)); + if (!req) { + spin_unlock(&o->o_requests_lock); + return -EBADR; + } + + if (o->o_sparse_op_idx < 0) { + u64 srlen = sparse_data_requested(req); + + dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", + __func__, o->o_osd, srlen); + ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); + } else { + u64 end; + + op = &req->r_ops[o->o_sparse_op_idx]; + + WARN_ON_ONCE(op->extent.sparse_ext); + + /* hand back buffer we took earlier */ + op->extent.sparse_ext = sr->sr_extent; + sr->sr_extent = NULL; + op->extent.sparse_ext_cnt = sr->sr_count; + sr->sr_ext_len = 0; + dout("%s: [%d] completed extent array len %d cursor->resid %zd\n", + __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid); + /* Advance to end of data for this operation */ + end = ceph_sparse_ext_map_end(op); + if (end < sr->sr_req_len) + advance_cursor(cursor, sr->sr_req_len - end, false); + } + + ceph_init_sparse_read(sr); + + /* find next op in this request (if any) */ + while (++o->o_sparse_op_idx < req->r_num_ops) { + op = &req->r_ops[o->o_sparse_op_idx]; + if (op->op == CEPH_OSD_OP_SPARSE_READ) + goto found; + } + + /* reset for next sparse read request */ + spin_unlock(&o->o_requests_lock); + o->o_sparse_op_idx = -1; + return 0; +found: + sr->sr_req_off = op->extent.offset; + sr->sr_req_len = op->extent.length; + sr->sr_pos = sr->sr_req_off; + dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__, + o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len); + + /* hand off request's sparse extent map buffer */ + sr->sr_ext_len = op->extent.sparse_ext_cnt; + op->extent.sparse_ext_cnt = 0; + sr->sr_extent = op->extent.sparse_ext; + op->extent.sparse_ext = NULL; + + spin_unlock(&o->o_requests_lock); + return 1; +} + +#ifdef __BIG_ENDIAN +static inline void convert_extent_map(struct ceph_sparse_read *sr) +{ + int i; + + for (i = 0; i < sr->sr_count; i++) { + struct ceph_sparse_extent *ext = &sr->sr_extent[i]; + + ext->off = le64_to_cpu((__force __le64)ext->off); + ext->len = le64_to_cpu((__force __le64)ext->len); + } +} +#else +static inline void convert_extent_map(struct ceph_sparse_read *sr) +{ +} +#endif + +#define MAX_EXTENTS 4096 + +static int osd_sparse_read(struct ceph_connection *con, + struct ceph_msg_data_cursor *cursor, + char **pbuf) +{ + struct ceph_osd *o = con->private; + struct ceph_sparse_read *sr = &o->o_sparse_read; + u32 count = sr->sr_count; + u64 eoff, elen; + int ret; + + switch (sr->sr_state) { + case CEPH_SPARSE_READ_HDR: +next_op: + ret = prep_next_sparse_read(con, cursor); + if (ret <= 0) + return ret; + + /* number of extents */ + ret = sizeof(sr->sr_count); + *pbuf = (char *)&sr->sr_count; + sr->sr_state = CEPH_SPARSE_READ_EXTENTS; + break; + case CEPH_SPARSE_READ_EXTENTS: + /* Convert sr_count to host-endian */ + count = le32_to_cpu((__force __le32)sr->sr_count); + sr->sr_count = count; + dout("[%d] got %u extents\n", o->o_osd, count); + + if (count > 0) { + if (!sr->sr_extent || count > sr->sr_ext_len) { + /* + * Apply a hard cap to the number of extents. + * If we have more, assume something is wrong. + */ + if (count > MAX_EXTENTS) { + dout("%s: OSD returned 0x%x extents in a single reply!\n", + __func__, count); + return -EREMOTEIO; + } + + /* no extent array provided, or too short */ + kfree(sr->sr_extent); + sr->sr_extent = kmalloc_array(count, + sizeof(*sr->sr_extent), + GFP_NOIO); + if (!sr->sr_extent) + return -ENOMEM; + sr->sr_ext_len = count; + } + ret = count * sizeof(*sr->sr_extent); + *pbuf = (char *)sr->sr_extent; + sr->sr_state = CEPH_SPARSE_READ_DATA_LEN; + break; + } + /* No extents? Read data len */ + fallthrough; + case CEPH_SPARSE_READ_DATA_LEN: + convert_extent_map(sr); + ret = sizeof(sr->sr_datalen); + *pbuf = (char *)&sr->sr_datalen; + sr->sr_state = CEPH_SPARSE_READ_DATA; + break; + case CEPH_SPARSE_READ_DATA: + if (sr->sr_index >= count) { + sr->sr_state = CEPH_SPARSE_READ_HDR; + goto next_op; + } + + eoff = sr->sr_extent[sr->sr_index].off; + elen = sr->sr_extent[sr->sr_index].len; + + dout("[%d] ext %d off 0x%llx len 0x%llx\n", + o->o_osd, sr->sr_index, eoff, elen); + + if (elen > INT_MAX) { + dout("Sparse read extent length too long (0x%llx)\n", + elen); + return -EREMOTEIO; + } + + /* zero out anything from sr_pos to start of extent */ + if (sr->sr_pos < eoff) + advance_cursor(cursor, eoff - sr->sr_pos, true); + + /* Set position to end of extent */ + sr->sr_pos = eoff + elen; + + /* send back the new length and nullify the ptr */ + cursor->sr_resid = elen; + ret = elen; + *pbuf = NULL; + + /* Bump the array index */ + ++sr->sr_index; + break; + } + return ret; +} + static const struct ceph_connection_operations osd_con_ops = { .get = osd_get_con, .put = osd_put_con, + .sparse_read = osd_sparse_read, .alloc_msg = osd_alloc_msg, .dispatch = osd_dispatch, .fault = osd_fault, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b3b3af0e7844..272f09251343 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1446,7 +1446,7 @@ proto_again: break; } - nhoff += ntohs(hdr->message_length); + nhoff += sizeof(struct ptp_header); fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ddd0f32de20e..6b76cd103195 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3779,6 +3779,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, const char *dev_name_source; char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; char *p_name; + size_t neigh_vars_size; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT); if (!t) @@ -3790,11 +3791,13 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[i].extra2 = p; } + neigh_vars_size = ARRAY_SIZE(t->neigh_vars); if (dev) { dev_name_source = dev->name; /* Terminate the table early */ memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); + neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1; } else { struct neigh_table *tbl = p->tbl; dev_name_source = "default"; @@ -3841,8 +3844,9 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); - t->sysctl_header = - register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); + t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p), + neigh_path, t->neigh_vars, + neigh_vars_size); if (!t->sysctl_header) goto free; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 782273bb93c2..03f1edb948d7 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -712,7 +712,8 @@ static __net_init int sysctl_core_net_init(struct net *net) tmp->data += (char *)net - (char *)&init_net; } - net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); + net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, + ARRAY_SIZE(netns_core_table)); if (net->core.sysctl_hdr == NULL) goto err_reg; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 629daacc9607..b71dab630a87 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -594,6 +594,7 @@ static int fill_frame_info(struct hsr_frame_info *frame, proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto; /* FIXME: */ netdev_warn_once(skb->dev, "VLAN not yet supported"); + return -EINVAL; } frame->is_from_san = false; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index a91283d1e5bf..6dd960ec558c 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -360,6 +360,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) struct ctl_table_header *hdr; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + size_t table_size = ARRAY_SIZE(lowpan_frags_ns_ctl_table); table = lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { @@ -369,8 +370,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) goto err_alloc; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) + if (net->user_ns != &init_user_ns) { table[0].procname = NULL; + table_size = 0; + } } table[0].data = &ieee802154_lowpan->fqdir->high_thresh; @@ -379,7 +382,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh; table[2].data = &ieee802154_lowpan->fqdir->timeout; - hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); + hdr = register_net_sysctl_sz(net, "net/ieee802154/6lowpan", table, + table_size); if (hdr == NULL) goto err_reg; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c3658b8755bc..ca0ff15dc8fa 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -355,14 +355,14 @@ static void __inet_del_ifa(struct in_device *in_dev, { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1; - struct in_ifaddr *last_prim; + struct in_ifaddr __rcu **last_prim; struct in_ifaddr *prev_prom = NULL; int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev); ASSERT_RTNL(); ifa1 = rtnl_dereference(*ifap); - last_prim = rtnl_dereference(in_dev->ifa_list); + last_prim = ifap; if (in_dev->dead) goto no_promotions; @@ -376,7 +376,7 @@ static void __inet_del_ifa(struct in_device *in_dev, while ((ifa = rtnl_dereference(*ifap1)) != NULL) { if (!(ifa->ifa_flags & IFA_F_SECONDARY) && ifa1->ifa_scope <= ifa->ifa_scope) - last_prim = ifa; + last_prim = &ifa->ifa_next; if (!(ifa->ifa_flags & IFA_F_SECONDARY) || ifa1->ifa_mask != ifa->ifa_mask || @@ -440,9 +440,9 @@ no_promotions: rcu_assign_pointer(prev_prom->ifa_next, next_sec); - last_sec = rtnl_dereference(last_prim->ifa_next); + last_sec = rtnl_dereference(*last_prim); rcu_assign_pointer(promote->ifa_next, last_sec); - rcu_assign_pointer(last_prim->ifa_next, promote); + rcu_assign_pointer(*last_prim, promote); } promote->ifa_flags &= ~IFA_F_SECONDARY; @@ -2737,7 +2737,8 @@ static __net_init int devinet_init_net(struct net *net) goto err_reg_dflt; err = -ENOMEM; - forw_hdr = register_net_sysctl(net, "net/ipv4", tbl); + forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl, + ARRAY_SIZE(ctl_forward_entry)); if (!forw_hdr) goto err_reg_ctl; net->ipv4.forw_hdr = forw_hdr; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 7876b7d703cb..c32f5e28758b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -815,41 +815,45 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { + if (!net_eq(ib2_net(tb), net) || tb->port != port || + tb->l3mdev != l3mdev) + return false; + #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) + if (sk->sk_family != tb->family) { + if (sk->sk_family == AF_INET) + return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && + tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; + return false; + } if (sk->sk_family == AF_INET6) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); - else + return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); #endif - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr; + return tb->rcv_saddr == sk->sk_rcv_saddr; } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { + if (!net_eq(ib2_net(tb), net) || tb->port != port || + tb->l3mdev != l3mdev) + return false; + #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family != tb->family) { if (sk->sk_family == AF_INET) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_any(&tb->v6_rcv_saddr); + return ipv6_addr_any(&tb->v6_rcv_saddr) || + ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr); return false; } if (sk->sk_family == AF_INET6) - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && - ipv6_addr_any(&tb->v6_rcv_saddr); - else + return ipv6_addr_any(&tb->v6_rcv_saddr); #endif - return net_eq(ib2_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev && tb->rcv_saddr == 0; + return tb->rcv_saddr == 0; } /* The socket's bhash2 hashbucket spinlock must be held when this is called */ diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 69c00ffdcf3e..a4941f53b523 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -615,7 +615,8 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table[2].data = &net->ipv4.fqdir->timeout; table[3].data = &net->ipv4.fqdir->max_dist; - hdr = register_net_sysctl(net, "net/ipv4", table); + hdr = register_net_sysctl_sz(net, "net/ipv4", table, + ARRAY_SIZE(ip4_frags_ns_ctl_table)); if (!hdr) goto err_reg; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a3f57a3fa41..66f419e7f9a7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3593,6 +3593,7 @@ static struct ctl_table ipv4_route_netns_table[] = { static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; + size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); tbl = ipv4_route_netns_table; if (!net_eq(net, &init_net)) { @@ -3604,8 +3605,10 @@ static __net_init int sysctl_route_net_init(struct net *net) /* Don't export non-whitelisted sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { - if (tbl[0].procname != ipv4_route_flush_procname) + if (tbl[0].procname != ipv4_route_flush_procname) { tbl[0].procname = NULL; + table_size = 0; + } } /* Update the variables to point into the current struct net @@ -3616,7 +3619,8 @@ static __net_init int sysctl_route_net_init(struct net *net) } tbl[0].extra1 = net; - net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl); + net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route", + tbl, table_size); if (!net->ipv4.route_hdr) goto err_reg; return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2afb0870648b..6ac890b4073f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1519,7 +1519,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } } - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); + net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table, + ARRAY_SIZE(ipv4_net_table)); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc4b250262c1..0c3040a63ebd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1742,7 +1742,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb, } #ifdef CONFIG_MMU -const struct vm_operations_struct tcp_vm_ops = { +static const struct vm_operations_struct tcp_vm_ops = { }; int tcp_mmap(struct file *file, struct socket *sock, @@ -2045,13 +2045,10 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm, unsigned long address, bool *mmap_locked) { - struct vm_area_struct *vma = NULL; + struct vm_area_struct *vma = lock_vma_under_rcu(mm, address); -#ifdef CONFIG_PER_VMA_LOCK - vma = lock_vma_under_rcu(mm, address); -#endif if (vma) { - if (!vma_is_tcp(vma)) { + if (vma->vm_ops != &tcp_vm_ops) { vma_end_read(vma); return NULL; } @@ -2061,7 +2058,7 @@ static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm, mmap_read_lock(mm); vma = vma_lookup(mm, address); - if (!vma || !vma_is_tcp(vma)) { + if (!vma || vma->vm_ops != &tcp_vm_ops) { mmap_read_unlock(mm); return NULL; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index cdcc0f6b4f0a..c33bca2c3841 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -169,7 +169,8 @@ static __net_init int xfrm4_net_sysctl_init(struct net *net) table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; } - hdr = register_net_sysctl(net, "net/ipv4", table); + hdr = register_net_sysctl_sz(net, "net/ipv4", table, + ARRAY_SIZE(xfrm4_policy_table)); if (!hdr) goto err_reg; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 85cdbc252654..0b6ee962c84e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -7135,7 +7135,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); - p->sysctl_header = register_net_sysctl(net, path, table); + p->sysctl_header = register_net_sysctl_sz(net, path, table, + ARRAY_SIZE(addrconf_sysctl)); if (!p->sysctl_header) goto free; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6d88f5248c1f..93a594a901d1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1227,4 +1227,9 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) } return table; } + +size_t ipv6_icmp_sysctl_table_size(void) +{ + return ARRAY_SIZE(ipv6_icmp_table_template); +} #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d13240f13607..b2dd48911c8d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -87,7 +87,8 @@ static int nf_ct_frag6_sysctl_register(struct net *net) table[2].data = &nf_frag->fqdir->high_thresh; table[2].extra1 = &nf_frag->fqdir->low_thresh; - hdr = register_net_sysctl(net, "net/netfilter", table); + hdr = register_net_sysctl_sz(net, "net/netfilter", table, + ARRAY_SIZE(nf_ct_frag6_sysctl_table)); if (hdr == NULL) goto err_reg; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5bc8a28e67f9..5ebc47da1000 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -470,7 +470,8 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table[1].extra2 = &net->ipv6.fqdir->high_thresh; table[2].data = &net->ipv6.fqdir->timeout; - hdr = register_net_sysctl(net, "net/ipv6", table); + hdr = register_net_sysctl_sz(net, "net/ipv6", table, + ARRAY_SIZE(ip6_frags_ns_ctl_table)); if (!hdr) goto err_reg; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 01d6d352850a..9c687b357e6a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6456,6 +6456,15 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) return table; } + +size_t ipv6_route_sysctl_table_size(struct net *net) +{ + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + return 1; + + return ARRAY_SIZE(ipv6_route_table_template); +} #endif static int __net_init ip6_route_net_init(struct net *net) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 94a0a294c6a1..888676163e90 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -275,17 +275,23 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_icmp_table) goto out_ipv6_route_table; - net->ipv6.sysctl.hdr = register_net_sysctl(net, "net/ipv6", ipv6_table); + net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6", + ipv6_table, + ARRAY_SIZE(ipv6_table_template)); if (!net->ipv6.sysctl.hdr) goto out_ipv6_icmp_table; - net->ipv6.sysctl.route_hdr = - register_net_sysctl(net, "net/ipv6/route", ipv6_route_table); + net->ipv6.sysctl.route_hdr = register_net_sysctl_sz(net, + "net/ipv6/route", + ipv6_route_table, + ipv6_route_sysctl_table_size(net)); if (!net->ipv6.sysctl.route_hdr) goto out_unregister_ipv6_table; - net->ipv6.sysctl.icmp_hdr = - register_net_sysctl(net, "net/ipv6/icmp", ipv6_icmp_table); + net->ipv6.sysctl.icmp_hdr = register_net_sysctl_sz(net, + "net/ipv6/icmp", + ipv6_icmp_table, + ipv6_icmp_sysctl_table_size()); if (!net->ipv6.sysctl.icmp_hdr) goto out_unregister_route_table; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 188224a76685..41a680c76d2e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -201,7 +201,8 @@ static int __net_init xfrm6_net_sysctl_init(struct net *net) table[0].data = &net->xfrm.xfrm6_dst_ops.gc_thresh; } - hdr = register_net_sysctl(net, "net/ipv6", table); + hdr = register_net_sysctl_sz(net, "net/ipv6", table, + ARRAY_SIZE(xfrm6_policy_table)); if (!hdr) goto err_reg; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 4580f61426bb..dd1d8ffd5f59 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -930,15 +930,18 @@ partial_message: out_error: kcm_push(kcm); - if (copied && sock->type == SOCK_SEQPACKET) { + if (sock->type == SOCK_SEQPACKET) { /* Wrote some bytes before encountering an * error, return partial success. */ - goto partial_message; - } - - if (head != kcm->seq_skb) + if (copied) + goto partial_message; + if (head != kcm->seq_skb) + kfree_skb(head); + } else { kfree_skb(head); + kcm->seq_skb = NULL; + } err = sk_stream_error(sk, msg->msg_flags, err); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bf6e81d56263..1af29af65388 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1419,7 +1419,8 @@ static int mpls_dev_sysctl_register(struct net_device *dev, snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); - mdev->sysctl = register_net_sysctl(net, path, table); + mdev->sysctl = register_net_sysctl_sz(net, path, table, + ARRAY_SIZE(mpls_dev_table)); if (!mdev->sysctl) goto free; @@ -2689,7 +2690,8 @@ static int mpls_net_init(struct net *net) for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++) table[i].data = (char *)net + (uintptr_t)table[i].data; - net->mpls.ctl = register_net_sysctl(net, "net/mpls", table); + net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table, + ARRAY_SIZE(mpls_table)); if (net->mpls.ctl == NULL) { kfree(table); return -ENOMEM; diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index c46c22a84d23..e72b518c5d02 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -164,7 +164,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[5].data = &pernet->pm_type; table[6].data = &pernet->scheduler; - hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); + hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, + ARRAY_SIZE(mptcp_sysctl_table)); if (!hdr) goto err_reg; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4bb0d90eca1c..143a341bbc0a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4269,6 +4269,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) struct net *net = ipvs->net; struct ctl_table *tbl; int idx, ret; + size_t ctl_table_size = ARRAY_SIZE(vs_vars); atomic_set(&ipvs->dropentry, 0); spin_lock_init(&ipvs->dropentry_lock); @@ -4285,8 +4286,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) + if (net->user_ns != &init_user_ns) { tbl[0].procname = NULL; + ctl_table_size = 0; + } } else tbl = vs_vars; /* Initialize sysctl defaults */ @@ -4357,7 +4360,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) #endif ret = -ENOMEM; - ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); + ipvs->sysctl_hdr = register_net_sysctl_sz(net, "net/ipv4/vs", tbl, + ctl_table_size); if (!ipvs->sysctl_hdr) goto err; ipvs->sysctl_tbl = tbl; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1b87214d385e..cf78ba4ce5ff 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -550,6 +550,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = { static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + size_t vars_table_size = ARRAY_SIZE(vs_vars_table); if (!ipvs) return -ENOENT; @@ -562,16 +563,19 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) + if (net->user_ns != &init_user_ns) { ipvs->lblc_ctl_table[0].procname = NULL; + vars_table_size = 0; + } } else ipvs->lblc_ctl_table = vs_vars_table; ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; - ipvs->lblc_ctl_header = - register_net_sysctl(net, "net/ipv4/vs", ipvs->lblc_ctl_table); + ipvs->lblc_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs", + ipvs->lblc_ctl_table, + vars_table_size); if (!ipvs->lblc_ctl_header) { if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index ad8f5fea6d3a..9eddf118b40e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -736,6 +736,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); + size_t vars_table_size = ARRAY_SIZE(vs_vars_table); if (!ipvs) return -ENOENT; @@ -748,15 +749,18 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) + if (net->user_ns != &init_user_ns) { ipvs->lblcr_ctl_table[0].procname = NULL; + vars_table_size = 0; + } } else ipvs->lblcr_ctl_table = vs_vars_table; ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; - ipvs->lblcr_ctl_header = - register_net_sysctl(net, "net/ipv4/vs", ipvs->lblcr_ctl_table); + ipvs->lblcr_ctl_header = register_net_sysctl_sz(net, "net/ipv4/vs", + ipvs->lblcr_ctl_table, + vars_table_size); if (!ipvs->lblcr_ctl_header) { if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 169e16fc2bce..0ee98ce5b816 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1106,7 +1106,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_BUCKETS].mode = 0444; } - cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table); + cnet->sysctl_header = register_net_sysctl_sz(net, "net/netfilter", + table, + ARRAY_SIZE(nf_ct_sysctl_table)); if (!cnet->sysctl_header) goto out_unregister_netfilter; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8a29290149bd..8cc52d2bd31b 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -487,9 +487,10 @@ static int netfilter_log_sysctl_init(struct net *net) for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) table[i].extra2 = net; - net->nf.nf_log_dir_header = register_net_sysctl(net, - "net/netfilter/nf_log", - table); + net->nf.nf_log_dir_header = register_net_sysctl_sz(net, + "net/netfilter/nf_log", + table, + ARRAY_SIZE(nf_log_sysctl_table)); if (!net->nf.nf_log_dir_header) goto err_reg; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index fadbd4ed3dc0..c4e0516a8dfa 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -327,7 +327,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ -static int in_range(const struct nf_conntrack_tuple *tuple, +static int nf_in_range(const struct nf_conntrack_tuple *tuple, const struct nf_nat_range2 *range) { /* If we are supposed to map IPs, then we must be in the @@ -376,7 +376,7 @@ find_appropriate_src(struct net *net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(result, range)) + if (nf_in_range(result, range)) return 1; } } @@ -607,7 +607,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (maniptype == NF_NAT_MANIP_SRC && !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ - if (in_range(orig_tuple, range)) { + if (nf_in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { *tuple = *orig_tuple; return; diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index cc8fa9e36159..ed1508a9e093 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -172,7 +172,7 @@ static int nci_uart_tty_open(struct tty_struct *tty) */ static void nci_uart_tty_close(struct tty_struct *tty) { - struct nci_uart *nu = (void *)tty->disc_data; + struct nci_uart *nu = tty->disc_data; /* Detach from the tty */ tty->disc_data = NULL; @@ -204,7 +204,7 @@ static void nci_uart_tty_close(struct tty_struct *tty) */ static void nci_uart_tty_wakeup(struct tty_struct *tty) { - struct nci_uart *nu = (void *)tty->disc_data; + struct nci_uart *nu = tty->disc_data; if (!nu) return; @@ -296,9 +296,9 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, * Return Value: None */ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, - const char *flags, int count) + const u8 *flags, size_t count) { - struct nci_uart *nu = (void *)tty->disc_data; + struct nci_uart *nu = tty->disc_data; if (!nu || tty != nu->tty) return; @@ -325,7 +325,7 @@ static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { - struct nci_uart *nu = (void *)tty->disc_data; + struct nci_uart *nu = tty->disc_data; int err = 0; switch (cmd) { @@ -345,20 +345,14 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, unsigned int cmd, /* We don't provide read/write/poll interface for user space. */ static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file, - unsigned char *buf, size_t nr, - void **cookie, unsigned long offset) + u8 *buf, size_t nr, void **cookie, + unsigned long offset) { return 0; } static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file, - const unsigned char *data, size_t count) -{ - return 0; -} - -static __poll_t nci_uart_tty_poll(struct tty_struct *tty, - struct file *filp, poll_table *wait) + const u8 *data, size_t count) { return 0; } @@ -435,7 +429,6 @@ static struct tty_ldisc_ops nci_uart_ldisc = { .close = nci_uart_tty_close, .read = nci_uart_tty_read, .write = nci_uart_tty_write, - .poll = nci_uart_tty_poll, .receive_buf = nci_uart_tty_receive, .write_wakeup = nci_uart_tty_wakeup, .ioctl = nci_uart_tty_ioctl, diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c5b86066ff66..2dba7505b414 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -565,7 +565,8 @@ static __net_init int rds_tcp_init_net(struct net *net) } tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size; tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size; - rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl); + rtn->rds_tcp_sysctl = register_net_sysctl_sz(net, "net/rds/tcp", tbl, + ARRAY_SIZE(rds_tcp_sysctl_table)); if (!rtn->rds_tcp_sysctl) { pr_warn("could not register sysctl\n"); err = -ENOMEM; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index a7a9136198fd..f65d6f92afcb 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -612,7 +612,9 @@ int sctp_sysctl_net_register(struct net *net) table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans; table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; - net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp", + table, + ARRAY_SIZE(sctp_net_table)); if (net->sctp.sysctl_header == NULL) { kfree(table); return -ENOMEM; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index bd01dd31e4bd..d520ee62c8ec 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1662,6 +1662,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) { struct smc_link_group *lgr, *n; + spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { struct smc_link *link; @@ -1680,6 +1681,7 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) if (link) smc_llc_add_link_local(link); } + spin_unlock_bh(&smc_lgr_list.lock); } /* link is down - switch connections to alternate link, diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index b60fe1eb37ab..aa8928975cc6 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -243,8 +243,9 @@ while (0) #define SMC_STAT_SERV_SUCC_INC(net, _ini) \ do { \ typeof(_ini) i = (_ini); \ - bool is_v2 = (i->smcd_version & SMC_V2); \ bool is_smcd = (i->is_smcd); \ + u8 version = is_smcd ? i->smcd_version : i->smcr_version; \ + bool is_v2 = (version & SMC_V2); \ typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \ if (is_v2 && is_smcd) \ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \ diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 0b2a957ca5f5..5cbc18c6e62b 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -87,7 +87,8 @@ int __net_init smc_sysctl_net_init(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table); + net->smc.smc_hdr = register_net_sysctl_sz(net, "net/smc", table, + ARRAY_SIZE(smc_table)); if (!net->smc.smc_hdr) goto err_reg; diff --git a/net/socket.c b/net/socket.c index 928b05811cfd..c8b08b32f097 100644 --- a/net/socket.c +++ b/net/socket.c @@ -88,6 +88,7 @@ #include <linux/xattr.h> #include <linux/nospec.h> #include <linux/indirect_call_wrapper.h> +#include <linux/io_uring.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -160,6 +161,7 @@ static const struct file_operations socket_file_ops = { #ifdef CONFIG_COMPAT .compat_ioctl = compat_sock_ioctl, #endif + .uring_cmd = io_uring_cmd_sock, .mmap = sock_mmap, .release = sock_close, .fasync = sock_fasync, diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig index a55a00fa649b..eb02b906c295 100644 --- a/net/sunrpc/.kunitconfig +++ b/net/sunrpc/.kunitconfig @@ -23,7 +23,6 @@ CONFIG_NFS_FS=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_RPCSEC_GSS_KRB5=y -CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 4afc5fd71d44..2d8b67dac7b5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -34,38 +34,6 @@ config RPCSEC_GSS_KRB5 If unsure, say Y. -config RPCSEC_GSS_KRB5_SIMPLIFIED - bool - depends on RPCSEC_GSS_KRB5 - -config RPCSEC_GSS_KRB5_CRYPTOSYSTEM - bool - depends on RPCSEC_GSS_KRB5 - -config RPCSEC_GSS_KRB5_ENCTYPES_DES - bool "Enable Kerberos enctypes based on DES (deprecated)" - depends on RPCSEC_GSS_KRB5 - depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB - depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1 - depends on CRYPTO_DES - default n - select RPCSEC_GSS_KRB5_SIMPLIFIED - help - Choose Y to enable the use of deprecated Kerberos 5 - encryption types that utilize Data Encryption Standard - (DES) based ciphers. These include des-cbc-md5, - des-cbc-crc, and des-cbc-md4, which were deprecated by - RFC 6649, and des3-cbc-sha1, which was deprecated by RFC - 8429. - - These encryption types are known to be insecure, therefore - the default setting of this option is N. Support for these - encryption types is available only for compatibility with - legacy NFS client and server implementations. - - Removal of support is planned for a subsequent kernel - release. - config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1 bool "Enable Kerberos enctypes based on AES and SHA-1" depends on RPCSEC_GSS_KRB5 @@ -73,7 +41,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1 depends on CRYPTO_HMAC && CRYPTO_SHA1 depends on CRYPTO_AES default y - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Advanced Encryption Standard (AES) ciphers and @@ -86,7 +53,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA depends on CRYPTO_CMAC default n - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Camellia ciphers (RFC 3713) and CMAC digests @@ -100,7 +66,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2 depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512 depends on CRYPTO_AES default n - select RPCSEC_GSS_KRB5_CRYPTOSYSTEM help Choose Y to enable the use of Kerberos 5 encryption types that utilize Advanced Encryption Standard (AES) ciphers and diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 012ae1720689..ad1736d93b76 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -12,6 +12,6 @@ auth_rpcgss-y := auth_gss.o gss_generic_token.o \ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ - gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o + gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h index b673e2626acb..3afd4065bf3d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_internal.h +++ b/net/sunrpc/auth_gss/gss_krb5_internal.h @@ -33,7 +33,6 @@ struct gss_krb5_enctype { const u32 Ke_length; /* encryption subkey length, in octets */ const u32 Ki_length; /* integrity subkey length, in octets */ - int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); int (*derive_key)(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *in, struct xdr_netobj *out, @@ -85,24 +84,15 @@ struct krb5_ctx { * GSS Kerberos 5 mechanism Per-Message calls. */ -u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, - struct xdr_netobj *token); u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text, struct xdr_netobj *token); -u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token); u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, struct xdr_netobj *read_token); -u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages); u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf, struct page **pages); -u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align); u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len, struct xdr_buf *buf, unsigned int *slack, unsigned int *align); @@ -113,12 +103,6 @@ u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len, /* Key Derivation Functions */ -int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *label, - gfp_t gfp_mask); - int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, @@ -169,13 +153,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx, return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask); } -s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key, - int direction, u32 seqnum, unsigned char *cksum, - unsigned char *buf); - -s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - void krb5_make_confounder(u8 *p, int conflen); u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 5347fe1cc93f..06d8ee0db000 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -222,90 +222,6 @@ err_return: return ret; } -#define smask(step) ((1<<step)-1) -#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step))) -#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1) - -static void mit_des_fixup_key_parity(u8 key[8]) -{ - int i; - for (i = 0; i < 8; i++) { - key[i] &= 0xfe; - key[i] |= 1^parity_char(key[i]); - } -} - -static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key) -{ - int i, ret = -EINVAL; - - if (key->len != 24) { - dprintk("%s: key->len is %d\n", __func__, key->len); - goto err_out; - } - if (randombits->len != 21) { - dprintk("%s: randombits->len is %d\n", - __func__, randombits->len); - goto err_out; - } - - /* take the seven bytes, move them around into the top 7 bits of the - 8 key bytes, then compute the parity bits. Do this three times. */ - - for (i = 0; i < 3; i++) { - memcpy(key->data + i*8, randombits->data + i*7, 7); - key->data[i*8+7] = (((key->data[i*8]&1)<<1) | - ((key->data[i*8+1]&1)<<2) | - ((key->data[i*8+2]&1)<<3) | - ((key->data[i*8+3]&1)<<4) | - ((key->data[i*8+4]&1)<<5) | - ((key->data[i*8+5]&1)<<6) | - ((key->data[i*8+6]&1)<<7)); - - mit_des_fixup_key_parity(key->data + i*8); - } - ret = 0; -err_out: - return ret; -} - -/** - * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype - * @gk5e: Kerberos 5 enctype profile - * @inkey: base protocol key - * @outkey: OUT: derived key - * @label: subkey usage label - * @gfp_mask: memory allocation control flags - * - * Caller sets @outkey->len to the desired length of the derived key. - * - * On success, returns 0 and fills in @outkey. A negative errno value - * is returned on failure. - */ -int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *label, - gfp_t gfp_mask) -{ - struct xdr_netobj inblock; - int ret; - - inblock.len = gk5e->keybytes; - inblock.data = kmalloc(inblock.len, gfp_mask); - if (!inblock.data) - return -ENOMEM; - - ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask); - if (!ret) - ret = krb5_random_to_key_v1(gk5e, &inblock, outkey); - - kfree_sensitive(inblock.data); - return ret; -} - /* * This is the identity function, with some sanity checking. */ diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 20e21d08badb..e31cfdf7eadc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -30,61 +30,7 @@ static struct gss_api_mech gss_kerberos_mech; -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask); -static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask); -#endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM) -static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask); -#endif - static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { -#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES) - /* - * DES (All DES enctypes are mapped to the same gss functionality) - */ - { - .etype = ENCTYPE_DES_CBC_RAW, - .ctype = CKSUMTYPE_RSA_MD5, - .name = "des-cbc-crc", - .encrypt_name = "cbc(des)", - .cksum_name = "md5", - .import_ctx = gss_krb5_import_ctx_des, - .get_mic = gss_krb5_get_mic_v1, - .verify_mic = gss_krb5_verify_mic_v1, - .wrap = gss_krb5_wrap_v1, - .unwrap = gss_krb5_unwrap_v1, - .signalg = SGN_ALG_DES_MAC_MD5, - .sealalg = SEAL_ALG_DES, - .keybytes = 7, - .keylength = 8, - .cksumlength = 8, - .keyed_cksum = 0, - }, - /* - * 3DES - */ - { - .etype = ENCTYPE_DES3_CBC_RAW, - .ctype = CKSUMTYPE_HMAC_SHA1_DES3, - .name = "des3-hmac-sha1", - .encrypt_name = "cbc(des3_ede)", - .cksum_name = "hmac(sha1)", - .import_ctx = gss_krb5_import_ctx_v1, - .derive_key = krb5_derive_key_v1, - .get_mic = gss_krb5_get_mic_v1, - .verify_mic = gss_krb5_verify_mic_v1, - .wrap = gss_krb5_wrap_v1, - .unwrap = gss_krb5_unwrap_v1, - .signalg = SGN_ALG_HMAC_SHA1_DES3_KD, - .sealalg = SEAL_ALG_DES3KD, - .keybytes = 21, - .keylength = 24, - .cksumlength = 20, - .keyed_cksum = 1, - }, -#endif - #if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1) /* * AES-128 with SHA-1 (RFC 3962) @@ -96,7 +42,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .encrypt_name = "cts(cbc(aes))", .aux_cipher = "cbc(aes)", .cksum_name = "hmac(sha1)", - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_derive_key_v2, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -126,7 +71,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .encrypt_name = "cts(cbc(aes))", .aux_cipher = "cbc(aes)", .cksum_name = "hmac(sha1)", - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_derive_key_v2, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -166,7 +110,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(128), .Ki_length = BITS2OCTETS(128), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_feedback_cmac, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -193,7 +136,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(256), .Ki_length = BITS2OCTETS(256), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_feedback_cmac, .encrypt = gss_krb5_aes_encrypt, .decrypt = gss_krb5_aes_decrypt, @@ -223,7 +165,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(128), .Ki_length = BITS2OCTETS(128), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_hmac_sha2, .encrypt = krb5_etm_encrypt, .decrypt = krb5_etm_decrypt, @@ -250,7 +191,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = { .Ke_length = BITS2OCTETS(256), .Ki_length = BITS2OCTETS(192), - .import_ctx = gss_krb5_import_ctx_v2, .derive_key = krb5_kdf_hmac_sha2, .encrypt = krb5_etm_encrypt, .decrypt = krb5_etm_decrypt, @@ -284,12 +224,6 @@ static void gss_krb5_prepare_enctype_priority_list(void) ENCTYPE_AES256_CTS_HMAC_SHA1_96, ENCTYPE_AES128_CTS_HMAC_SHA1_96, #endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES) - ENCTYPE_DES3_CBC_SHA1, - ENCTYPE_DES_CBC_MD5, - ENCTYPE_DES_CBC_CRC, - ENCTYPE_DES_CBC_MD4, -#endif }; size_t total, i; char buf[16]; @@ -330,185 +264,6 @@ const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype) EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype); static struct crypto_sync_skcipher * -gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key) -{ - struct crypto_sync_skcipher *tfm; - - tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0); - if (IS_ERR(tfm)) - return NULL; - if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) { - crypto_free_sync_skcipher(tfm); - return NULL; - } - return tfm; -} - -static inline const void * -get_key(const void *p, const void *end, - struct krb5_ctx *ctx, struct crypto_sync_skcipher **res) -{ - struct crypto_sync_skcipher *tfm; - struct xdr_netobj key; - int alg; - - p = simple_get_bytes(p, end, &alg, sizeof(alg)); - if (IS_ERR(p)) - goto out_err; - switch (alg) { - case ENCTYPE_DES_CBC_CRC: - case ENCTYPE_DES_CBC_MD4: - case ENCTYPE_DES_CBC_MD5: - /* Map all these key types to ENCTYPE_DES_CBC_RAW */ - alg = ENCTYPE_DES_CBC_RAW; - break; - } - if (!gss_krb5_lookup_enctype(alg)) { - pr_warn("gss_krb5: unsupported enctype: %d\n", alg); - goto out_err_inval; - } - - p = simple_get_netobj(p, end, &key); - if (IS_ERR(p)) - goto out_err; - tfm = gss_krb5_alloc_cipher_v1(ctx, &key); - kfree(key.data); - if (!tfm) { - pr_warn("gss_krb5: failed to initialize cipher '%s'\n", - ctx->gk5e->encrypt_name); - goto out_err_inval; - } - *res = tfm; - - return p; - -out_err_inval: - p = ERR_PTR(-EINVAL); -out_err: - return p; -} - -static int -gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) -{ - u32 seq_send; - int tmp; - u32 time32; - - p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); - if (IS_ERR(p)) - goto out_err; - - /* Old format supports only DES! Any other enctype uses new format */ - ctx->enctype = ENCTYPE_DES_CBC_RAW; - - ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype); - if (ctx->gk5e == NULL) { - p = ERR_PTR(-EINVAL); - goto out_err; - } - - /* The downcall format was designed before we completely understood - * the uses of the context fields; so it includes some stuff we - * just give some minimal sanity-checking, and some we ignore - * completely (like the next twenty bytes): */ - if (unlikely(p + 20 > end || p + 20 < p)) { - p = ERR_PTR(-EFAULT); - goto out_err; - } - p += 20; - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); - if (IS_ERR(p)) - goto out_err; - if (tmp != SGN_ALG_DES_MAC_MD5) { - p = ERR_PTR(-ENOSYS); - goto out_err; - } - p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); - if (IS_ERR(p)) - goto out_err; - if (tmp != SEAL_ALG_DES) { - p = ERR_PTR(-ENOSYS); - goto out_err; - } - p = simple_get_bytes(p, end, &time32, sizeof(time32)); - if (IS_ERR(p)) - goto out_err; - /* unsigned 32-bit time overflows in year 2106 */ - ctx->endtime = (time64_t)time32; - p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send)); - if (IS_ERR(p)) - goto out_err; - atomic_set(&ctx->seq_send, seq_send); - p = simple_get_netobj(p, end, &ctx->mech_used); - if (IS_ERR(p)) - goto out_err; - p = get_key(p, end, ctx, &ctx->enc); - if (IS_ERR(p)) - goto out_err_free_mech; - p = get_key(p, end, ctx, &ctx->seq); - if (IS_ERR(p)) - goto out_err_free_key1; - if (p != end) { - p = ERR_PTR(-EFAULT); - goto out_err_free_key2; - } - - return 0; - -out_err_free_key2: - crypto_free_sync_skcipher(ctx->seq); -out_err_free_key1: - crypto_free_sync_skcipher(ctx->enc); -out_err_free_mech: - kfree(ctx->mech_used.data); -out_err: - return PTR_ERR(p); -} - -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -static int -gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask) -{ - return -EINVAL; -} - -static int -gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask) -{ - struct xdr_netobj keyin, keyout; - - keyin.data = ctx->Ksess; - keyin.len = ctx->gk5e->keylength; - - ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin); - if (ctx->seq == NULL) - goto out_err; - ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin); - if (ctx->enc == NULL) - goto out_free_seq; - - /* derive cksum */ - keyout.data = ctx->cksum; - keyout.len = ctx->gk5e->keylength; - if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN, - KEY_USAGE_SEED_CHECKSUM, gfp_mask)) - goto out_free_enc; - - return 0; - -out_free_enc: - crypto_free_sync_skcipher(ctx->enc); -out_free_seq: - crypto_free_sync_skcipher(ctx->seq); -out_err: - return -EINVAL; -} -#endif - -#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM) - -static struct crypto_sync_skcipher * gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key) { struct crypto_sync_skcipher *tfm; @@ -636,8 +391,6 @@ out_free: goto out; } -#endif - static int gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, gfp_t gfp_mask) @@ -671,9 +424,6 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); if (IS_ERR(p)) goto out_err; - /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */ - if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1) - ctx->enctype = ENCTYPE_DES3_CBC_RAW; ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype); if (ctx->gk5e == NULL) { dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n", @@ -700,7 +450,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, } ctx->mech_used.len = gss_kerberos_mech.gm_oid.len; - return ctx->gk5e->import_ctx(ctx, gfp_mask); + return gss_krb5_import_ctx_v2(ctx, gfp_mask); out_err: return PTR_ERR(p); @@ -718,10 +468,7 @@ gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id, if (ctx == NULL) return -ENOMEM; - if (len == 85) - ret = gss_import_v1_context(p, end, ctx); - else - ret = gss_import_v2_context(p, end, ctx, gfp_mask); + ret = gss_import_v2_context(p, end, ctx, gfp_mask); memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess)); if (ret) { kfree(ctx); diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 146aa755f07d..ce540df9bce4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -71,75 +71,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) - -static void * -setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) -{ - u16 *ptr; - void *krb5_hdr; - int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; - - token->len = g_token_size(&ctx->mech_used, body_size); - - ptr = (u16 *)token->data; - g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); - - /* ptr now at start of header described in rfc 1964, section 1.2.1: */ - krb5_hdr = ptr; - *ptr++ = KG_TOK_MIC_MSG; - /* - * signalg is stored as if it were converted from LE to host endian, even - * though it's an opaque pair of bytes according to the RFC. - */ - *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg); - *ptr++ = SEAL_ALG_NONE; - *ptr = 0xffff; - - return krb5_hdr; -} - -u32 -gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text, - struct xdr_netobj *token) -{ - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - void *ptr; - time64_t now; - u32 seq_send; - u8 *cksumkey; - - dprintk("RPC: %s\n", __func__); - BUG_ON(ctx == NULL); - - now = ktime_get_real_seconds(); - - ptr = setup_token(ctx, token); - - if (ctx->gk5e->keyed_cksum) - cksumkey = ctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, - KG_USAGE_SIGN, &md5cksum)) - return GSS_S_FAILURE; - - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - - seq_send = atomic_fetch_inc(&ctx->seq_send); - - if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff, - seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)) - return GSS_S_FAILURE; - - return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; -} - -#endif - static void * setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) { diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c deleted file mode 100644 index 1babc3474e10..000000000000 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * linux/net/sunrpc/gss_krb5_seqnum.c - * - * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1993 by OpenVision Technologies, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software - * and its documentation for any purpose is hereby granted without fee, - * provided that the above copyright notice appears in all copies and - * that both that copyright notice and this permission notice appear in - * supporting documentation, and that the name of OpenVision not be used - * in advertising or publicity pertaining to distribution of the software - * without specific, written prior permission. OpenVision makes no - * representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied warranty. - * - * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF - * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR - * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR - * PERFORMANCE OF THIS SOFTWARE. - */ - -#include <crypto/skcipher.h> -#include <linux/types.h> -#include <linux/sunrpc/gss_krb5.h> - -#include "gss_krb5_internal.h" - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_AUTH -#endif - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *key, - int direction, - u32 seqnum, - unsigned char *cksum, unsigned char *buf) -{ - unsigned char *plain; - s32 code; - - plain = kmalloc(8, GFP_KERNEL); - if (!plain) - return -ENOMEM; - - plain[0] = (unsigned char) (seqnum & 0xff); - plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); - plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); - plain[3] = (unsigned char) ((seqnum >> 24) & 0xff); - - plain[4] = direction; - plain[5] = direction; - plain[6] = direction; - plain[7] = direction; - - code = krb5_encrypt(key, cksum, plain, buf, 8); - kfree(plain); - return code; -} - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, - int *direction, u32 *seqnum) -{ - s32 code; - unsigned char *plain; - struct crypto_sync_skcipher *key = kctx->seq; - - dprintk("RPC: krb5_get_seq_num:\n"); - - plain = kmalloc(8, GFP_KERNEL); - if (!plain) - return -ENOMEM; - - if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) - goto out; - - if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || - (plain[4] != plain[7])) { - code = (s32)KG_BAD_SEQ; - goto out; - } - - *direction = plain[4]; - - *seqnum = ((plain[0]) | - (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - -out: - kfree(plain); - return code; -} diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c index 95ca783795c5..85625e3f3814 100644 --- a/net/sunrpc/auth_gss/gss_krb5_test.c +++ b/net/sunrpc/auth_gss/gss_krb5_test.c @@ -320,208 +320,12 @@ static void rfc3961_nfold_case(struct kunit *test) "result mismatch"); } -/* - * RFC 3961 Appendix A.3. DES3 DR and DK - * - * These tests show the derived-random and derived-key values for the - * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions - * defined in section 6.3.1. The input keys were randomly generated; - * the usage values are from this specification. - * - * This test material is copyright (C) The Internet Society (2005). - */ - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155, - 0x00, 0x00, 0x00, 0x01, 0x55 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa, - 0x00, 0x00, 0x00, 0x01, 0xaa -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos, - 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key, - 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1, - 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c, - 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key, - 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b, - 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89, - 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key, - 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57, - 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b, - 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key, - 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a, - 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20, - 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key, - 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85, - 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52, - 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key, - 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec, - 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07, - 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key, - 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad, - 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02, - 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key, - 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9, - 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89, - 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key, - 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38, - 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92, - 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key, - 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8, - 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d, - 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key, - 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62, - 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d, - 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key, - 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80, - 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e, - 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key, - 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13, - 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79, - 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key, - 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d, - 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7, - 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key, - 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57, - 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1, - 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key, - 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34, - 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3, - 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49 -); - -DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key, - 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f, - 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4, - 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16 -); -DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key, - 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73, - 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b, - 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d -); - -static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = { - { - .desc = "des3-hmac-sha1 key derivation case 1", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test1_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test1_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 2", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test2_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test2_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 3", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test3_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test3_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 4", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test4_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test4_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 5", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test5_base_key, - .usage = &des3_dk_usage_kerberos, - .expected_result = &des3_dk_test5_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 6", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test6_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test6_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 7", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test7_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test7_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 8", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test8_base_key, - .usage = &des3_dk_usage_155, - .expected_result = &des3_dk_test8_derived_key, - }, - { - .desc = "des3-hmac-sha1 key derivation case 9", - .enctype = ENCTYPE_DES3_CBC_RAW, - .base_key = &des3_dk_test9_base_key, - .usage = &des3_dk_usage_1aa, - .expected_result = &des3_dk_test9_derived_key, - }, -}; - -/* Creates the function rfc3961_kdf_gen_params */ -KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc); - static struct kunit_case rfc3961_test_cases[] = { { .name = "RFC 3961 n-fold", .run_case = rfc3961_nfold_case, .generate_params = rfc3961_nfold_gen_params, }, - { - .name = "RFC 3961 key derivation", - .run_case = kdf_case, - .generate_params = rfc3961_kdf_gen_params, - }, {} }; diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 7d6d4ae4a3c9..4fbc50a0a2c4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -69,83 +69,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif - -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) -/* read_token is a mic token, and message_buffer is the data that the mic was - * supposedly taken over. */ -u32 -gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token) -{ - int signalg; - int sealalg; - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - s32 now; - int direction; - u32 seqnum; - unsigned char *ptr = (unsigned char *)read_token->data; - int bodysize; - u8 *cksumkey; - - dprintk("RPC: krb5_read_token\n"); - - if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, - read_token->len)) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) || - (ptr[1] != (KG_TOK_MIC_MSG & 0xff))) - return GSS_S_DEFECTIVE_TOKEN; - - /* XXX sanity-check bodysize?? */ - - signalg = ptr[2] + (ptr[3] << 8); - if (signalg != ctx->gk5e->signalg) - return GSS_S_DEFECTIVE_TOKEN; - - sealalg = ptr[4] + (ptr[5] << 8); - if (sealalg != SEAL_ALG_NONE) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; - - if (ctx->gk5e->keyed_cksum) - cksumkey = ctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(ctx, ptr, 8, message_buffer, 0, - cksumkey, KG_USAGE_SIGN, &md5cksum)) - return GSS_S_FAILURE; - - if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, - ctx->gk5e->cksumlength)) - return GSS_S_BAD_SIG; - - /* it got through unscathed. Make sure the context is unexpired */ - - now = ktime_get_real_seconds(); - - if (now > ctx->endtime) - return GSS_S_CONTEXT_EXPIRED; - - /* do sequencing checks */ - - if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, - &direction, &seqnum)) - return GSS_S_FAILURE; - - if ((ctx->initiate && direction != 0xff) || - (!ctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; - - return GSS_S_COMPLETE; -} -#endif - u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer, struct xdr_netobj *read_token) diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 6d6b082380b2..b3e1738ff6bf 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -40,293 +40,6 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED) - -static inline int -gss_krb5_padding(int blocksize, int length) -{ - return blocksize - (length % blocksize); -} - -static inline void -gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) -{ - int padding = gss_krb5_padding(blocksize, buf->len - offset); - char *p; - struct kvec *iov; - - if (buf->page_len || buf->tail[0].iov_len) - iov = &buf->tail[0]; - else - iov = &buf->head[0]; - p = iov->iov_base + iov->iov_len; - iov->iov_len += padding; - buf->len += padding; - memset(p, padding, padding); -} - -static inline int -gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) -{ - u8 *ptr; - u8 pad; - size_t len = buf->len; - - if (len <= buf->head[0].iov_len) { - pad = *(u8 *)(buf->head[0].iov_base + len - 1); - if (pad > buf->head[0].iov_len) - return -EINVAL; - buf->head[0].iov_len -= pad; - goto out; - } else - len -= buf->head[0].iov_len; - if (len <= buf->page_len) { - unsigned int last = (buf->page_base + len - 1) - >>PAGE_SHIFT; - unsigned int offset = (buf->page_base + len - 1) - & (PAGE_SIZE - 1); - ptr = kmap_atomic(buf->pages[last]); - pad = *(ptr + offset); - kunmap_atomic(ptr); - goto out; - } else - len -= buf->page_len; - BUG_ON(len > buf->tail[0].iov_len); - pad = *(u8 *)(buf->tail[0].iov_base + len - 1); -out: - /* XXX: NOTE: we do not adjust the page lengths--they represent - * a range of data in the real filesystem page cache, and we need - * to know that range so the xdr code can properly place read data. - * However adjusting the head length, as we do above, is harmless. - * In the case of a request that fits into a single page, the server - * also uses length and head length together to determine the original - * start of the request to copy the request for deferal; so it's - * easier on the server if we adjust head and tail length in tandem. - * It's not really a problem that we don't fool with the page and - * tail lengths, though--at worst badly formed xdr might lead the - * server to attempt to parse the padding. - * XXX: Document all these weird requirements for gss mechanism - * wrap/unwrap functions. */ - if (pad > blocksize) - return -EINVAL; - if (buf->len > pad) - buf->len -= pad; - else - return -EINVAL; - return 0; -} - -/* Assumptions: the head and tail of inbuf are ours to play with. - * The pages, however, may be real pages in the page cache and we replace - * them with scratch pages from **pages before writing to them. */ -/* XXX: obviously the above should be documentation of wrap interface, - * and shouldn't be in this kerberos-specific file. */ - -/* XXX factor out common code with seal/unseal. */ - -u32 -gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages) -{ - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - int blocksize = 0, plainlen; - unsigned char *ptr, *msg_start; - time64_t now; - int headlen; - struct page **tmp_pages; - u32 seq_send; - u8 *cksumkey; - u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc); - - dprintk("RPC: %s\n", __func__); - - now = ktime_get_real_seconds(); - - blocksize = crypto_sync_skcipher_blocksize(kctx->enc); - gss_krb5_add_padding(buf, offset, blocksize); - BUG_ON((buf->len - offset) % blocksize); - plainlen = conflen + buf->len - offset; - - headlen = g_token_size(&kctx->mech_used, - GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) - - (buf->len - offset); - - ptr = buf->head[0].iov_base + offset; - /* shift data to make room for header. */ - xdr_extend_head(buf, offset, headlen); - - /* XXX Would be cleverer to encrypt while copying. */ - BUG_ON((buf->len - offset - headlen) % blocksize); - - g_make_token_header(&kctx->mech_used, - GSS_KRB5_TOK_HDR_LEN + - kctx->gk5e->cksumlength + plainlen, &ptr); - - - /* ptr now at header described in rfc 1964, section 1.2.1: */ - ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff); - ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff); - - msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; - - /* - * signalg and sealalg are stored as if they were converted from LE - * to host endian, even though they're opaque pairs of bytes according - * to the RFC. - */ - *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); - *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); - ptr[6] = 0xff; - ptr[7] = 0xff; - - krb5_make_confounder(msg_start, conflen); - - if (kctx->gk5e->keyed_cksum) - cksumkey = kctx->cksum; - else - cksumkey = NULL; - - /* XXXJBF: UGH!: */ - tmp_pages = buf->pages; - buf->pages = pages; - if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen, - cksumkey, KG_USAGE_SEAL, &md5cksum)) - return GSS_S_FAILURE; - buf->pages = tmp_pages; - - memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len); - - seq_send = atomic_fetch_inc(&kctx->seq_send); - - /* XXX would probably be more efficient to compute checksum - * and encrypt at the same time: */ - if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff, - seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))) - return GSS_S_FAILURE; - - if (gss_encrypt_xdr_buf(kctx->enc, buf, - offset + headlen - conflen, pages)) - return GSS_S_FAILURE; - - return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE; -} - -u32 -gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align) -{ - int signalg; - int sealalg; - char cksumdata[GSS_KRB5_MAX_CKSUM_LEN]; - struct xdr_netobj md5cksum = {.len = sizeof(cksumdata), - .data = cksumdata}; - time64_t now; - int direction; - s32 seqnum; - unsigned char *ptr; - int bodysize; - void *data_start, *orig_start; - int data_len; - int blocksize; - u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc); - int crypt_offset; - u8 *cksumkey; - unsigned int saved_len = buf->len; - - dprintk("RPC: gss_unwrap_kerberos\n"); - - ptr = (u8 *)buf->head[0].iov_base + offset; - if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, - len - offset)) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || - (ptr[1] != (KG_TOK_WRAP_MSG & 0xff))) - return GSS_S_DEFECTIVE_TOKEN; - - /* XXX sanity-check bodysize?? */ - - /* get the sign and seal algorithms */ - - signalg = ptr[2] + (ptr[3] << 8); - if (signalg != kctx->gk5e->signalg) - return GSS_S_DEFECTIVE_TOKEN; - - sealalg = ptr[4] + (ptr[5] << 8); - if (sealalg != kctx->gk5e->sealalg) - return GSS_S_DEFECTIVE_TOKEN; - - if ((ptr[6] != 0xff) || (ptr[7] != 0xff)) - return GSS_S_DEFECTIVE_TOKEN; - - /* - * Data starts after token header and checksum. ptr points - * to the beginning of the token header - */ - crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) - - (unsigned char *)buf->head[0].iov_base; - - buf->len = len; - if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset)) - return GSS_S_DEFECTIVE_TOKEN; - - if (kctx->gk5e->keyed_cksum) - cksumkey = kctx->cksum; - else - cksumkey = NULL; - - if (make_checksum(kctx, ptr, 8, buf, crypt_offset, - cksumkey, KG_USAGE_SEAL, &md5cksum)) - return GSS_S_FAILURE; - - if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN, - kctx->gk5e->cksumlength)) - return GSS_S_BAD_SIG; - - /* it got through unscathed. Make sure the context is unexpired */ - - now = ktime_get_real_seconds(); - - if (now > kctx->endtime) - return GSS_S_CONTEXT_EXPIRED; - - /* do sequencing checks */ - - if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN, - ptr + 8, &direction, &seqnum)) - return GSS_S_BAD_SIG; - - if ((kctx->initiate && direction != 0xff) || - (!kctx->initiate && direction != 0)) - return GSS_S_BAD_SIG; - - /* Copy the data back to the right position. XXX: Would probably be - * better to copy and encrypt at the same time. */ - - blocksize = crypto_sync_skcipher_blocksize(kctx->enc); - data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) + - conflen; - orig_start = buf->head[0].iov_base + offset; - data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; - memmove(orig_start, data_start, data_len); - buf->head[0].iov_len -= (data_start - orig_start); - buf->len = len - (data_start - orig_start); - - if (gss_krb5_remove_padding(buf, blocksize)) - return GSS_S_DEFECTIVE_TOKEN; - - /* slack must include room for krb5 padding */ - *slack = XDR_QUADLEN(saved_len - buf->len); - /* The GSS blob always precedes the RPC message payload */ - *align = *slack; - return GSS_S_COMPLETE; -} - -#endif - /* * We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need * to do more than that, we shift repeatedly. Kevin Coffman reports diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c4a566737085..18734e70c5dd 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -986,7 +986,7 @@ bad_unwrap: return -EINVAL; } -static int +static enum svc_auth_status svcauth_gss_set_client(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; @@ -1634,7 +1634,7 @@ svcauth_gss_decode_credbody(struct xdr_stream *xdr, * * The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531). */ -static int +static enum svc_auth_status svcauth_gss_accept(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; @@ -1945,9 +1945,6 @@ bad_wrap: * %0: the Reply is ready to be sent * %-ENOMEM: failed to allocate memory * %-EINVAL: encoding error - * - * XXX: These return values do not match the return values documented - * for the auth_ops ->release method in linux/sunrpc/svcauth.h. */ static int svcauth_gss_release(struct svc_rqst *rqstp) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d7c697af3762..8d75290f1a31 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -534,6 +534,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .servername = args->servername, .bc_xprt = args->bc_xprt, .xprtsec = args->xprtsec, + .connect_timeout = args->connect_timeout, + .reconnect_timeout = args->reconnect_timeout, }; char servername[48]; struct rpc_clnt *clnt; @@ -2602,6 +2604,7 @@ out: case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); + xdr_finish_decode(&xdr); return; case -EAGAIN: task->tk_status = 0; @@ -3069,6 +3072,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, } xprt->resvport = resvport; xprt->reuseport = reuseport; + + if (xprtargs->connect_timeout) + connect_timeout = xprtargs->connect_timeout; + if (xprtargs->reconnect_timeout) + reconnect_timeout = xprtargs->reconnect_timeout; if (xprt->ops->set_connect_timeout != NULL) xprt->ops->set_connect_timeout(xprt, connect_timeout, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 587811a002c9..812fda9d45dd 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -513,9 +513,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); - percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); } return serv; @@ -588,9 +588,9 @@ svc_destroy(struct kref *ref) for (i = 0; i < serv->sv_nrpools; i++) { struct svc_pool *pool = &serv->sv_pools[i]; + percpu_counter_destroy(&pool->sp_messages_arrived); percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); - percpu_counter_destroy(&pool->sp_threads_timedout); } kfree(serv->sv_pools); kfree(serv); @@ -689,23 +689,44 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) return rqstp; } -/* - * Choose a pool in which to create a new thread, for svc_set_num_threads +/** + * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @pool: service thread pool + * + * Can be called from soft IRQ or process context. Finding an idle + * service thread and marking it BUSY is atomic with respect to + * other calls to svc_pool_wake_idle_thread(). + * */ -static inline struct svc_pool * -choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +void svc_pool_wake_idle_thread(struct svc_pool *pool) { - if (pool != NULL) - return pool; + struct svc_rqst *rqstp; + + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); + wake_up_process(rqstp->rq_task); + rcu_read_unlock(); + percpu_counter_inc(&pool->sp_threads_woken); + trace_svc_wake_up(rqstp->rq_task->pid); + return; + } + rcu_read_unlock(); - return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; + set_bit(SP_CONGESTED, &pool->sp_flags); } -/* - * Choose a thread to kill, for svc_set_num_threads - */ -static inline struct task_struct * -choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +{ + return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; +} + +static struct task_struct * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { unsigned int i; struct task_struct *task = NULL; @@ -713,7 +734,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (pool != NULL) { spin_lock_bh(&pool->sp_lock); } else { - /* choose a pool in round-robin fashion */ for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; spin_lock_bh(&pool->sp_lock); @@ -728,21 +748,15 @@ found_pool: if (!list_empty(&pool->sp_all_threads)) { struct svc_rqst *rqstp; - /* - * Remove from the pool->sp_all_threads list - * so we don't try to kill it again. - */ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); set_bit(RQ_VICTIM, &rqstp->rq_flags); list_del_rcu(&rqstp->rq_all); task = rqstp->rq_task; } spin_unlock_bh(&pool->sp_lock); - return task; } -/* create new threads */ static int svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -754,13 +768,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) do { nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - + chosen_pool = svc_pool_next(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { @@ -779,15 +792,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. - */ - -/* destroy old threads */ static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -795,9 +799,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; - /* destroy old threads */ do { - task = choose_victim(serv, pool, &state); + task = svc_pool_victim(serv, pool, &state); if (task == NULL) break; rqstp = kthread_data(task); @@ -809,6 +812,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } +/** + * svc_set_num_threads - adjust number of threads per RPC service + * @serv: RPC service to adjust + * @pool: Specific pool from which to choose threads, or NULL + * @nrservs: New number of threads for @serv (0 or less means kill all threads) + * + * Create or destroy threads to make the number of threads for @serv the + * given number. If @pool is non-NULL, change only threads in that pool; + * otherwise, round-robin between all pools for @serv. @serv's + * sv_nrthreads is adjusted for each thread created or destroyed. + * + * Caller must ensure mutual exclusion between this and server startup or + * shutdown. + * + * Returns zero on success or a negative errno if an error occurred while + * starting a thread. + */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -1277,8 +1297,9 @@ svc_process_common(struct svc_rqst *rqstp) const struct svc_procedure *procp = NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_process_info process; - int auth_res, rc; + enum svc_auth_status auth_res; unsigned int aoffset; + int rc; __be32 *p; /* Will be turned off by GSS integrity and privacy services */ @@ -1333,6 +1354,9 @@ svc_process_common(struct svc_rqst *rqstp) goto dropit; case SVC_COMPLETE: goto sendit; + default: + pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res); + goto err_system_err; } if (progp == NULL) @@ -1370,6 +1394,8 @@ svc_process_common(struct svc_rqst *rqstp) rc = process.dispatch(rqstp); if (procp->pc_release) procp->pc_release(rqstp); + xdr_finish_decode(xdr); + if (!rc) goto dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) @@ -1516,7 +1542,6 @@ out_baddir: out_drop: svc_drop(rqstp); } -EXPORT_SYMBOL_GPL(svc_process); #if defined(CONFIG_SUNRPC_BACKCHANNEL) /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 62c7919ea610..4cfe9640df48 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -434,6 +434,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) smp_rmb(); xpt_flags = READ_ONCE(xprt->xpt_flags); + trace_svc_xprt_enqueue(xprt, xpt_flags); if (xpt_flags & BIT(XPT_BUSY)) return false; if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) @@ -456,7 +457,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) void svc_xprt_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; - struct svc_rqst *rqstp = NULL; if (!svc_xprt_ready(xprt)) return; @@ -476,21 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - /* find a thread for this xprt */ - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - percpu_counter_inc(&pool->sp_threads_woken); - rqstp->rq_qtime = ktime_get(); - wake_up_process(rqstp->rq_task); - goto out_unlock; - } - set_bit(SP_CONGESTED, &pool->sp_flags); - rqstp = NULL; -out_unlock: - rcu_read_unlock(); - trace_svc_xprt_enqueue(xprt, rqstp); + svc_pool_wake_idle_thread(pool); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); @@ -581,7 +567,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) svc_xprt_put(xprt); } -/* +/** + * svc_wake_up - Wake up a service thread for non-transport work + * @serv: RPC service + * * Some svc_serv's will have occasional work to do, even when a xprt is not * waiting to be serviced. This function is there to "kick" a task in one of * those services so that it can wake up and do that work. Note that we only @@ -590,27 +579,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) */ void svc_wake_up(struct svc_serv *serv) { - struct svc_rqst *rqstp; - struct svc_pool *pool; - - pool = &serv->sv_pools[0]; + struct svc_pool *pool = &serv->sv_pools[0]; - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* skip any that aren't queued */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - rcu_read_unlock(); - wake_up_process(rqstp->rq_task); - trace_svc_wake_up(rqstp->rq_task->pid); - return; - } - rcu_read_unlock(); - - /* No free entries available */ set_bit(SP_TASK_PENDING, &pool->sp_flags); - smp_wmb(); - trace_svc_wake_up(0); + svc_pool_wake_idle_thread(pool); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -679,7 +651,7 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -static int svc_alloc_arg(struct svc_rqst *rqstp) +static bool svc_alloc_arg(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg = &rqstp->rq_arg; @@ -701,10 +673,10 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); - return -EINTR; + return false; } trace_svc_alloc_arg_err(pages, ret); memalloc_retry_wait(GFP_KERNEL); @@ -723,7 +695,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) arg->tail[0].iov_len = 0; rqstp->rq_xid = xdr_zero; - return 0; + return true; } static bool @@ -732,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) struct svc_pool *pool = rqstp->rq_pool; /* did someone call svc_wake_up? */ - if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags)) + if (test_bit(SP_TASK_PENDING, &pool->sp_flags)) return false; /* was a socket queued? */ @@ -740,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -750,10 +722,9 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) +static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp) { struct svc_pool *pool = rqstp->rq_pool; - long time_left = 0; /* rq_xprt should be clear on entry */ WARN_ON_ONCE(rqstp->rq_xprt); @@ -762,18 +733,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) - time_left = schedule_timeout(timeout); + schedule(); else __set_current_state(TASK_RUNNING); @@ -781,17 +748,16 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); + clear_bit(SP_TASK_PENDING, &pool->sp_flags); rqstp->rq_xprt = svc_xprt_dequeue(pool); if (rqstp->rq_xprt) goto out_found; - if (!time_left) - percpu_counter_inc(&pool->sp_threads_timedout); - - if (signalled() || kthread_should_stop()) - return ERR_PTR(-EINTR); - return ERR_PTR(-EAGAIN); + if (kthread_should_stop()) + return NULL; + return NULL; out_found: + clear_bit(SP_TASK_PENDING, &pool->sp_flags); /* Normally we will wait up to 5 seconds for any required * cache information to be provided. */ @@ -867,37 +833,35 @@ out: return len; } -/* - * Receive the next request on any transport. This code is carefully - * organised not to touch any cachelines in the shared svc_serv - * structure, only cachelines in the local svc_pool. +/** + * svc_recv - Receive and process the next request on any transport + * @rqstp: an idle RPC service thread + * + * This code is carefully organised not to touch any cachelines in + * the shared svc_serv structure, only cachelines in the local + * svc_pool. */ -int svc_recv(struct svc_rqst *rqstp, long timeout) +void svc_recv(struct svc_rqst *rqstp) { struct svc_xprt *xprt = NULL; struct svc_serv *serv = rqstp->rq_server; - int len, err; + int len; - err = svc_alloc_arg(rqstp); - if (err) + if (!svc_alloc_arg(rqstp)) goto out; try_to_freeze(); cond_resched(); - err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; - xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) { - err = PTR_ERR(xprt); + xprt = svc_get_next_xprt(rqstp); + if (!xprt) goto out; - } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ - err = -EAGAIN; if (len <= 0) goto out_release; @@ -909,13 +873,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); rqstp->rq_stime = ktime_get(); - return len; + svc_process(rqstp); +out: + return; out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); -out: - return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -1456,12 +1421,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) return 0; } - seq_printf(m, "%u %llu %llu %llu %llu\n", - pool->sp_id, - percpu_counter_sum_positive(&pool->sp_sockets_queued), - percpu_counter_sum_positive(&pool->sp_sockets_queued), - percpu_counter_sum_positive(&pool->sp_threads_woken), - percpu_counter_sum_positive(&pool->sp_threads_timedout)); + seq_printf(m, "%u %llu %llu %llu 0\n", + pool->sp_id, + percpu_counter_sum_positive(&pool->sp_messages_arrived), + percpu_counter_sum_positive(&pool->sp_sockets_queued), + percpu_counter_sum_positive(&pool->sp_threads_woken)); return 0; } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 67d8245a08af..aa4429d0b810 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -60,8 +60,19 @@ svc_put_auth_ops(struct auth_ops *aops) module_put(aops->owner); } -int -svc_authenticate(struct svc_rqst *rqstp) +/** + * svc_authenticate - Initialize an outgoing credential + * @rqstp: RPC execution context + * + * Return values: + * %SVC_OK: XDR encoding of the result can begin + * %SVC_DENIED: Credential or verifier is not valid + * %SVC_GARBAGE: Failed to decode credential or verifier + * %SVC_COMPLETE: GSS context lifetime event; no further action + * %SVC_DROP: Drop this request; no further action + * %SVC_CLOSE: Like drop, but also close transport connection + */ +enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp) { struct auth_ops *aops; u32 flavor; @@ -89,16 +100,28 @@ svc_authenticate(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_authenticate); -int svc_set_client(struct svc_rqst *rqstp) +/** + * svc_set_client - Assign an appropriate 'auth_domain' as the client + * @rqstp: RPC execution context + * + * Return values: + * %SVC_OK: Client was found and assigned + * %SVC_DENY: Client was explicitly denied + * %SVC_DROP: Ignore this request + * %SVC_CLOSE: Ignore this request and close the connection + */ +enum svc_auth_status svc_set_client(struct svc_rqst *rqstp) { rqstp->rq_client = NULL; return rqstp->rq_authop->set_client(rqstp); } EXPORT_SYMBOL_GPL(svc_set_client); -/* A request, which was authenticated, has now executed. - * Time to finalise the credentials and verifier - * and release and resources +/** + * svc_authorise - Finalize credentials/verifier and release resources + * @rqstp: RPC execution context + * + * Returns zero on success, or a negative errno. */ int svc_authorise(struct svc_rqst *rqstp) { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 174783f804fa..04b45588ae6f 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -665,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp) } } -int +enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp) { struct sockaddr_in *sin; @@ -736,7 +736,6 @@ out: rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } - EXPORT_SYMBOL_GPL(svcauth_unix_set_client); /** @@ -751,7 +750,7 @@ EXPORT_SYMBOL_GPL(svcauth_unix_set_client); * * rqstp->rq_auth_stat is set as mandated by RFC 5531. */ -static int +static enum svc_auth_status svcauth_null_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; @@ -828,7 +827,7 @@ struct auth_ops svcauth_null = { * * rqstp->rq_auth_stat is set as mandated by RFC 5531. */ -static int +static enum svc_auth_status svcauth_tls_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; @@ -913,7 +912,7 @@ struct auth_ops svcauth_tls = { * * rqstp->rq_auth_stat is set as mandated by RFC 5531. */ -static int +static enum svc_auth_status svcauth_unix_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8c9a8ee76aa0..998687421fa6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -36,6 +36,8 @@ #include <linux/skbuff.h> #include <linux/file.h> #include <linux/freezer.h> +#include <linux/bvec.h> + #include <net/sock.h> #include <net/checksum.h> #include <net/ip.h> @@ -695,9 +697,10 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) .msg_name = &rqstp->rq_addr, .msg_namelen = rqstp->rq_addrlen, .msg_control = cmh, + .msg_flags = MSG_SPLICE_PAGES, .msg_controllen = sizeof(buffer), }; - unsigned int sent; + unsigned int count; int err; svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt); @@ -710,22 +713,23 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xdr_alloc_bvec(xdr, GFP_KERNEL); - if (err < 0) - goto out_unlock; + count = xdr_buf_to_bvec(rqstp->rq_bvec, + ARRAY_SIZE(rqstp->rq_bvec), xdr); - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + count, 0); + err = sock_sendmsg(svsk->sk_sock, &msg); if (err == -ECONNREFUSED) { /* ICMP error on earlier request. */ - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + count, 0); + err = sock_sendmsg(svsk->sk_sock, &msg); } - xdr_free_bvec(xdr); + trace_svcsock_udp_send(xprt, err); -out_unlock: + mutex_unlock(&xprt->xpt_mutex); - if (err < 0) - return err; - return sent; + return err; out_notconn: mutex_unlock(&xprt->xpt_mutex); @@ -1089,6 +1093,9 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) /* If we have more data, signal svc_xprt_enqueue() to try again */ svsk->sk_tcplen = 0; svsk->sk_marker = xdr_zero; + + smp_wmb(); + tcp_set_rcvlowat(svsk->sk_sk, 1); } /** @@ -1178,10 +1185,17 @@ err_incomplete: goto err_delete; if (len == want) svc_tcp_fragment_received(svsk); - else + else { + /* Avoid more ->sk_data_ready() calls until the rest + * of the message has arrived. This reduces service + * thread wake-ups on large incoming messages. */ + tcp_set_rcvlowat(svsk->sk_sk, + svc_sock_reclen(svsk) - svsk->sk_tcplen); + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, svc_sock_reclen(svsk), svsk->sk_tcplen - sizeof(rpc_fraghdr)); + } goto err_noclose; error: if (len != -EAGAIN) @@ -1198,75 +1212,51 @@ err_noclose: return 0; /* record not complete */ } -static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, - int flags) -{ - struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, }; - - iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len); - return sock_sendmsg(sock, &msg); -} - /* * MSG_SPLICE_PAGES is used exclusively to reduce the number of * copy operations in this path. Therefore the caller must ensure * that the pages backing @xdr are unchanging. * - * In addition, the logic assumes that * .bv_len is never larger - * than PAGE_SIZE. + * Note that the send is non-blocking. The caller has incremented + * the reference count on each page backing the RPC message, and + * the network layer will "put" these pages when transmission is + * complete. + * + * This is safe for our RPC services because the memory backing + * the head and tail components is never kmalloc'd. These always + * come from pages in the svc_rqst::rq_pages array. */ -static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr, +static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, rpc_fraghdr marker, unsigned int *sentp) { - const struct kvec *head = xdr->head; - const struct kvec *tail = xdr->tail; - struct kvec rm = { - .iov_base = &marker, - .iov_len = sizeof(marker), - }; struct msghdr msg = { - .msg_flags = 0, + .msg_flags = MSG_SPLICE_PAGES, }; + unsigned int count; + void *buf; int ret; *sentp = 0; - ret = xdr_alloc_bvec(xdr, GFP_KERNEL); - if (ret < 0) - return ret; - - ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len); - if (ret < 0) - return ret; - *sentp += ret; - if (ret != rm.iov_len) - return -EAGAIN; - - ret = svc_tcp_send_kvec(sock, head, 0); - if (ret < 0) - return ret; - *sentp += ret; - if (ret != head->iov_len) - goto out; - if (xdr_buf_pagecount(xdr)) - xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base); - - msg.msg_flags = MSG_SPLICE_PAGES; - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec, - xdr_buf_pagecount(xdr), xdr->page_len); - ret = sock_sendmsg(sock, &msg); + /* The stream record marker is copied into a temporary page + * fragment buffer so that it can be included in rq_bvec. + */ + buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker), + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memcpy(buf, &marker, sizeof(marker)); + bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker)); + + count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, + ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res); + + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, + 1 + count, sizeof(marker) + rqstp->rq_res.len); + ret = sock_sendmsg(svsk->sk_sock, &msg); if (ret < 0) return ret; *sentp += ret; - - if (tail->iov_len) { - ret = svc_tcp_send_kvec(sock, tail, 0); - if (ret < 0) - return ret; - *sentp += ret; - } - -out: return 0; } @@ -1292,23 +1282,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt); rqstp->rq_xprt_ctxt = NULL; - atomic_inc(&svsk->sk_sendqlen); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - tcp_sock_set_cork(svsk->sk_sk, true); - err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent); - xdr_free_bvec(xdr); + err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent); trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; - if (atomic_dec_and_test(&svsk->sk_sendqlen)) - tcp_sock_set_cork(svsk->sk_sk, false); mutex_unlock(&xprt->xpt_mutex); return sent; out_notconn: - atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -ENOTCONN; out_close: @@ -1317,7 +1301,6 @@ out_close: (err < 0) ? "got error" : "sent", (err < 0) ? err : sent, xdr->len); svc_xprt_deferred_close(xprt); - atomic_dec(&svsk->sk_sendqlen); mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } @@ -1644,6 +1627,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct page_frag_cache *pfc = &svsk->sk_frag_cache; struct socket *sock = svsk->sk_sock; trace_svcsock_free(svsk, sock); @@ -1653,5 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt) sockfd_put(sock); else sock_release(sock); + if (pfc->va) + __page_frag_cache_drain(virt_to_head_page(pfc->va), + pfc->pagecnt_bias); kfree(svsk); } diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2a22e78af116..62e07c330a66 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -165,6 +165,56 @@ xdr_free_bvec(struct xdr_buf *buf) } /** + * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array + * @bvec: bio_vec array to populate + * @bvec_size: element count of @bio_vec + * @xdr: xdr_buf to be copied + * + * Returns the number of entries consumed in @bvec. + */ +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf *xdr) +{ + const struct kvec *head = xdr->head; + const struct kvec *tail = xdr->tail; + unsigned int count = 0; + + if (head->iov_len) { + bvec_set_virt(bvec++, head->iov_base, head->iov_len); + ++count; + } + + if (xdr->page_len) { + unsigned int offset, len, remaining; + struct page **pages = xdr->pages; + + offset = offset_in_page(xdr->page_base); + remaining = xdr->page_len; + while (remaining > 0) { + len = min_t(unsigned int, remaining, + PAGE_SIZE - offset); + bvec_set_page(bvec++, *pages++, len, offset); + remaining -= len; + offset = 0; + if (unlikely(++count > bvec_size)) + goto bvec_overflow; + } + } + + if (tail->iov_len) { + bvec_set_virt(bvec, tail->iov_base, tail->iov_len); + if (unlikely(++count > bvec_size)) + goto bvec_overflow; + } + + return count; + +bvec_overflow: + pr_warn_once("%s: bio_vec array overflow\n", __func__); + return count - 1; +} + +/** * xdr_inline_pages - Prepare receive buffer for a large reply * @xdr: xdr_buf into which reply will be placed * @offset: expected offset where data payload will start, in bytes @@ -1288,6 +1338,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr, return xdr_set_iov(xdr, buf->tail, base, len); } +static void xdr_stream_unmap_current_page(struct xdr_stream *xdr) +{ + if (xdr->page_kaddr) { + kunmap_local(xdr->page_kaddr); + xdr->page_kaddr = NULL; + } +} + static unsigned int xdr_set_page_base(struct xdr_stream *xdr, unsigned int base, unsigned int len) { @@ -1305,12 +1363,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr, if (len > maxlen) len = maxlen; + xdr_stream_unmap_current_page(xdr); xdr_stream_page_set_pos(xdr, base); base += xdr->buf->page_base; pgnr = base >> PAGE_SHIFT; xdr->page_ptr = &xdr->buf->pages[pgnr]; - kaddr = page_address(*xdr->page_ptr); + + if (PageHighMem(*xdr->page_ptr)) { + xdr->page_kaddr = kmap_local_page(*xdr->page_ptr); + kaddr = xdr->page_kaddr; + } else + kaddr = page_address(*xdr->page_ptr); pgoff = base & ~PAGE_MASK; xdr->p = (__be32*)(kaddr + pgoff); @@ -1364,6 +1428,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst) { xdr->buf = buf; + xdr->page_kaddr = NULL; xdr_reset_scratch_buffer(xdr); xdr->nwords = XDR_QUADLEN(buf->len); if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 && @@ -1396,6 +1461,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, } EXPORT_SYMBOL_GPL(xdr_init_decode_pages); +/** + * xdr_finish_decode - Clean up the xdr_stream after decoding data. + * @xdr: pointer to xdr_stream struct + */ +void xdr_finish_decode(struct xdr_stream *xdr) +{ + xdr_stream_unmap_current_page(xdr); +} +EXPORT_SYMBOL(xdr_finish_decode); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { unsigned int nwords = XDR_QUADLEN(nbytes); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5e5ff6784ef5..da409450dfc0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -593,7 +593,6 @@ void xprt_rdma_cleanup(void); int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *); -int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); void xprt_rdma_bc_free_rqst(struct rpc_rqst *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 268a2cc61acd..71cd916e384f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2237,9 +2237,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct net *net = sock_net(sock->sk); + unsigned long connect_timeout; + unsigned long syn_retries; unsigned int keepidle; unsigned int keepcnt; unsigned int timeo; + unsigned long t; spin_lock(&xprt->transport_lock); keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); @@ -2257,6 +2261,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, /* TCP user timeout (see RFC5482) */ tcp_sock_set_user_timeout(sock->sk, timeo); + + /* Connect timeout */ + connect_timeout = max_t(unsigned long, + DIV_ROUND_UP(xprt->connect_timeout, HZ), 1); + syn_retries = max_t(unsigned long, + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1); + for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++) + ; + if (t <= syn_retries) + tcp_sock_set_syncnt(sock->sk, t - 1); +} + +static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_timeout to; + unsigned long initval; + + memcpy(&to, xprt->timeout, sizeof(to)); + /* Arbitrary lower limit */ + initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO); + to.to_initval = initval; + to.to_maxval = initval; + to.to_retries = 0; + memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout)); + xprt->timeout = &transport->tcp_timeout; + xprt->connect_timeout = connect_timeout; } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, @@ -2264,25 +2297,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, unsigned long reconnect_timeout) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_timeout to; - unsigned long initval; spin_lock(&xprt->transport_lock); if (reconnect_timeout < xprt->max_reconnect_timeout) xprt->max_reconnect_timeout = reconnect_timeout; - if (connect_timeout < xprt->connect_timeout) { - memcpy(&to, xprt->timeout, sizeof(to)); - initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); - /* Arbitrary lower limit */ - if (initval < XS_TCP_INIT_REEST_TO << 1) - initval = XS_TCP_INIT_REEST_TO << 1; - to.to_initval = initval; - to.to_maxval = initval; - memcpy(&transport->tcp_timeout, &to, - sizeof(transport->tcp_timeout)); - xprt->timeout = &transport->tcp_timeout; - xprt->connect_timeout = connect_timeout; - } + if (connect_timeout < xprt->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, connect_timeout); set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); spin_unlock(&xprt->transport_lock); } @@ -3335,8 +3355,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->timeout = &xs_tcp_default_timeout; xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + if (args->reconnect_timeout) + xprt->max_reconnect_timeout = args->reconnect_timeout; + xprt->connect_timeout = xprt->timeout->to_initval * (xprt->timeout->to_retries + 1); + if (args->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout); INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); INIT_WORK(&transport->error_worker, xs_error_handle); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 4b45ed631eb8..051ed5f6fc93 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -101,7 +101,7 @@ __init int net_sysctl_init(void) * registering "/proc/sys/net" as an empty directory not in a * network namespace. */ - net_header = register_sysctl("net", empty); + net_header = register_sysctl_sz("net", empty, 0); if (!net_header) goto out; ret = register_pernet_subsys(&sysctl_pernet_ops); @@ -122,12 +122,13 @@ out1: * allocated. */ static void ensure_safe_net_sysctl(struct net *net, const char *path, - struct ctl_table *table) + struct ctl_table *table, size_t table_size) { struct ctl_table *ent; pr_debug("Registering net sysctl (net %p): %s\n", net, path); - for (ent = table; ent->procname; ent++) { + ent = table; + for (size_t i = 0; i < table_size && ent->procname; ent++, i++) { unsigned long addr; const char *where; @@ -160,15 +161,24 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path, } } -struct ctl_table_header *register_net_sysctl(struct net *net, - const char *path, struct ctl_table *table) +struct ctl_table_header *register_net_sysctl_sz(struct net *net, + const char *path, + struct ctl_table *table, + size_t table_size) { + int count; + struct ctl_table *entry; + if (!net_eq(net, &init_net)) - ensure_safe_net_sysctl(net, path, table); + ensure_safe_net_sysctl(net, path, table, table_size); + + entry = table; + for (count = 0 ; count < table_size && entry->procname; entry++, count++) + ; - return __register_sysctl_table(&net->sysctls, path, table); + return __register_sysctl_table(&net->sysctls, path, table, count); } -EXPORT_SYMBOL_GPL(register_net_sysctl); +EXPORT_SYMBOL_GPL(register_net_sysctl_sz); void unregister_net_sysctl_table(struct ctl_table_header *header) { diff --git a/net/tipc/core.h b/net/tipc/core.h index 0a3f7a70a50a..7eccd97e0609 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -197,7 +197,7 @@ static inline int less(u16 left, u16 right) return less_eq(left, right) && (mod(right) != mod(left)); } -static inline int in_range(u16 val, u16 min, u16 max) +static inline int tipc_in_range(u16 val, u16 min, u16 max) { return !less(val, min) && !more(val, max); } diff --git a/net/tipc/link.c b/net/tipc/link.c index 2eff1c7949cb..e33b4f29f77c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1623,7 +1623,7 @@ next_gap_ack: last_ga->bgack_cnt); } /* Check against the last Gap ACK block */ - if (in_range(seqno, start, end)) + if (tipc_in_range(seqno, start, end)) continue; /* Update/release the packet peer is acking */ bc_has_acked = true; @@ -2251,12 +2251,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own priority if peer's priority is higher */ - if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) + if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; /* If peer is going down we want full re-establish cycle */ @@ -2299,13 +2299,13 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ - if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && - in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { + tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1ed4a611631f..d1fc295b83b5 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -817,7 +817,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err && sk->sk_err == EBADMSG) { + if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; @@ -846,7 +846,7 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err && sk->sk_err == EBADMSG) { + if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); err = -sk->sk_err; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 500129aa710c..3e84b31c355a 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -36,7 +36,8 @@ int __net_init unix_sysctl_register(struct net *net) table[0].data = &net->unx.sysctl_max_dgram_qlen; } - net->unx.ctl = register_net_sysctl(net, "net/unix", table); + net->unx.ctl = register_net_sysctl_sz(net, "net/unix", table, + ARRAY_SIZE(unix_table)); if (net->unx.ctl == NULL) goto err_reg; diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 0c6c5ef65f9d..7fdeafc838a7 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -44,6 +44,7 @@ static struct ctl_table xfrm_table[] = { int __net_init xfrm_sysctl_init(struct net *net) { struct ctl_table *table; + size_t table_size = ARRAY_SIZE(xfrm_table); __xfrm_sysctl_init(net); @@ -56,10 +57,13 @@ int __net_init xfrm_sysctl_init(struct net *net) table[3].data = &net->xfrm.sysctl_acq_expires; /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) + if (net->user_ns != &init_user_ns) { table[0].procname = NULL; + table_size = 0; + } - net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table); + net->xfrm.sysctl_hdr = register_net_sysctl_sz(net, "net/core", table, + table_size); if (!net->xfrm.sysctl_hdr) goto out_register; return 0; |