From: ordex (C. Review) <ge...@op...> - 2025-07-22 20:22:38
|
Attention is currently required from: flichtenheld, plaisthos. Hello plaisthos, flichtenheld, I'd like you to do a code review. Please visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email to review the following change. Change subject: dco_linux: fix async message reception ...................................................................... dco_linux: fix async message reception Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn upon some event, after having set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the according parser. This way, no matter what message we get at what time, but we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Signed-off-by: Antonio Quartulli <an...@ma...> --- M src/openvpn/dco_linux.c M src/openvpn/dco_linux.h 2 files changed, 130 insertions(+), 85 deletions(-) git pull ssh://gerrit.openvpn.net:29418/openvpn refs/changes/00/1100/1 diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 0a22879..62052f3 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -172,18 +172,14 @@ * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,23 +877,26 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { @@ -903,7 +904,7 @@ return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,25 +920,18 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { @@ -946,18 +940,39 @@ } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } -static int -ovpn_handle_peer_del_ntf(dco_context_t *dco, struct nlattr *attrs[]) +static bool +ovpn_iface_check(dco_context_t *dco, struct nlattr *attrs[]) { /* we must know which interface this message is referring to in order to * avoid mixing messages for other instances @@ -965,6 +980,25 @@ if (!attrs[OVPN_A_IFINDEX]) { msg(D_DCO, "ovpn-dco: Received message without ifindex"); + return false; + } + + uint32_t ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (ifindex != dco->ifindex) + { + msg(D_DCO_DEBUG, + "ovpn-dco: ignoring message for foreign ifindex %d", ifindex); + return false; + } + + return true; +} + +static int +ovpn_handle_peer_del_ntf(dco_context_t *dco, struct nlattr *attrs[]) +{ + if (!ovpn_iface_check(dco, attrs)) + { return NL_STOP; } @@ -1008,12 +1042,8 @@ static int ovpn_handle_peer_float_ntf(dco_context_t *dco, struct nlattr *attrs[]) { - /* we must know which interface this message is referring to in order to - * avoid mixing messages for other instances - */ - if (!attrs[OVPN_A_IFINDEX]) + if (!ovpn_iface_check(dco, attrs)) { - msg(D_DCO, "ovpn-dco: Received message without ifindex"); return NL_STOP; } @@ -1058,12 +1088,8 @@ static int ovpn_handle_key_swap_ntf(dco_context_t *dco, struct nlattr *attrs[]) { - /* we must know which interface this message is referring to in order to - * avoid mixing messages for other instances - */ - if (!attrs[OVPN_A_IFINDEX]) + if (!ovpn_iface_check(dco, attrs)) { - msg(D_DCO, "ovpn-dco: Received message without ifindex"); return NL_STOP; } @@ -1109,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1126,15 +1165,6 @@ return NL_STOP; } - uint32_t ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); - if (ifindex != dco->ifindex) - { - msg(D_DCO_DEBUG, - "ovpn-dco: ignoring message (type=%d) for foreign ifindex %d", - gnlh->cmd, ifindex); - return NL_STOP; - } - /* based on the message type, we parse the subobject contained in the * message, that stores the type-specific attributes. * @@ -1144,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1172,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1187,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1225,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 1 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-MessageType: newchange |
From: ordex (C. Review) <ge...@op...> - 2025-07-23 10:00:00
|
Attention is currently required from: flichtenheld, plaisthos. Hello flichtenheld, plaisthos, I'd like you to reexamine a change. Please visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email to look at the new patch set (#3). Change subject: dco_linux: fix async message reception ...................................................................... dco_linux: fix async message reception Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn upon some event, after having set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the according parser. This way, no matter what message we get at what time, but we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: OpenVPN/openvpn#793 Signed-off-by: Antonio Quartulli <an...@ma...> --- M src/openvpn/dco_linux.c M src/openvpn/dco_linux.h 2 files changed, 130 insertions(+), 85 deletions(-) git pull ssh://gerrit.openvpn.net:29418/openvpn refs/changes/00/1100/3 diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 0a22879..62052f3 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -172,18 +172,14 @@ * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,23 +877,26 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { @@ -903,7 +904,7 @@ return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,25 +920,18 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { @@ -946,18 +940,39 @@ } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } -static int -ovpn_handle_peer_del_ntf(dco_context_t *dco, struct nlattr *attrs[]) +static bool +ovpn_iface_check(dco_context_t *dco, struct nlattr *attrs[]) { /* we must know which interface this message is referring to in order to * avoid mixing messages for other instances @@ -965,6 +980,25 @@ if (!attrs[OVPN_A_IFINDEX]) { msg(D_DCO, "ovpn-dco: Received message without ifindex"); + return false; + } + + uint32_t ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (ifindex != dco->ifindex) + { + msg(D_DCO_DEBUG, + "ovpn-dco: ignoring message for foreign ifindex %d", ifindex); + return false; + } + + return true; +} + +static int +ovpn_handle_peer_del_ntf(dco_context_t *dco, struct nlattr *attrs[]) +{ + if (!ovpn_iface_check(dco, attrs)) + { return NL_STOP; } @@ -1008,12 +1042,8 @@ static int ovpn_handle_peer_float_ntf(dco_context_t *dco, struct nlattr *attrs[]) { - /* we must know which interface this message is referring to in order to - * avoid mixing messages for other instances - */ - if (!attrs[OVPN_A_IFINDEX]) + if (!ovpn_iface_check(dco, attrs)) { - msg(D_DCO, "ovpn-dco: Received message without ifindex"); return NL_STOP; } @@ -1058,12 +1088,8 @@ static int ovpn_handle_key_swap_ntf(dco_context_t *dco, struct nlattr *attrs[]) { - /* we must know which interface this message is referring to in order to - * avoid mixing messages for other instances - */ - if (!attrs[OVPN_A_IFINDEX]) + if (!ovpn_iface_check(dco, attrs)) { - msg(D_DCO, "ovpn-dco: Received message without ifindex"); return NL_STOP; } @@ -1109,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1126,15 +1165,6 @@ return NL_STOP; } - uint32_t ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); - if (ifindex != dco->ifindex) - { - msg(D_DCO_DEBUG, - "ovpn-dco: ignoring message (type=%d) for foreign ifindex %d", - gnlh->cmd, ifindex); - return NL_STOP; - } - /* based on the message type, we parse the subobject contained in the * message, that stores the type-specific attributes. * @@ -1144,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1172,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1187,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1225,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 3 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-MessageType: newpatchset |
From: cron2 (C. Review) <ge...@op...> - 2025-07-23 18:02:28
|
Attention is currently required from: flichtenheld, ordex, plaisthos. cron2 has posted comments on this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... Patch Set 4: (1 comment) Patchset: PS4: So it has passed the t_server testbed, and also a bit of stare-at-code. I do want to do more tests - especially float with interesting timings, multiple instances with individual counters (crosstalk...?), peers timing out or being killed from userspace, and so on. -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 4 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: cron2 <ge...@gr...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-Attention: ordex <an...@ma...> Gerrit-Comment-Date: Wed, 23 Jul 2025 18:02:13 +0000 Gerrit-HasComments: Yes Gerrit-Has-Labels: No Gerrit-MessageType: comment |
From: cron2 (C. Review) <ge...@op...> - 2025-07-23 18:13:49
|
Attention is currently required from: flichtenheld, ordex, plaisthos. cron2 has posted comments on this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... Patch Set 4: Code-Review-1 (7 comments) Patchset: PS4: more polish needed File src/openvpn/dco_linux.c: http://gerrit.openvpn.net/c/openvpn/+/1100/comment/24854dbd_add7b22c : PS4, Line 170: * Send a prepared netlink message and registers cb as callback if non-null. this comment needs changing, though... "and registers cb" http://gerrit.openvpn.net/c/openvpn/+/1100/comment/6281b84e_49f7ecd7 : PS4, Line 903: msg(M_WARN, "%s: no peer-id provided in reply", __func__); maybe change the "reply" to "whatever this message type is", similar to the other "no peer-id provided" things? http://gerrit.openvpn.net/c/openvpn/+/1100/comment/94873fa3_d40d8bb4 : PS4, Line 938: msg(M_WARN, "%s: no peer-id provided in reply", __func__); as well http://gerrit.openvpn.net/c/openvpn/+/1100/comment/41388847_81aa443f : PS4, Line 951: struct multi_instance *mi = dco->c->multi->instances[peer_id]; how can we ever end here? http://gerrit.openvpn.net/c/openvpn/+/1100/comment/5c6b04ae_b5f56bc3 : PS4, Line 1182: if (nlh->nlmsg_flags & NLM_F_MULTI) I'm not sure I understand this. p2mp peer messages are sent multicast, and p2p messages not? Or what does `NLM_F_MULTI` signal? http://gerrit.openvpn.net/c/openvpn/+/1100/comment/a876ff1f_882629d9 : PS4, Line 1189: } I do wonder if this `if()` shouldn't just look at p->mode? Since `multi` does not want to deal with MODE_P2P, and _peer() now has extra code to deal with !MODE_P"P, just check beforehand? -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 4 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-Attention: ordex <an...@ma...> Gerrit-Comment-Date: Wed, 23 Jul 2025 18:13:35 +0000 Gerrit-HasComments: Yes Gerrit-Has-Labels: Yes Gerrit-MessageType: comment |
From: ordex (C. Review) <ge...@op...> - 2025-07-23 21:55:19
|
Attention is currently required from: cron2, flichtenheld, plaisthos. ordex has posted comments on this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... Patch Set 4: (6 comments) File src/openvpn/dco_linux.c: http://gerrit.openvpn.net/c/openvpn/+/1100/comment/b17821a3_854b55f6 : PS4, Line 170: * Send a prepared netlink message and registers cb as callback if non-null. > this comment needs changing, though... […] Acknowledged http://gerrit.openvpn.net/c/openvpn/+/1100/comment/6d73e782_65951dfb : PS4, Line 903: msg(M_WARN, "%s: no peer-id provided in reply", __func__); > maybe change the "reply" to "whatever this message type is", similar to the other "no peer-id provid […] the difference is that here we print the function name, so the context is obvious. In the other messages we only have the "ovpn-dco" prefix (because that code was all embedded in ovpn_handle_msg() before this patch). I think it makes sense to follow your suggestion, but I will also switch to the "ovpn-dco:" prefix http://gerrit.openvpn.net/c/openvpn/+/1100/comment/69bbc8ae_718dd631 : PS4, Line 938: msg(M_WARN, "%s: no peer-id provided in reply", __func__); > as well Acknowledged http://gerrit.openvpn.net/c/openvpn/+/1100/comment/c0bbb1b8_731eaf6c : PS4, Line 951: struct multi_instance *mi = dco->c->multi->instances[peer_id]; > how can we ever end here? check_timeout() for --inactive. In that code path we sadly check each peer one by one and thus we call PEER_GET on each specific peer one by one. http://gerrit.openvpn.net/c/openvpn/+/1100/comment/25f2c886_37016329 : PS4, Line 1182: if (nlh->nlmsg_flags & NLM_F_MULTI) > I'm not sure I understand this. […] hehe! Check: https://elixir.bootlin.com/linux/v6.16-rc6/source/include/uapi/linux/netlink.h#L63 This flag means this message is part of a set of messages, together forming the actual reply. We get this kind of reply when we issue a PEER_GET with no specific peer-id. Which is what the P2MP code does to fetch the stats. P2P will never call this because it always query its one specific peer. http://gerrit.openvpn.net/c/openvpn/+/1100/comment/131162e2_128a7d4a : PS4, Line 1189: } > I do wonder if this `if()` shouldn't just look at p->mode? Since `multi` does not want to deal with […] as mentioned above, MODE_MP may still need to call ovpn_handle_peer() -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 4 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: cron2 <ge...@gr...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-Comment-Date: Wed, 23 Jul 2025 21:55:04 +0000 Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: cron2 <ge...@gr...> Gerrit-MessageType: comment |
From: ordex (C. Review) <ge...@op...> - 2025-07-23 22:00:28
|
Attention is currently required from: cron2, flichtenheld, plaisthos. Hello cron2, flichtenheld, plaisthos, I'd like you to reexamine a change. Please visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email to look at the new patch set (#5). The following approvals got outdated and were removed: Code-Review-1 by cron2 Change subject: dco_linux: fix async message reception ...................................................................... dco_linux: fix async message reception Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn upon some event, after having set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the according parser. This way, no matter what message we get at what time, but we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: OpenVPN/openvpn#793 Signed-off-by: Antonio Quartulli <an...@ma...> --- M src/openvpn/dco_linux.c M src/openvpn/dco_linux.h 2 files changed, 110 insertions(+), 67 deletions(-) git pull ssh://gerrit.openvpn.net:29418/openvpn refs/changes/00/1100/5 diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 7c639d9..728fb7e 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -167,23 +167,19 @@ } /** - * Send a prepared netlink message and registers cb as callback if non-null. + * Send a prepared netlink message. * * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,31 +877,34 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in (MULTI) PEER_GET reply"); return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,39 +920,53 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in PEER_GET reply"); return NL_SKIP; } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } @@ -1120,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1146,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1174,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1189,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1227,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 5 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: cron2 <ge...@gr...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-MessageType: newpatchset |
From: cron2 (C. Review) <ge...@op...> - 2025-07-25 17:05:03
|
Attention is currently required from: flichtenheld, ordex, plaisthos. cron2 has posted comments on this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... Patch Set 5: (1 comment) File src/openvpn/dco_linux.c: http://gerrit.openvpn.net/c/openvpn/+/1100/comment/acf86e82_8c8c0854 : PS4, Line 1189: } > as mentioned above, MODE_MP may still need to call ovpn_handle_peer() Now you got me started... if we need to have one function handle both P2P and P2MP modes anyway, why do we need two functions? They look similar enough that using `ovpn_handle_peer()` for all `OVPN_CMD_PEER_GET` replies might just work (possibly moving the peer-id check inside the P2P clause to avoid having it in the MP path where it's more like an CANTHAPPEN...)? -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 5 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-Attention: ordex <an...@ma...> Gerrit-Comment-Date: Fri, 25 Jul 2025 17:04:49 +0000 Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: cron2 <ge...@gr...> Comment-In-Reply-To: ordex <an...@ma...> Gerrit-MessageType: comment |
From: cron2 (C. Review) <ge...@op...> - 2025-07-25 17:20:41
|
Attention is currently required from: flichtenheld, ordex, plaisthos. cron2 has posted comments on this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... Patch Set 5: Code-Review+2 (1 comment) Patchset: PS5: Hah, gotcha ``` Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_get_peer_stats_multi Jul 25 19:18:56 ubuntu2004 tun-udp-p2p-tls-sha256[186220]: dco_do_read Jul 25 19:18:56 ubuntu2004 tun-udp-p2p-tls-sha256[186220]: ovpn-dco: received netlink message type=31 cmd=11 flags=0000 Jul 25 19:18:56 ubuntu2004 tun-udp-p2p-tls-sha256[186220]: ovpn-dco: ignoring message for foreign ifindex 982 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn-dco: received netlink message type=31 cmd=11 flags=0000 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn-dco: received CMD_PEER_FLOAT_NTF, ifindex: 982, peer-id 2, address: [AF_INET]193.149.48.173:59979 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn-dco: received netlink message type=31 cmd=3 flags=0x0002 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn_handle_peer_multi: parsing message... Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / dco_read_bytes(0): 615704 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / dco_write_bytes(0): 625840 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / tun_read_bytes(0): 562992 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / tun_write_bytes(0): 568872 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn-dco: received netlink message type=31 cmd=3 flags=0x0002 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: ovpn_handle_peer_multi: parsing message... Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / dco_read_bytes(2): 11732 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / dco_write_bytes(2): 11392 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / tun_read_bytes(2): 9256 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_update_peer_stat / tun_write_bytes(2): 9256 Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_do_read Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: dco_do_read: netlink reports blocking read - aborting wait Jul 25 19:18:56 ubuntu2004 tun-udp-p2mp[186147]: peer 2 (cron2-freebsd-tc-amd64) floated from udp6:193.149.48.172:59979 to [AF_INET6]::ffff:193.149.48.173:59979 Jul 25 19:18:57 ubuntu2004 kernel: [294871.134548] tun1: peer 2 floated to 193.149.48.173:59979 ``` this is precisely the situation that makes the pre-1100 code lose the "FLOAT!" message, because it arrives *after* `dco_get_peer_stats_multi()` starts running (or just before, but we do not look at DCO events) and before the actual cmd=3 reply comes in. Good work! -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 5 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-Attention: plaisthos <arn...@rf...> Gerrit-Attention: flichtenheld <fr...@li...> Gerrit-Attention: ordex <an...@ma...> Gerrit-Comment-Date: Fri, 25 Jul 2025 17:20:30 +0000 Gerrit-HasComments: Yes Gerrit-Has-Labels: Yes Gerrit-MessageType: comment |
From: Gert D. <ge...@gr...> - 2025-07-25 17:27:22
|
From: Antonio Quartulli <an...@ma...> Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn upon some event, after having set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the according parser. This way, no matter what message we get at what time, but we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: OpenVPN/openvpn#793 Signed-off-by: Antonio Quartulli <an...@ma...> Acked-by: Gert Doering <ge...@gr...> --- This change was reviewed on Gerrit and approved by at least one developer. I request to merge it to master. Gerrit URL: https://gerrit.openvpn.net/c/openvpn/+/1100 This mail reflects revision 5 of this Change. Acked-by according to Gerrit (reflected above): Gert Doering <ge...@gr...> diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 7c639d9..728fb7e 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -167,23 +167,19 @@ } /** - * Send a prepared netlink message and registers cb as callback if non-null. + * Send a prepared netlink message. * * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,31 +877,34 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in (MULTI) PEER_GET reply"); return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,39 +920,53 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in PEER_GET reply"); return NL_SKIP; } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } @@ -1120,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1146,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1174,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1189,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1227,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; |
From: Gert D. <ge...@gr...> - 2025-07-25 18:06:13
|
Thanks for that work. I tested all the "easy" notifications (DEL_PEER and FLOAT), and can confirm that the race/mixup we observed in gerrit#1086 (issue #793) is now fixed for good - with sufficient patience it's still possible to inject a FLOAT message between "ask for peer stats" and "receive peer stats", but the new dispatching logic handles things just fine, we're no longer losing FLOAT messages, and life is good. I did not test KEY_SWAP_NTF as that's not easy to trigger (without kernel changes to lower the threshold) but as that code is not affected at all by this commit I do not expect any surprises there. Besides the manual test, this handled the DCO client/server test runs just fine. There's some suggestions to the code I still have, but "it works, ship it" (so we get more exposure to the code we considered fixed) - the cleanup can be a separate patch. Your patch has been applied to the master branch. commit f353b7100c859a02e70723c998594c3efd83c419 Author: Antonio Quartulli Date: Fri Jul 25 19:27:02 2025 +0200 dco_linux: fix async message reception Signed-off-by: Antonio Quartulli <an...@ma...> Acked-by: Gert Doering <ge...@gr...> Message-Id: <202...@gr...> URL: https://www.mail-archive.com/ope...@li.../msg32339.html Signed-off-by: Gert Doering <ge...@gr...> -- kind regards, Gert Doering |
From: cron2 (C. Review) <ge...@op...> - 2025-07-25 18:06:33
|
cron2 has uploaded a new patch set (#6) to the change originally created by ordex. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) The following approvals got outdated and were removed: Code-Review+2 by cron2 Change subject: dco_linux: fix async message reception ...................................................................... dco_linux: fix async message reception Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn (kernel) upon some event, after userland has set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the corresponding handler. This way, no matter what message we get at what time, we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: closes OpenVPN/openvpn#793 Signed-off-by: Antonio Quartulli <an...@ma...> Acked-by: Gert Doering <ge...@gr...> Message-Id: <202...@gr...> URL: https://www.mail-archive.com/ope...@li.../msg32339.html Signed-off-by: Gert Doering <ge...@gr...> --- M src/openvpn/dco_linux.c M src/openvpn/dco_linux.h 2 files changed, 110 insertions(+), 67 deletions(-) git pull ssh://gerrit.openvpn.net:29418/openvpn refs/changes/00/1100/6 diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 7c639d9..728fb7e 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -167,23 +167,19 @@ } /** - * Send a prepared netlink message and registers cb as callback if non-null. + * Send a prepared netlink message. * * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,31 +877,34 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in (MULTI) PEER_GET reply"); return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,39 +920,53 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in PEER_GET reply"); return NL_SKIP; } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } @@ -1120,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1146,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1174,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1189,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1227,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 6 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-MessageType: newpatchset |
From: cron2 (C. Review) <ge...@op...> - 2025-07-25 18:06:40
|
cron2 has submitted this change. ( http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email ) Change subject: dco_linux: fix async message reception ...................................................................... dco_linux: fix async message reception Currently whenever we send a PEER_GET request to ovpn, we also set the CB that is supposed to parse the reply. However, due to the async nature of netlink messages, we could get an unrelated notification, sent by ovpn (kernel) upon some event, after userland has set the CB, but before parsing the awaited reply. When this happens, the notification is then parsed with the configured CB instead of the notification parser, thus effectively rejecting the notification and losing the event. To fix this inconsistency, make ovpn_handle_msg() the default and only netlink parser CB. It is configured upon DCO initialization and is never removed. ovpn_handle_msg() will check the message type and will call the corresponding handler. This way, no matter what message we get at what time, we'll always parse it correctly. As a bonus we can also simplify the nl_sendmsg() API as we don't need to pass the cb and its argument anymore. The ID of the NLCTRL family is now also stored in the DCO context as we need it to check when we receive a mcast ID lookup message. Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Github: closes OpenVPN/openvpn#793 Signed-off-by: Antonio Quartulli <an...@ma...> Acked-by: Gert Doering <ge...@gr...> Message-Id: <202...@gr...> URL: https://www.mail-archive.com/ope...@li.../msg32339.html Signed-off-by: Gert Doering <ge...@gr...> --- M src/openvpn/dco_linux.c M src/openvpn/dco_linux.h 2 files changed, 110 insertions(+), 67 deletions(-) diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c index 7c639d9..728fb7e 100644 --- a/src/openvpn/dco_linux.c +++ b/src/openvpn/dco_linux.c @@ -167,23 +167,19 @@ } /** - * Send a prepared netlink message and registers cb as callback if non-null. + * Send a prepared netlink message. * * The method will also free nl_msg * @param dco The dco context to use * @param nl_msg the message to use - * @param cb An optional callback if the caller expects an answer - * @param cb_arg An optional param to pass to the callback * @param prefix A prefix to report in the error message to give the user context * @return status of sending the message */ static int -ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, ovpn_nl_cb cb, - void *cb_arg, const char *prefix) +ovpn_nl_msg_send(dco_context_t *dco, struct nl_msg *nl_msg, const char *prefix) { dco->status = 1; - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, cb_arg); nl_send_auto(dco->nl_sock, nl_msg); while (dco->status == 1) @@ -285,7 +281,7 @@ } nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -385,6 +381,29 @@ } static void +ovpn_dco_register(dco_context_t *dco) +{ + msg(D_DCO_DEBUG, __func__); + ovpn_get_mcast_id(dco); + + if (dco->ovpn_dco_mcast_id < 0) + { + msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); + } + + /* Register for ovpn-dco specific multicast messages that the kernel may + * send + */ + int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); + if (ret) + { + msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); + } +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg); + +static void ovpn_dco_init_netlink(dco_context_t *dco) { dco->ovpn_dco_id = resolve_ovpn_netlink_id(M_FATAL); @@ -420,11 +439,15 @@ nl_socket_set_cb(dco->nl_sock, dco->nl_cb); + dco->dco_message_peer_id = -1; nl_cb_err(dco->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish, &dco->status); + nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); + + ovpn_dco_register(dco); /* The async PACKET messages confuse libnl and it will drop them with * wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence @@ -476,27 +499,6 @@ CLEAR(dco); } -static void -ovpn_dco_register(dco_context_t *dco) -{ - msg(D_DCO_DEBUG, __func__); - ovpn_get_mcast_id(dco); - - if (dco->ovpn_dco_mcast_id < 0) - { - msg(M_FATAL, "cannot get mcast group: %s", nl_geterror(dco->ovpn_dco_mcast_id)); - } - - /* Register for ovpn-dco specific multicast messages that the kernel may - * send - */ - int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id); - if (ret) - { - msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret); - } -} - int open_tun_dco(struct tuntap *tt, openvpn_net_ctx_t *ctx, const char *dev) { @@ -516,10 +518,6 @@ msg(M_FATAL, "DCO: cannot retrieve ifindex for interface %s", dev); } - tt->dco.dco_message_peer_id = -1; - - ovpn_dco_register(&tt->dco); - return 0; } @@ -548,7 +546,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -572,7 +570,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peerid); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -598,7 +596,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_KEYCONF_SLOT, slot); nla_nest_end(nl_msg, keyconf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -657,7 +655,7 @@ nla_nest_end(nl_msg, key_conf); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -686,7 +684,7 @@ keepalive_timeout); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, NULL, NULL, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -754,7 +752,7 @@ /* Even though 'nlctrl' is a constant, there seem to be no library * provided define for it */ - int ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); + dco->ctrlid = genl_ctrl_resolve(dco->nl_sock, "nlctrl"); struct nl_msg *nl_msg = nlmsg_alloc(); if (!nl_msg) @@ -762,12 +760,12 @@ return -ENOMEM; } - genlmsg_put(nl_msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + genlmsg_put(nl_msg, 0, 0, dco->ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); int ret = -EMSGSIZE; NLA_PUT_STRING(nl_msg, CTRL_ATTR_FAMILY_NAME, OVPN_FAMILY_NAME); - ret = ovpn_nl_msg_send(dco, nl_msg, mcast_family_handler, dco, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); @@ -879,31 +877,34 @@ } static int -dco_parse_peer_multi(struct nl_msg *msg, void *arg) +ovpn_handle_peer_multi(dco_context_t *dco, struct nlattr *attrs[]) { - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); + /* this function assumes openvpn is running in multipeer mode as + * it accesses c->multi + */ + if (dco->ifmode != OVPN_MODE_MP) + { + msg(M_WARN, "%s: can't parse 'multi-peer' message on P2P instance", __func__); + return NL_SKIP; + } - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in (MULTI) PEER_GET reply"); return NL_SKIP; } - struct multi_context *m = arg; + struct multi_context *m = dco->c->multi; uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); if (peer_id >= m->max_clients || !m->instances[peer_id]) @@ -919,39 +920,53 @@ } static int -dco_parse_peer(struct nl_msg *msg, void *arg) +ovpn_handle_peer(dco_context_t *dco, struct nlattr *attrs[]) { - struct context *c = arg; - struct nlattr *tb[OVPN_A_MAX + 1]; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); - msg(D_DCO_DEBUG, "%s: parsing message...", __func__); - nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), - genlmsg_attrlen(gnlh, 0), NULL); - - if (!tb[OVPN_A_PEER]) + if (!attrs[OVPN_A_PEER]) { msg(D_DCO_DEBUG, "%s: malformed reply", __func__); return NL_SKIP; } struct nlattr *tb_peer[OVPN_A_PEER_MAX + 1]; - nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, tb[OVPN_A_PEER], NULL); + nla_parse_nested(tb_peer, OVPN_A_PEER_MAX, attrs[OVPN_A_PEER], NULL); if (!tb_peer[OVPN_A_PEER_ID]) { - msg(M_WARN, "%s: no peer-id provided in reply", __func__); + msg(M_WARN, "ovpn-dco: no peer-id provided in PEER_GET reply"); return NL_SKIP; } uint32_t peer_id = nla_get_u32(tb_peer[OVPN_A_PEER_ID]); - if (c->c2.tls_multi->dco_peer_id != peer_id) + struct context_2 *c2; + + if (dco->ifmode == OVPN_MODE_P2P) + { + c2 = &dco->c->c2; + } + else + { + struct multi_instance *mi = dco->c->multi->instances[peer_id]; + if (!mi) + { + msg(M_WARN, "%s: received data for a non-existing peer %u", __func__, peer_id); + return NL_SKIP; + } + + c2 = &mi->context.c2; + } + + /* at this point this check should never fail for MP mode, + * but it's still fully valid for P2P mode + */ + if (c2->tls_multi->dco_peer_id != peer_id) { return NL_SKIP; } - dco_update_peer_stat(&c->c2, tb_peer, peer_id); + dco_update_peer_stat(c2, tb_peer, peer_id); return NL_OK; } @@ -1120,9 +1135,22 @@ { dco_context_t *dco = arg; - struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); struct nlattr *attrs[OVPN_A_MAX + 1]; struct nlmsghdr *nlh = nlmsg_hdr(msg); + struct genlmsghdr *gnlh = genlmsg_hdr(nlh); + + msg(D_DCO_DEBUG, "ovpn-dco: received netlink message type=%u cmd=%u flags=%#.4x", + nlh->nlmsg_type, gnlh->cmd, nlh->nlmsg_flags); + + /* if we get a message from the NLCTRL family, it means + * this is the reply to the mcast ID resolution request + * and we parse it accordingly. + */ + if (nlh->nlmsg_type == dco->ctrlid) + { + msg(D_DCO_DEBUG, "ovpn-dco: received CTRLID message"); + return mcast_family_handler(msg, dco); + } if (!genlmsg_valid_hdr(nlh, 0)) { @@ -1146,6 +1174,21 @@ */ switch (gnlh->cmd) { + case OVPN_CMD_PEER_GET: + { + /* this message is part of a peer list dump, hence triggered + * by a MP/server instance + */ + if (nlh->nlmsg_flags & NLM_F_MULTI) + { + return ovpn_handle_peer_multi(dco, attrs); + } + else + { + return ovpn_handle_peer(dco, attrs); + } + } + case OVPN_CMD_PEER_DEL_NTF: { return ovpn_handle_peer_del_ntf(dco, attrs); @@ -1174,7 +1217,6 @@ dco_do_read(dco_context_t *dco) { msg(D_DCO_DEBUG, __func__); - nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco); return ovpn_nl_recvmsgs(dco, __func__); } @@ -1189,7 +1231,7 @@ nlmsg_hdr(nl_msg)->nlmsg_flags |= NLM_F_DUMP; - int ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer_multi, m, __func__); + int ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nlmsg_free(nl_msg); @@ -1227,7 +1269,7 @@ NLA_PUT_U32(nl_msg, OVPN_A_PEER_ID, peer_id); nla_nest_end(nl_msg, attr); - ret = ovpn_nl_msg_send(dco, nl_msg, dco_parse_peer, c, __func__); + ret = ovpn_nl_msg_send(dco, nl_msg, __func__); nla_put_failure: nlmsg_free(nl_msg); diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h index 5e61cf1..cc14f45 100644 --- a/src/openvpn/dco_linux.h +++ b/src/openvpn/dco_linux.h @@ -66,6 +66,7 @@ int status; struct context *c; + int ctrlid; enum ovpn_mode ifmode; -- To view, visit http://gerrit.openvpn.net/c/openvpn/+/1100?usp=email To unsubscribe, or for help writing mail filters, visit http://gerrit.openvpn.net/settings Gerrit-Project: openvpn Gerrit-Branch: master Gerrit-Change-Id: I23ad79e14844aefde9ece34dadef0b75ff267201 Gerrit-Change-Number: 1100 Gerrit-PatchSet: 6 Gerrit-Owner: ordex <an...@ma...> Gerrit-Reviewer: cron2 <ge...@gr...> Gerrit-Reviewer: flichtenheld <fr...@li...> Gerrit-Reviewer: plaisthos <arn...@rf...> Gerrit-CC: openvpn-devel <ope...@li...> Gerrit-MessageType: merged |