accel-ppp-users Mailing List for accel-ppp (Page 3)

Status: Beta

Brought to you by: xebd

accel-ppp-users — This mailing list is for all users and developers.

You can subscribe to this list here.

2012	Jan	Feb (2)	Mar (4)	Apr	May (3)	Jun (4)	Jul (9)	Aug (38)	Sep (24)	Oct (25)	Nov (2)	Dec (5)
2013	Jan (20)	Feb (64)	Mar (58)	Apr (17)	May	Jun (29)	Jul (16)	Aug	Sep (1)	Oct (8)	Nov (1)	Dec
2014	Jan (25)	Feb (11)	Mar (15)	Apr (33)	May (2)	Jun (23)	Jul (3)	Aug (11)	Sep (12)	Oct (37)	Nov (8)	Dec
2015	Jan (10)	Feb (4)	Mar (2)	Apr (4)	May	Jun (3)	Jul (3)	Aug	Sep	Oct	Nov	Dec (12)
2016	Jan	Feb (1)	Mar (3)	Apr (18)	May (21)	Jun (28)	Jul (11)	Aug	Sep	Oct (1)	Nov (21)	Dec
2017	Jan	Feb	Mar	Apr	May	Jun	Jul	Aug (1)	Sep	Oct	Nov	Dec
2018	Jan (4)	Feb (6)	Mar (7)	Apr	May	Jun (13)	Jul (1)	Aug	Sep (3)	Oct (50)	Nov (21)	Dec (16)

Flat | Threaded

<< < 1 2 3 4 5 .. 31 > >> (Page 3 of 31)

[Accel-ppp-users] [PATCH 04/16] core: make ap_net.h self-contained

From: Guillaume N. <g....@al...> - 2018-10-24 14:36:10

We need to include <sys/socket.h> to define 'socklen_t', <sys/types.h>
for 'ssize_t' and "list.h" for 'struct list_head'.

Also, let's include "libnetlink.h" so that we don't need a forward
declaration for 'struct rtnl_handle'.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/include/ap_net.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/accel-pppd/include/ap_net.h b/accel-pppd/include/ap_net.h
index 25121d94..91ebdd5e 100644
--- a/accel-pppd/include/ap_net.h
+++ b/accel-pppd/include/ap_net.h
@@ -1,7 +1,11 @@
 #ifndef __AP_NET_H
 #define __AP_NET_H
 
-struct rtnl_handle;
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "libnetlink.h"
+#include "list.h"
 
 struct ap_net {
 	struct list_head entry;
-- 
2.19.1

[Accel-ppp-users] [PATCH 03/16] connlimit: make connlimit.h self-contained

From: Guillaume N. <g....@al...> - 2018-10-24 14:36:10

We need to include <stdint.h> to define 'uint*_t' and <string.h> for
'memcpy'.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/include/connlimit.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/accel-pppd/include/connlimit.h b/accel-pppd/include/connlimit.h
index 8711cf8b..2b2f0f4d 100644
--- a/accel-pppd/include/connlimit.h
+++ b/accel-pppd/include/connlimit.h
@@ -1,6 +1,9 @@
 #ifndef __CONNLIMIT_H
 #define __CONNLIMIT_H
 
+#include <stdint.h>
+#include <string.h>
+
 static inline uint64_t cl_key_from_ipv4(uint32_t ip)
 {
 	return ip;
-- 
2.19.1

[Accel-ppp-users] [PATCH 02/16] backup: make backup.h self-contained

From: Guillaume N. <g....@al...> - 2018-10-24 14:36:09

We need to include <sys/types.h> to define 'size_t'.

Also, let's include "ap_session.h" so that we don't need a forward
declaration for 'struct ap_session'.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/backup/backup.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/accel-pppd/backup/backup.h b/accel-pppd/backup/backup.h
index d36f70bd..106303e2 100644
--- a/accel-pppd/backup/backup.h
+++ b/accel-pppd/backup/backup.h
@@ -2,6 +2,9 @@
 #define __BACKUP_H
 
 #include <stdint.h>
+#include <sys/types.h>
+
+#include "ap_session.h"
 #include "list.h"
 
 #define MODID_COMMON  1
@@ -12,8 +15,6 @@
 #define MODID_L2TP    6
 #define MODID_IPPOOL  7
 
-
-struct ap_session;
 struct backup_storage;
 struct backup_data;
 
-- 
2.19.1

[Accel-ppp-users] [PATCH 01/16] backup: make ap_session_backup.h self-contained

From: Guillaume N. <g....@al...> - 2018-10-24 14:36:03

We need to include <netinet/in.h> to define 'struct in6_addr' and
<stdint.h> for 'uint8_t'.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/include/ap_session_backup.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/accel-pppd/include/ap_session_backup.h b/accel-pppd/include/ap_session_backup.h
index e6382ff5..9e175c85 100644
--- a/accel-pppd/include/ap_session_backup.h
+++ b/accel-pppd/include/ap_session_backup.h
@@ -1,6 +1,9 @@
 #ifndef __AP_SESSION_BACKUP_H
 #define __AP_SESSION_BACKUP_H
 
+#include <netinet/in.h>
+#include <stdint.h>
+
 #define SES_TAG_USERNAME           1
 #define SES_TAG_SESSIONID          2
 #define SES_TAG_START_TIME         3
-- 
2.19.1

[Accel-ppp-users] [PATCH 00/16] Fix public include files

From: Guillaume N. <g....@al...> - 2018-10-24 14:36:01

This patch series fixes header files found under include/ so that they
include their own dependencies. This way, one can include any file
from include/ without having to manually figure out the dependencies.

Each of the include/ header files has been tested for correct
compilation when included in an otherwise empty .c file.


Guillaume Nault (16):
  backup: make ap_session_backup.h self-contained
  backup: make backup.h self-contained
  connlimit: make connlimit.h self-contained
  core: make ap_net.h self-contained
  iputils: make iputils.h self-contained
  libnetlink: make ipset.h self-contained
  libnetlink: make libnetlink.h self-contained
  lua: make luasupp.h self-contained
  memdebug: make memdebug.h self-contained even when MEMDEBUG isn't
    defined
  ppp: make ppp_lcp.h self-contained
  ppp: make ppp_auth.h self-contained
  ppp: make ppp_fsm.h self-contained
  radius: make radius.h self-contained
  triton: make mempool.h self-contained even when MEMDEBUG is defined
  vlan_mon: make vlan_mon.h self-contained
  cli: fix include directive for list.h

 accel-pppd/backup/backup.h             | 5 +++--
 accel-pppd/cli/cli.h                   | 3 ++-
 accel-pppd/include/ap_net.h            | 6 +++++-
 accel-pppd/include/ap_session_backup.h | 3 +++
 accel-pppd/include/connlimit.h         | 3 +++
 accel-pppd/include/vlan_mon.h          | 2 ++
 accel-pppd/libnetlink/ipset.h          | 2 ++
 accel-pppd/libnetlink/iputils.h        | 2 ++
 accel-pppd/libnetlink/libnetlink.h     | 1 +
 accel-pppd/lua/luasupp.h               | 2 ++
 accel-pppd/memdebug.h                  | 5 +++++
 accel-pppd/ppp/ppp_auth.h              | 3 +++
 accel-pppd/ppp/ppp_fsm.h               | 7 +++++--
 accel-pppd/ppp/ppp_lcp.h               | 2 ++
 accel-pppd/radius/radius.h             | 6 ++++--
 accel-pppd/triton/mempool.h            | 2 ++
 16 files changed, 46 insertions(+), 8 deletions(-)

-- 
2.19.1

Re: [Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Dmitry K. <xe...@ma...> - 2018-10-23 07:27:48

>Several modules assume that if ses->ipv6 is set, then
>ses->ipv6->addr_list contains at least one element. But this is not
>true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
>(ipv6cp_opt_intfid.c). That is, if the PPP session only has an
>automatic link-local address.
>
>This leads modules like pppd-compat and dhcpv6 to access invalid memory
>when trying to retrieve the IPv6 address of a PPP session.
>
>Signed-off-by: Guillaume Nault < g....@al... >
>---
>This patch probably fixes github's issue #49
>https://github.com/xebd/accel-ppp/issues/49
>
>Also, we could alternatively remove the ipv6cp ipdb backend entirely.
>But there would be side effects. Currently, if no IPv6 is defined (by
>radius or ipv6db), we can still negotiate a link-local address, then
>define a routable IPv6 address statically with pppd_compat. Without the
>pseudo ipdb backend, accel-ppp would reject IPv6CP entirely in such
>cases.
>
> accel-pppd/cli/show_sessions.c | 2 +-
> accel-pppd/extra/pppd_compat.c | 2 +-
> accel-pppd/ipv6/dhcpv6.c       | 2 +-
> accel-pppd/lua/session.c       | 2 +-
> 4 files changed, 4 insertions(+), 4 deletions(-)

Applied

-- 
Dmitry Kozlov

Re: [Accel-ppp-users] [PATCH] cli: add network namespace column in "show sessions"

From: Dmitry K. <xe...@ma...> - 2018-10-23 06:46:17

>Define a new column, called "netns", that prints the network namespace
>in which sessions are set.
>
>Signed-off-by: Guillaume Nault < g....@al... >
>---
> accel-pppd/cli/show_sessions.c | 6 ++++++
> 1 file changed, 6 insertions(+)

Applied, thanks.

-- 
Dmitry Kozlov

Re: [Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Guillaume N. <g....@al...> - 2018-10-22 10:29:41

On Sat, Oct 20, 2018 at 10:55:54AM +0300, Dmitry Kozlov wrote:
> >On Tue, Oct 09, 2018 at 07:45:48PM +0200, Guillaume Nault wrote:
> >> On Fri, Sep 21, 2018 at 02:16:17PM +0200, Guillaume Nault wrote:
> >> > Several modules assume that if ses->ipv6 is set, then
> >> > ses->ipv6->addr_list contains at least one element. But this is not
> >> > true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
> >> > (ipv6cp_opt_intfid.c). That is, if the PPP session only has an
> >> > automatic link-local address.
> >> > 
> >> I had no news for this patch. Is there anything wrong with it?
> >> 
> >BTW, this patch probably fixes github's issue #49:
> >https://github.com/xebd/accel-ppp/issues/49
> >
> >Guillaume
> 
> didn't get this patch too
> 
Ok, I've just resent it, together with another patch for which I had no
news (netns column in "show sessions").

[Accel-ppp-users] [PATCH] cli: add network namespace column in "show sessions"

From: Guillaume N. <g....@al...> - 2018-10-22 10:25:23

Define a new column, called "netns", that prints the network namespace
in which sessions are set.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/cli/show_sessions.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/accel-pppd/cli/show_sessions.c b/accel-pppd/cli/show_sessions.c
index 443b186a..a96a31d8 100644
--- a/accel-pppd/cli/show_sessions.c
+++ b/accel-pppd/cli/show_sessions.c
@@ -379,6 +379,11 @@ early_out:
 	goto out;
 }
 
+static void print_netns(struct ap_session *ses, char *buf)
+{
+	snprintf(buf, CELL_SIZE, "%s", ses->net->name);
+}
+
 static void print_ifname(struct ap_session *ses, char *buf)
 {
 	snprintf(buf, CELL_SIZE, "%s", ses->ifname);
@@ -633,6 +638,7 @@ static void init(void)
 
 	cli_register_simple_cmd2(show_ses_exec, show_ses_help, 2, "show", "sessions");
 
+	cli_show_ses_register("netns", "network namespace name", print_netns);
 	cli_show_ses_register("ifname", "interface name", print_ifname);
 	cli_show_ses_register("username", "user name", print_username);
 	cli_show_ses_register("ip", "IP address", print_ip);
-- 
2.19.1

[Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Guillaume N. <g....@al...> - 2018-10-22 10:24:17

Several modules assume that if ses->ipv6 is set, then
ses->ipv6->addr_list contains at least one element. But this is not
true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
(ipv6cp_opt_intfid.c). That is, if the PPP session only has an
automatic link-local address.

This leads modules like pppd-compat and dhcpv6 to access invalid memory
when trying to retrieve the IPv6 address of a PPP session.

Signed-off-by: Guillaume Nault <g....@al...>
---
This patch probably fixes github's issue #49
https://github.com/xebd/accel-ppp/issues/49

Also, we could alternatively remove the ipv6cp ipdb backend entirely.
But there would be side effects. Currently, if no IPv6 is defined (by
radius or ipv6db), we can still negotiate a link-local address, then
define a routable IPv6 address statically with pppd_compat. Without the
pseudo ipdb backend, accel-ppp would reject IPv6CP entirely in such
cases.

 accel-pppd/cli/show_sessions.c | 2 +-
 accel-pppd/extra/pppd_compat.c | 2 +-
 accel-pppd/ipv6/dhcpv6.c       | 2 +-
 accel-pppd/lua/session.c       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/accel-pppd/cli/show_sessions.c b/accel-pppd/cli/show_sessions.c
index 443b186a..c725e05d 100644
--- a/accel-pppd/cli/show_sessions.c
+++ b/accel-pppd/cli/show_sessions.c
@@ -414,7 +414,7 @@ static void print_ip6(struct ap_session *ses, char *buf)
 	struct in6_addr addr;
 	char *ptr;
 
-	if (!ses->ipv6) {
+	if (!ses->ipv6 || list_empty(&ses->ipv6->addr_list)) {
 		*buf = 0;
 		return;
 	}
diff --git a/accel-pppd/extra/pppd_compat.c b/accel-pppd/extra/pppd_compat.c
index 318327e8..7c23eca0 100644
--- a/accel-pppd/extra/pppd_compat.c
+++ b/accel-pppd/extra/pppd_compat.c
@@ -622,7 +622,7 @@ static void fill_env(char **env, char *mem, struct pppd_compat_pd *pd)
 	mem += write_sz + 1;
 	++n;
 
-	if (ses->ipv6) {
+	if (ses->ipv6 && !list_empty(&ses->ipv6->addr_list)) {
 		///FIXME only first address is passed to env
 		struct ipv6db_addr_t *a = list_first_entry(&ses->ipv6->addr_list,
 							   typeof(*a), entry);
diff --git a/accel-pppd/ipv6/dhcpv6.c b/accel-pppd/ipv6/dhcpv6.c
index 84d36caa..36a5f15e 100644
--- a/accel-pppd/ipv6/dhcpv6.c
+++ b/accel-pppd/ipv6/dhcpv6.c
@@ -69,7 +69,7 @@ static void ev_ses_started(struct ap_session *ses)
 	int sock;
 	int f = 1;
 
-	if (!ses->ipv6)
+	if (!ses->ipv6 || list_empty(&ses->ipv6->addr_list))
 		return;
 
 	a = list_entry(ses->ipv6->addr_list.next, typeof(*a), entry);
diff --git a/accel-pppd/lua/session.c b/accel-pppd/lua/session.c
index 277b299f..bd98911b 100644
--- a/accel-pppd/lua/session.c
+++ b/accel-pppd/lua/session.c
@@ -197,7 +197,7 @@ static int session_ipv6(lua_State *L)
 	if (!ses)
 		return 0;
 
-	if (ses->ipv6) {
+	if (ses->ipv6 && !list_empty(&ses->ipv6->addr_list)) {
 		a = list_entry(ses->ipv6->addr_list.next, typeof(*a), entry);
 		if (a->prefix_len) {
 			build_ip6_addr(a, ses->ipv6->peer_intf_id, &addr);
-- 
2.19.1

Re: [Accel-ppp-users] [PATCH 0/2] pppd_compat: fixes fork() realted crashes and dead-locks

From: Guillaume N. <g....@al...> - 2018-10-22 10:13:39

On Sat, Oct 20, 2018 at 10:45:30AM +0300, Dmitry Kozlov wrote:
> >May I get some feedbacks for this series? We can discuss other solutions,
> >but I feel these issues are quite important to resolve.
> >
> >Regards,
> >
> >Guillaume
> 
> Hi,
> I didn't get these patches, please try to resend
> 
Ok, now resent (with the alternative fix too). Thanks Dmitry.

Re: [Accel-ppp-users] [PATCH 1/2] triton: fix context schedule/wakeup race

From: Guillaume N. <g....@al...> - 2018-10-22 10:09:08

On Mon, Oct 22, 2018 at 12:00:00PM +0200, Guillaume Nault wrote:
> The problem is that there is no synchronisation between the parent and
> its child. When under stress, the child may execute faster than its
> parent and the sigchld callback might run triton_context_wakeup()
> before the parent had time to call triton_context_schedule().
> 
> Then accel-ppp might crash because the triton thread might have reset
> ctx->thread to NULL, making triton_context_wakeup() write to invalid
> memory when trying to insert the context in ctx->thread->wakeup_list[].
> 
> Synchronising the parent and its child completion's callback would
> require cooperation from triton_context_schedule(). Otherwise we would
> still have a time frame between the moment we let the callback waking
> up the context and the moment we put the context in sleep mode.
> Allowing schedule/wakeup call inversion in triton looks simpler since
> it avoids modifying the current API.
> 
Here's what this alternative approach would look like:
  Let triton_context_schedule() take a functionn pointer as parameter
  and call it just before jumping back to the triton_thread. Then we
  have a race-free way to unlock the sigchld module in pppd-compat.

-------- 8< --------
diff --git a/accel-pppd/extra/pppd_compat.c b/accel-pppd/extra/pppd_compat.c
index 318327e8..0d3ac031 100644
--- a/accel-pppd/extra/pppd_compat.c
+++ b/accel-pppd/extra/pppd_compat.c
@@ -252,9 +252,8 @@ static void ev_ses_pre_up(struct ap_session *ses)
 		sigchld_register_handler(&pd->hnd);
 		if (conf_verbose)
 			log_ppp_info2("pppd_compat: ip-pre-up started (pid %i)\n", pid);
-		sigchld_unlock();
 
-		triton_context_schedule();
+		triton_context_schedule_after(sigchld_unlock);
 
 		pthread_mutex_lock(&pd->hnd.lock);
 		pthread_mutex_unlock(&pd->hnd.lock);
@@ -368,9 +367,8 @@ static void ev_ses_finished(struct ap_session *ses)
 			sigchld_register_handler(&pd->hnd);
 			if (conf_verbose)
 				log_ppp_info2("pppd_compat: ip-down started (pid %i)\n", pid);
-			sigchld_unlock();
 
-			triton_context_schedule();
+			triton_context_schedule_after(sigchld_unlock);
 
 			pthread_mutex_lock(&pd->hnd.lock);
 			pthread_mutex_unlock(&pd->hnd.lock);
diff --git a/accel-pppd/triton/triton.c b/accel-pppd/triton/triton.c
index d5bd01d3..2e8fb4ad 100644
--- a/accel-pppd/triton/triton.c
+++ b/accel-pppd/triton/triton.c
@@ -486,7 +486,7 @@ static ucontext_t * __attribute__((noinline)) alloc_context()
 	return uc;
 }
 
-void __export triton_context_schedule()
+void __export triton_context_schedule_after(void (*func)(void))
 {
 	volatile struct _triton_context_t *ctx = (struct _triton_context_t *)this_ctx->tpd;
 
@@ -512,6 +512,8 @@ void __export triton_context_schedule()
 	} else {
 		ctx->thread->ctx = NULL;
 		spin_unlock(&threads_lock);
+		if (func)
+			func();
 		longjmp(jmp_env, 1);
 	}
 }
diff --git a/accel-pppd/triton/triton.h b/accel-pppd/triton/triton.h
index 79bbee1b..2b2bf460 100644
--- a/accel-pppd/triton/triton.h
+++ b/accel-pppd/triton/triton.h
@@ -74,12 +74,17 @@ extern struct triton_stat_t triton_stat;
 int triton_context_register(struct triton_context_t *, void *arg);
 void triton_context_unregister(struct triton_context_t *);
 void triton_context_set_priority(struct triton_context_t *, int);
-void triton_context_schedule(void);
+void triton_context_schedule_after(void (*func)(void));
 void triton_context_wakeup(struct triton_context_t *);
 int triton_context_call(struct triton_context_t *, void (*func)(void *), void *arg);
 void triton_cancel_call(struct triton_context_t *, void (*func)(void *));
 struct triton_context_t *triton_context_self(void);
 
+static inline void triton_context_schedule(void)
+{
+	triton_context_schedule_after(NULL);
+}
+
 #define MD_MODE_READ 1
 #define MD_MODE_WRITE 2

[Accel-ppp-users] [PATCH 2/2] pppd_compat: fix handling of fork() failures

From: Guillaume N. <g....@al...> - 2018-10-22 10:00:27

When accel-ppp is under stress (for example because of massive
disconnections) it may enter a state where no session could be
created or destroyed anymore.

This happens when at least one of the pppd_compat fork() fail. In this
case, the error code path doesn't unlock the sigchld handler, which
prevents it from running the completion callbacks of running scripts.
If the "fork-limit" option is used, failure to call the completion
callback will prevent other scripts from running. This will block
setting up and tearing down sessions, as those will wait indefinitely
for their pppd_compat scripts to run.

Therefore, we have to unlock the sigchld handler when fork() fails.
We also need to call fork_queue_wakeup(), because the previous
check_fork_limit() call already took one reference in the fork limit.

Finally, ev_ses_pre_up() is a bit special because it has to tear the
session down if the ip-pre-up script failed. Therefore it also has to
call ap_session_terminate() upon fork() failures.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/extra/pppd_compat.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/accel-pppd/extra/pppd_compat.c b/accel-pppd/extra/pppd_compat.c
index 318327e8..cf18af4c 100644
--- a/accel-pppd/extra/pppd_compat.c
+++ b/accel-pppd/extra/pppd_compat.c
@@ -275,8 +275,12 @@ static void ev_ses_pre_up(struct ap_session *ses)
 
 		log_emerg("pppd_compat: exec '%s': %s\n", conf_ip_pre_up, strerror(errno));
 		_exit(EXIT_FAILURE);
-	} else
+	} else {
+		sigchld_unlock();
+		fork_queue_wakeup();
 		log_error("pppd_compat: fork: %s\n", strerror(errno));
+		ap_session_terminate(ses, TERM_NAS_ERROR, 0);
+	}
 }
 
 static void ev_ses_started(struct ap_session *ses)
@@ -325,8 +329,11 @@ static void ev_ses_started(struct ap_session *ses)
 
 		log_emerg("pppd_compat: exec '%s': %s\n", conf_ip_up, strerror(errno));
 		_exit(EXIT_FAILURE);
-	} else
+	} else {
+		sigchld_unlock();
+		fork_queue_wakeup();
 		log_error("pppd_compat: fork: %s\n", strerror(errno));
+	}
 }
 
 static void ev_ses_finished(struct ap_session *ses)
@@ -385,8 +392,11 @@ static void ev_ses_finished(struct ap_session *ses)
 
 			log_emerg("pppd_compat: exec '%s': %s\n", conf_ip_down, strerror(errno));
 			_exit(EXIT_FAILURE);
-		} else
+		} else {
+			sigchld_unlock();
+			fork_queue_wakeup();
 			log_error("pppd_compat: fork: %s\n", strerror(errno));
+		}
 	}
 
 	if (pd->ip_up_hnd.pid) {
@@ -474,8 +484,11 @@ static void ev_radius_coa(struct ev_radius_t *ev)
 
 			log_emerg("pppd_compat: exec '%s': %s\n", conf_ip_change, strerror(errno));
 			_exit(EXIT_FAILURE);
-		} else
+		} else {
+			sigchld_unlock();
+			fork_queue_wakeup();
 			log_error("pppd_compat: fork: %s\n", strerror(errno));
+		}
 	}
 }
 
-- 
2.19.1

[Accel-ppp-users] [PATCH 1/2] triton: fix context schedule/wakeup race

From: Guillaume N. <g....@al...> - 2018-10-22 10:00:18

Allow triton_context_wakeup() to run before triton_context_schedule().
When that happens, triton_context_schedule() now lets the context
running instead of putting it in sleep mode.

Note that, even though triton now allows triton_context_wakeup() to
happen before triton_context_schedule(), these two functions still need
to be paired and not nested. That is, in a sequence like the following,

triton_context_wakeup()
triton_context_wakeup()
triton_context_schedule()
triton_context_schedule()

the second triton_context_schedule() would put the context in sleep
mode. No matter how many triton_context_wakeup() have been called, the
first triton_context_schedule() "consumes" them all.

Being immune to schedule/wakeup inversion allows to fix the pppd_compat
module. This module needs to fork() to execute external programs. The
parent then waits for completion of its child using
triton_context_schedule(). When child terminates, the sigchld module
runs a callback that has to call triton_context_wakeup() to resume
execution of the parent.

The problem is that there is no synchronisation between the parent and
its child. When under stress, the child may execute faster than its
parent and the sigchld callback might run triton_context_wakeup()
before the parent had time to call triton_context_schedule().

Then accel-ppp might crash because the triton thread might have reset
ctx->thread to NULL, making triton_context_wakeup() write to invalid
memory when trying to insert the context in ctx->thread->wakeup_list[].

Synchronising the parent and its child completion's callback would
require cooperation from triton_context_schedule(). Otherwise we would
still have a time frame between the moment we let the callback waking
up the context and the moment we put the context in sleep mode.
Allowing schedule/wakeup call inversion in triton looks simpler since
it avoids modifying the current API.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/triton/triton.c   | 18 +++++++++++++++---
 accel-pppd/triton/triton_p.h |  1 +
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/accel-pppd/triton/triton.c b/accel-pppd/triton/triton.c
index d5bd01d3..3ea72f02 100644
--- a/accel-pppd/triton/triton.c
+++ b/accel-pppd/triton/triton.c
@@ -125,7 +125,7 @@ static void* triton_thread(struct _triton_thread_t *thread)
 	while (1) {
 		spin_lock(&threads_lock);
 		if (!need_config_reload && check_ctx_queue_empty(thread)) {
-			if (thread->ctx->wakeup) {
+			if (thread->ctx->asleep && thread->ctx->wakeup) {
 				log_debug2("thread: %p: wakeup ctx %p\n", thread, thread->ctx);
 				list_del(&thread->ctx->entry2);
 				spin_unlock(&threads_lock);
@@ -503,6 +503,7 @@ void __export triton_context_schedule()
 
 	spin_lock(&threads_lock);
 	if (ctx->wakeup) {
+		ctx->asleep = 0;
 		ctx->wakeup = 0;
 		spin_unlock(&threads_lock);
 		_free(ctx->uc);
@@ -510,6 +511,7 @@ void __export triton_context_schedule()
 		__sync_sub_and_fetch(&triton_stat.context_sleeping, 1);
 		log_debug2("ctx %p: exit schedule\n", ctx);
 	} else {
+		ctx->asleep = 1;
 		ctx->thread->ctx = NULL;
 		spin_unlock(&threads_lock);
 		longjmp(jmp_env, 1);
@@ -532,9 +534,19 @@ void __export triton_context_wakeup(struct triton_context_t *ud)
 		spin_unlock(&ctx->lock);
 	} else {
 		spin_lock(&threads_lock);
+		/* In some cases (pppd_compat.c), triton_context_wakeup() might
+		 * be called before triton_context_schedule(). When that
+		 * happens, we must not add 'ctx' to the wakeup_list as it is
+		 * still awake. However we need to set the 'wakeup' flag. This
+		 * way, when triton_context_schedule() will run, it will
+		 * realise that triton_context_wakeup() was already executed
+		 * and will avoid putting 'ctx' in sleep mode.
+		 */
 		ctx->wakeup = 1;
-		list_add_tail(&ctx->entry2, &ctx->thread->wakeup_list[ctx->priority]);
-		r = ctx->thread->ctx == NULL;
+		if (ctx->asleep) {
+			list_add_tail(&ctx->entry2, &ctx->thread->wakeup_list[ctx->priority]);
+			r = ctx->thread->ctx == NULL;
+		}
 		spin_unlock(&threads_lock);
 	}
 
diff --git a/accel-pppd/triton/triton_p.h b/accel-pppd/triton/triton_p.h
index fe735bf6..a727e62e 100644
--- a/accel-pppd/triton/triton_p.h
+++ b/accel-pppd/triton/triton_p.h
@@ -40,6 +40,7 @@ struct _triton_context_t
 	int init;
 	int queued;
 	int wakeup;
+	int asleep;
 	int need_close;
 	int need_free;
 	int pending;
-- 
2.19.1

[Accel-ppp-users] [PATCH 0/2] pppd_compat: fixes fork() realted crashes and dead-locks

From: Guillaume N. <g....@al...> - 2018-10-22 10:00:15

This series fixes two problems that can happen when many pppd_compat
scripts need to run at the same moment.

Patch 1 is a bit tricky. If fixes a race condition that happens inside
triton when triton_context_wakeup() is run before the corresponding
triton_context_schedule(). As explained in the patch, pppd_compat has
no way to ensure proper ordering of these functions with the current
triton API. So this patch modifies triton to explicitly handle the
case where triton_context_wakeup() is called before
triton_context_schedule().

An alternative fix would be to extend triton_context_schedule() so that
it could run an arbitrary function and yield atomically. I will post
this alternative fix too, so that one can compare both approaches.

Patch 2 fixes a deadlock that happens when fork() fails. The failed
attempt was not removed from the fork counter, preventing other scripts
from being executed when the "fork-limit" option was set.


Guillaume Nault (2):
  triton: fix context schedule/wakeup race
  pppd_compat: fix handling of fork() failures

 accel-pppd/extra/pppd_compat.c | 21 +++++++++++++++++----
 accel-pppd/triton/triton.c     | 18 +++++++++++++++---
 accel-pppd/triton/triton_p.h   |  1 +
 3 files changed, 33 insertions(+), 7 deletions(-)

-- 
2.19.1

Re: [Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Dmitry K. <xe...@ma...> - 2018-10-20 07:56:07

>On Tue, Oct 09, 2018 at 07:45:48PM +0200, Guillaume Nault wrote:
>> On Fri, Sep 21, 2018 at 02:16:17PM +0200, Guillaume Nault wrote:
>> > Several modules assume that if ses->ipv6 is set, then
>> > ses->ipv6->addr_list contains at least one element. But this is not
>> > true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
>> > (ipv6cp_opt_intfid.c). That is, if the PPP session only has an
>> > automatic link-local address.
>> > 
>> I had no news for this patch. Is there anything wrong with it?
>> 
>BTW, this patch probably fixes github's issue #49:
>https://github.com/xebd/accel-ppp/issues/49
>
>Guillaume

didn't get this patch too

-- 
Dmitry Kozlov

Re: [Accel-ppp-users] [PATCH 0/2] pppd_compat: fixes fork() realted crashes and dead-locks

From: Dmitry K. <xe...@ma...> - 2018-10-20 07:45:41

>May I get some feedbacks for this series? We can discuss other solutions,
>but I feel these issues are quite important to resolve.
>
>Regards,
>
>Guillaume

Hi,
I didn't get these patches, please try to resend

-- 
Dmitry Kozlov

Re: [Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Guillaume N. <g....@al...> - 2018-10-19 09:49:53

On Tue, Oct 09, 2018 at 07:45:48PM +0200, Guillaume Nault wrote:
> On Fri, Sep 21, 2018 at 02:16:17PM +0200, Guillaume Nault wrote:
> > Several modules assume that if ses->ipv6 is set, then
> > ses->ipv6->addr_list contains at least one element. But this is not
> > true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
> > (ipv6cp_opt_intfid.c). That is, if the PPP session only has an
> > automatic link-local address.
> > 
> I had no news for this patch. Is there anything wrong with it?
> 
BTW, this patch probably fixes github's issue #49:
https://github.com/xebd/accel-ppp/issues/49

Guillaume

Re: [Accel-ppp-users] [PATCH 0/2] pppd_compat: fixes fork() realted crashes and dead-locks

From: Guillaume N. <g....@al...> - 2018-10-19 09:46:48

On Tue, Oct 09, 2018 at 06:58:29PM +0200, Guillaume Nault wrote:
> This series fixes two problems that can happen when many pppd_compat
> scripts need to run at the same moment.
> 
> 
> Patch 1 is a bit tricky. If fixes a race condition that happens inside
> triton when triton_context_wakeup() is run before the corresponding
> triton_context_schedule(). As explained in the patch, pppd_compat has
> no way to ensure proper ordering of these functions with the current
> triton API. So this patch modifies triton to explicitly handle the
> case where triton_context_wakeup() is called before
> triton_context_schedule().
> 
> An alternative fix would be to extend triton_context_schedule() so that
> it can run an arbitrary function and yield atomically. I will post this
> alternative fix too, so that one can compare both approaches.
> 
> 
> Patch 2 fixes a deadlock that happens when fork() fails. The failed
> attempt was not removed from the fork counter, preventing other scripts
> from being executed when the "fork-limit" option was set.
> 
Hi,

May I get some feedbacks for this series? We can discuss other solutions,
but I feel these issues are quite important to resolve.

Regards,

Guillaume

Re: [Accel-ppp-users] discarding message with invalid tid 0

From: Guillaume N. <g....@al...> - 2018-10-18 10:18:12

On Sun, Oct 14, 2018 at 10:18:42PM +0200, Alarig Le Lay wrote:
> On ven. 12 oct. 17:21:01 2018, Guillaume Nault wrote:
> > There are a few places where the bug might come from. Either the host
> > doesn't tell the driver that checksum isn't fully computed, or the
> > driver advertises checksum offload capabilities without actually
> > implementing it.
> > If you use virtualisation, the virtual NIC might advertise offload
> > support and rely on the physical NIC driver to actually perform the
> > computation. If the physical NIC doesn't offer this feature, the
> > virtual NIC should provide software fallback.
> > I suppose there is a bug somewhere in this chain.
> > 
> > So I imagine something changed in your setup wrt. checksum offload,
> > either on the LNS or the hypervisor.
> 
> The checksums seem to be enabled on both LNS:
> 
> judicael-adsl ~ # ethtool -k eth0 | grep tx-checksum                                                                                                                                                                                         
> tx-checksumming: on
>         tx-checksum-ipv4: off [fixed]
>         tx-checksum-ip-generic: on
>         tx-checksum-ipv6: off [fixed]
>         tx-checksum-fcoe-crc: off [fixed]
>         tx-checksum-sctp: off [fixed]
> 
> lns02 ~ # ethtool -k eth2 | grep tx-checksum
> tx-checksumming: on
>         tx-checksum-ipv4: off [fixed]
>         tx-checksum-ip-generic: on
>         tx-checksum-ipv6: off [fixed]
>         tx-checksum-fcoe-crc: off [fixed]
>         tx-checksum-sctp: off [fixed]
> 
> But, as soon as I re-enable checksumming on the BGP router, I see the
> same kind of messages in the logs.
> 
> root@bgp-adsl:~ # ifconfig vtnet2 rxcsum txcsum tso lro
> 
> lns02 ~ # tail -F /var/log/daemon.log
> […]
> Oct 14 22:00:42 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #1
> Oct 14 22:00:42 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
> Oct 14 22:00:44 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #2
> Oct 14 22:00:44 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
> Oct 14 22:00:48 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #3
> Oct 14 22:00:48 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
> Oct 14 22:00:56 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #4
> Oct 14 22:00:56 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
> Oct 14 22:01:12 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #5
> Oct 14 22:01:12 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
> Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): no acknowledgement from peer after 5 retransmissions, deleting tunnel
> Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): deleting tunnel
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: deleting session
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: deleting data channel
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: deleting session
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: deleting data channel
> Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: send [RADIUS(1) Accounting-Request id=1 <User-Name "qu...@gr...el"> <NAS-Identifier "accel-ppp"> <NAS-IP-Address 89.234.186.205> <NAS-Port 0> <NAS-Port-Id "ppp0"> <NAS-Port-Type Virtual> <Service-Type Framed-User> <Framed-Protocol PPP> <Calling-Station-Id "78.41.184.80"> <Called-Station-Id "89.234.186.12"> <Acct-Status-Type Stop> <Acct-Authentic RADIUS> <Acct-Session-Id "0dbb447ec6fa5cbf"> <Acct-Session-Time 73568> <Acct-Input-Octets 365708680> <Acct-Output-Octets 1036838156> <Acct-Input-Packets 2436312> <Acct-Output-Packets 4425180> <Acct-Input-Gigawords 0> <Acct-Output-Gigawords 1> <Framed-IP-Address 89.234.186.38> <Framed-Interface-Id 0:0:0:2> <Framed-IPv6-Prefix 2a00:5884:1100:3::/64> <Acct-Terminate-Cause NAS-Request>]
> Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: pppd_compat: ip-down started (pid 19050)
> Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: send [RADIUS(1) Accounting-Request id=1 <User-Name "un...@gr...el"> <NAS-Identifier "accel-ppp"> <NAS-IP-Address 89.234.186.205> <NAS-Port 1> <NAS-Port-Id "ppp1"> <NAS-Port-Type Virtual> <Service-Type Framed-User> <Framed-Protocol PPP> <Calling-Station-Id "78.41.184.80"> <Called-Station-Id "89.234.186.12"> <Acct-Status-Type Stop> <Acct-Authentic RADIUS> <Acct-Session-Id "0dbb447ec6fa4c93"> <Acct-Session-Time 153647> <Acct-Input-Octets 391223201> <Acct-Output-Octets 3536240375> <Acct-Input-Packets 4975569> <Acct-Output-Packets 6891147> <Acct-Input-Gigawords 0> <Acct-Output-Gigawords 1> <Framed-IP-Address 89.234.186.39> <Framed-Interface-Id 6:6b84:6:6b7d> <Framed-IPv6-Prefix 2a00:5884:1100:7::/64> <Acct-Terminate-Cause NAS-Request>]
> Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: pppd_compat: ip-down started (pid 19051)
> Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: pppd_compat: ip-down finished (0)
> Oct 14 22:01:28 lns02 accel-pppd: ppp0:: session destroyed
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: session destroyed
> Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: pppd_compat: ip-down finished (0)
> Oct 14 22:01:28 lns02 accel-pppd: ppp1:: session destroyed
> Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: session destroyed
> Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): tunnel destroyed
> Oct 14 22:01:28 lns02 bird: KRT: Received route 2a00:5884:1107::/48 with strange next-hop fe80::6:6b84:6:6b7d
> Oct 14 22:01:28 lns02 accel-pppd: recv [RADIUS(1) Accounting-Response id=1]
> Oct 14 22:01:28 lns02 accel-pppd: recv [RADIUS(1) Accounting-Response id=1]
> Oct 14 22:01:41 lns02 accel-pppd: l2tp: discarding unexpected message from 78.41.184.80: invalid tid 347
> Oct 14 22:01:42 lns02 accel-pppd: l2tp: discarding unexpected message from 78.41.184.80: invalid tid 347
> 
> So, the two ADSL lines on this LNS at this moment went down, and the
> same kind of message appears (unless the tunnel number was already
> negotiated)
> 
Makes sense.

> The two hypervisors have the same kind of network cards
> 01:00.0 Ethernet controller: Broadcom Limited NetXtreme II BCM5709 Gigabit Ethernet (rev 20)
> or
> 02:00.0 Ethernet controller: Broadcom Limited NetXtreme II BCM5716 Gigabit Ethernet (rev 20)
> 
Do these physical interfaces have checksum offload activated?

> So for me, the problem is on the checksums, but is located on the FreeBSD IP stack (again…).
> 
> What I still don’t understand, is why xl2tpd isn’t affected, but I
> think it’s not the right place to discuss about it ;)
> 
Skimming through the xl2tp code, it seems that it disables UDP checksums.
That could be a workaround, but I'd be better find the root issue.

Re: [Accel-ppp-users] discarding message with invalid tid 0

From: Alarig Le L. <al...@sw...> - 2018-10-14 20:19:01

On ven. 12 oct. 17:21:01 2018, Guillaume Nault wrote:
> There are a few places where the bug might come from. Either the host
> doesn't tell the driver that checksum isn't fully computed, or the
> driver advertises checksum offload capabilities without actually
> implementing it.
> If you use virtualisation, the virtual NIC might advertise offload
> support and rely on the physical NIC driver to actually perform the
> computation. If the physical NIC doesn't offer this feature, the
> virtual NIC should provide software fallback.
> I suppose there is a bug somewhere in this chain.
> 
> So I imagine something changed in your setup wrt. checksum offload,
> either on the LNS or the hypervisor.

The checksums seem to be enabled on both LNS:

judicael-adsl ~ # ethtool -k eth0 | grep tx-checksum                                                                                                                                                                                         
tx-checksumming: on
        tx-checksum-ipv4: off [fixed]
        tx-checksum-ip-generic: on
        tx-checksum-ipv6: off [fixed]
        tx-checksum-fcoe-crc: off [fixed]
        tx-checksum-sctp: off [fixed]

lns02 ~ # ethtool -k eth2 | grep tx-checksum
tx-checksumming: on
        tx-checksum-ipv4: off [fixed]
        tx-checksum-ip-generic: on
        tx-checksum-ipv6: off [fixed]
        tx-checksum-fcoe-crc: off [fixed]
        tx-checksum-sctp: off [fixed]

But, as soon as I re-enable checksumming on the BGP router, I see the
same kind of messages in the logs.

root@bgp-adsl:~ # ifconfig vtnet2 rxcsum txcsum tso lro

lns02 ~ # tail -F /var/log/daemon.log
[…]
Oct 14 22:00:42 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #1
Oct 14 22:00:42 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
Oct 14 22:00:44 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #2
Oct 14 22:00:44 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
Oct 14 22:00:48 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #3
Oct 14 22:00:48 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
Oct 14 22:00:56 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #4
Oct 14 22:00:56 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
Oct 14 22:01:12 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmission #5
Oct 14 22:01:12 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): retransmit (timeout) [L2TP tid=46302 sid=0 <Message-Type Hello>]
Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): no acknowledgement from peer after 5 retransmissions, deleting tunnel
Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): deleting tunnel
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: deleting session
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: deleting data channel
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: deleting session
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: deleting data channel
Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: send [RADIUS(1) Accounting-Request id=1 <User-Name "qu...@gr...el"> <NAS-Identifier "accel-ppp"> <NAS-IP-Address 89.234.186.205> <NAS-Port 0> <NAS-Port-Id "ppp0"> <NAS-Port-Type Virtual> <Service-Type Framed-User> <Framed-Protocol PPP> <Calling-Station-Id "78.41.184.80"> <Called-Station-Id "89.234.186.12"> <Acct-Status-Type Stop> <Acct-Authentic RADIUS> <Acct-Session-Id "0dbb447ec6fa5cbf"> <Acct-Session-Time 73568> <Acct-Input-Octets 365708680> <Acct-Output-Octets 1036838156> <Acct-Input-Packets 2436312> <Acct-Output-Packets 4425180> <Acct-Input-Gigawords 0> <Acct-Output-Gigawords 1> <Framed-IP-Address 89.234.186.38> <Framed-Interface-Id 0:0:0:2> <Framed-IPv6-Prefix 2a00:5884:1100:3::/64> <Acct-Terminate-Cause NAS-Request>]
Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: pppd_compat: ip-down started (pid 19050)
Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: send [RADIUS(1) Accounting-Request id=1 <User-Name "un...@gr...el"> <NAS-Identifier "accel-ppp"> <NAS-IP-Address 89.234.186.205> <NAS-Port 1> <NAS-Port-Id "ppp1"> <NAS-Port-Type Virtual> <Service-Type Framed-User> <Framed-Protocol PPP> <Calling-Station-Id "78.41.184.80"> <Called-Station-Id "89.234.186.12"> <Acct-Status-Type Stop> <Acct-Authentic RADIUS> <Acct-Session-Id "0dbb447ec6fa4c93"> <Acct-Session-Time 153647> <Acct-Input-Octets 391223201> <Acct-Output-Octets 3536240375> <Acct-Input-Packets 4975569> <Acct-Output-Packets 6891147> <Acct-Input-Gigawords 0> <Acct-Output-Gigawords 1> <Framed-IP-Address 89.234.186.39> <Framed-Interface-Id 6:6b84:6:6b7d> <Framed-IPv6-Prefix 2a00:5884:1100:7::/64> <Acct-Terminate-Cause NAS-Request>]
Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: pppd_compat: ip-down started (pid 19051)
Oct 14 22:01:28 lns02 accel-pppd: ppp0:qu...@gr...el: pppd_compat: ip-down finished (0)
Oct 14 22:01:28 lns02 accel-pppd: ppp0:: session destroyed
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 45186-40850: session destroyed
Oct 14 22:01:28 lns02 accel-pppd: ppp1:un...@gr...el: pppd_compat: ip-down finished (0)
Oct 14 22:01:28 lns02 accel-pppd: ppp1:: session destroyed
Oct 14 22:01:28 lns02 accel-pppd: l2tp session 347-46302, 61625-32197: session destroyed
Oct 14 22:01:28 lns02 accel-pppd: l2tp tunnel 347-46302 (78.41.184.80:1701): tunnel destroyed
Oct 14 22:01:28 lns02 bird: KRT: Received route 2a00:5884:1107::/48 with strange next-hop fe80::6:6b84:6:6b7d
Oct 14 22:01:28 lns02 accel-pppd: recv [RADIUS(1) Accounting-Response id=1]
Oct 14 22:01:28 lns02 accel-pppd: recv [RADIUS(1) Accounting-Response id=1]
Oct 14 22:01:41 lns02 accel-pppd: l2tp: discarding unexpected message from 78.41.184.80: invalid tid 347
Oct 14 22:01:42 lns02 accel-pppd: l2tp: discarding unexpected message from 78.41.184.80: invalid tid 347

So, the two ADSL lines on this LNS at this moment went down, and the
same kind of message appears (unless the tunnel number was already
negotiated)

The two hypervisors have the same kind of network cards
01:00.0 Ethernet controller: Broadcom Limited NetXtreme II BCM5709 Gigabit Ethernet (rev 20)
or
02:00.0 Ethernet controller: Broadcom Limited NetXtreme II BCM5716 Gigabit Ethernet (rev 20)

So for me, the problem is on the checksums, but is located on the FreeBSD IP stack (again…).

What I still don’t understand, is why xl2tpd isn’t affected, but I
think it’s not the right place to discuss about it ;)

Cheers,
-- 
Alarig

Re: [Accel-ppp-users] discarding message with invalid tid 0

From: Guillaume N. <g....@al...> - 2018-10-12 15:21:14

On Sat, Oct 06, 2018 at 10:33:18AM +0200, Alarig Le Lay wrote:
> On ven.  5 oct. 17:41:12 2018, Guillaume Nault wrote:
> > Which kernel version is your Gentoo? Could you send a pcap of a
> > successful tunnel establishment (one from the Gentoo and one from the
> > FreeBSD, as you did in your previous message)?
> 
> My Gentoo runs 4.14.65.
> The offload on it is currently configured as following:
> lns02 ~ # ethtool -k eth2 | grep offload
> tcp-segmentation-offload: on
> udp-fragmentation-offload: off
> generic-segmentation-offload: on
> generic-receive-offload: on
> large-receive-offload: off [fixed]
> rx-vlan-offload: off [fixed]
> tx-vlan-offload: off [fixed]
> l2-fwd-offload: off [fixed]
> hw-tc-offload: off [fixed]
> esp-hw-offload: off [fixed]
> esp-tx-csum-hw-offload: off [fixed]
> rx-udp_tunnel-port-offload: off [fixed]
> 
> The pcap for the Gentoo (LNS) and the FreeBSD (BGP) are here:
> https://bulbizarre.swordarmor.fr/garbage/documents/l2tp-bgp-up.pcap
> https://bulbizarre.swordarmor.fr/garbage/documents/l2tp-lns-up.pcap
> 
Ok, so I have taken a look at these network captures. Sorry for the
delay.

In the pcap taken on the LNS, the outer UDP checksum is partial. That
is, it is filled with the pseudo-header's sum only, instead of the full
checksum covering the whole data. That means you have checksum offload
activated on this host. The driver is supposed to complete the checksum
later, before it sends the packet on the wire. Your first LNS network
trace exhibited the same behaviour (which is perfectly normal as
tcpdump captures packets before the driver computes the checksum).
So nothing changed on this side.

However, the pcap files taken on the router differ. On the new pcap,
the checksum seen on the router is correct. That is, the partial
checksum has been properly completed before the packet was sent.
But on the original pcap, the router did see the same partial checksum
as seen on the LNS capture. So the driver sent it as-is, without
finalising the computation.

The problem is probably not related to accel-ppp or l2tp at all. For
sending data on the control channel, a plain UDP socket is used.
Therefore I believe that any UDP (and probably TCP) packet sent by this
host in the original setup would have invalid checksum.

There are a few places where the bug might come from. Either the host
doesn't tell the driver that checksum isn't fully computed, or the
driver advertises checksum offload capabilities without actually
implementing it.
If you use virtualisation, the virtual NIC might advertise offload
support and rely on the physical NIC driver to actually perform the
computation. If the physical NIC doesn't offer this feature, the
virtual NIC should provide software fallback.
I suppose there is a bug somewhere in this chain.

So I imagine something changed in your setup wrt. checksum offload,
either on the LNS or the hypervisor.

> > I guess something is still wrong. It would be strange if it was the
> > router that was at fault.
> > My guess is that, by disabling offloads, the router stopped checking
> > the UDP checksum of transit packets. If the LAC does not verify UDP
> > checksums, then invalid checksums don't prevent the LAC and LNS from
> > communicating. If you capture trafic on the router, do you still see
> > invalid checksums for packets originated by the LNS? And correct
> > checksums for packets sent by the other LNS?
> 
> On the other LNS, I see [no chksum] for the L2TP packets, but [udp sum
> ok] for the packets inside the L2TP.
>From what I can see from the pcap, your provider disables checksums of
L2TP data packets (outer header). This is common behaviour. Original
packets are sent as-is, so the inner headers and checksums aren't
modified.

Guillaume

[Accel-ppp-users] [PATCH] cli: add network namespace column in "show sessions"

From: Guillaume N. <g....@al...> - 2018-10-12 11:10:27

Define a new column, called "netns", that prints the network namespace
in which sessions are set.

Signed-off-by: Guillaume Nault <g....@al...>
---
 accel-pppd/cli/show_sessions.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/accel-pppd/cli/show_sessions.c b/accel-pppd/cli/show_sessions.c
index 443b186a..a96a31d8 100644
--- a/accel-pppd/cli/show_sessions.c
+++ b/accel-pppd/cli/show_sessions.c
@@ -379,6 +379,11 @@ early_out:
 	goto out;
 }
 
+static void print_netns(struct ap_session *ses, char *buf)
+{
+	snprintf(buf, CELL_SIZE, "%s", ses->net->name);
+}
+
 static void print_ifname(struct ap_session *ses, char *buf)
 {
 	snprintf(buf, CELL_SIZE, "%s", ses->ifname);
@@ -633,6 +638,7 @@ static void init(void)
 
 	cli_register_simple_cmd2(show_ses_exec, show_ses_help, 2, "show", "sessions");
 
+	cli_show_ses_register("netns", "network namespace name", print_netns);
 	cli_show_ses_register("ifname", "interface name", print_ifname);
 	cli_show_ses_register("username", "user name", print_username);
 	cli_show_ses_register("ip", "IP address", print_ip);
-- 
2.19.1

Re: [Accel-ppp-users] [PATCH] ipv6: fix IPv6 processing of sessions that only have only a link-local address

From: Guillaume N. <g....@al...> - 2018-10-09 17:45:57

On Fri, Sep 21, 2018 at 02:16:17PM +0200, Guillaume Nault wrote:
> Several modules assume that if ses->ipv6 is set, then
> ses->ipv6->addr_list contains at least one element. But this is not
> true if ipv6 was allocated by the pseudo ipdb backend of ipv6cp
> (ipv6cp_opt_intfid.c). That is, if the PPP session only has an
> automatic link-local address.
> 
I had no news for this patch. Is there anything wrong with it?

Re: [Accel-ppp-users] [PATCH 1/2] triton: fix context schedule/wakeup race

From: Guillaume N. <g....@al...> - 2018-10-09 17:14:18

On Tue, Oct 09, 2018 at 06:58:31PM +0200, Guillaume Nault wrote:
> The problem is that there is no synchronisation between the parent and
> its child. When under stress, the child may execute faster than its
> parent and the sigchld callback might run triton_context_wakeup()
> before the parent had time to call triton_context_schedule().
> 

As an alternative fix, we could let triton_context_schedule() run a
caller-provided function right before jumping back to the triton_thread.
This way, the parent could unlock the sigchld module and yield in a race-free
way. Then the child couldn't possibly run between both operations.

Here's how it would look like. As per my tests both approaches work
equally well. If the triton_context_schedule_after() approach is
preferred, then I can re-submit the whole series.

-------- 8< --------
diff --git a/accel-pppd/extra/pppd_compat.c b/accel-pppd/extra/pppd_compat.c
index 318327e8..0d3ac031 100644
--- a/accel-pppd/extra/pppd_compat.c
+++ b/accel-pppd/extra/pppd_compat.c
@@ -252,9 +252,8 @@ static void ev_ses_pre_up(struct ap_session *ses)
                sigchld_register_handler(&pd->hnd);
                if (conf_verbose)
                        log_ppp_info2("pppd_compat: ip-pre-up started (pid %i)\n", pid);
-               sigchld_unlock();

-               triton_context_schedule();
+               triton_context_schedule_after(sigchld_unlock);

                pthread_mutex_lock(&pd->hnd.lock);
                pthread_mutex_unlock(&pd->hnd.lock);
@@ -368,9 +367,8 @@ static void ev_ses_finished(struct ap_session *ses)
                        sigchld_register_handler(&pd->hnd);
                        if (conf_verbose)
                                log_ppp_info2("pppd_compat: ip-down started (pid %i)\n", pid);
-                       sigchld_unlock();

-                       triton_context_schedule();
+                       triton_context_schedule_after(sigchld_unlock);

                        pthread_mutex_lock(&pd->hnd.lock);
                        pthread_mutex_unlock(&pd->hnd.lock);
diff --git a/accel-pppd/triton/triton.c b/accel-pppd/triton/triton.c
index d5bd01d3..2e8fb4ad 100644
--- a/accel-pppd/triton/triton.c
+++ b/accel-pppd/triton/triton.c
@@ -486,7 +486,7 @@ static ucontext_t * __attribute__((noinline)) alloc_context()
        return uc;
 }

-void __export triton_context_schedule()
+void __export triton_context_schedule_after(void (*func)(void))
 {
        volatile struct _triton_context_t *ctx = (struct _triton_context_t *)this_ctx->tpd;

@@ -512,6 +512,8 @@ void __export triton_context_schedule()
        } else {
                ctx->thread->ctx = NULL;
                spin_unlock(&threads_lock);
+               if (func)
+                       func();
                longjmp(jmp_env, 1);
        }
 }
diff --git a/accel-pppd/triton/triton.h b/accel-pppd/triton/triton.h
index 79bbee1b..2b2bf460 100644
--- a/accel-pppd/triton/triton.h
+++ b/accel-pppd/triton/triton.h
@@ -74,12 +74,17 @@ extern struct triton_stat_t triton_stat;
 int triton_context_register(struct triton_context_t *, void *arg);
 void triton_context_unregister(struct triton_context_t *);
 void triton_context_set_priority(struct triton_context_t *, int);
-void triton_context_schedule(void);
+void triton_context_schedule_after(void (*func)(void));
 void triton_context_wakeup(struct triton_context_t *);
 int triton_context_call(struct triton_context_t *, void (*func)(void *), void *arg);
 void triton_cancel_call(struct triton_context_t *, void (*func)(void *));
 struct triton_context_t *triton_context_self(void);

+static inline void triton_context_schedule(void)
+{
+       triton_context_schedule_after(NULL);
+}
+
 #define MD_MODE_READ 1
 #define MD_MODE_WRITE 2

Flat | Threaded

<< < 1 2 3 4 5 .. 31 > >> (Page 3 of 31)