From Kerrighed

Contents

Centralizing (Kernel) Logs

This page intends to help you through the process of centralizing your nodes' logs. This has several advantages :

* You can disable the "local" logging that takes place inside the NFSROOT and leads to inconsistencies (all nodes write to the same files) ;
* You can centralize logs and have a unified interface to browse them ;
* You can use this to retrieve kernel stacktrace, which is our main objective.

We will achieve our goal by following several steps :

  1. Patch the kernel ;
  2. Install a central Syslog server ;
  3. Configure nodes to forward their logs to the syslog server ;
  4. Enable and automatically load the NetConsole kernel module ;

Considerations

This has been written from an already running Debian-based system. You might need to adapt some paths and/or tools to your distribution.

In my setup, the Syslog server and the NFS server are the same machine, but this is not required. All syslog traffic uses udp/6666. You can change this according to your setup.

Syslog/NFS server is identified as 192.168.5.1.

This setup makes use of syslog-ng.

Patching the kernel

If you use the plain old-style netconsole module, you might end up with some logs, but they are hardly usable. This is explained in this LKML thread. A side effect is that it's hard to use kdb and copy/paste data from the logs. You can, but you'll lose a considerable amount of time just reformatting the output. Jeff Moyer proposed a patch in the same thread that gives us better output ;)

Go to the kernel/ directory of kerrighed sources, then issue :

# wget -O - http://lkml.org/lkml/diff/2004/6/25/143/1|patch -p1

This should patch the kernel and add the 'do_syslog' parameter. That's all you need. This patch applies correctly to the 2.6.20 kernel, used by Kerrighed 2.4.0.

For reference, in case LKML is down, here is the patch, which works for Kerrighed 2.4 :

--- linux-2.6.6/drivers/net/netconsole.c	2004-06-21 14:06:34.000000000 -0400
+++ linux-2.6.6-netdump/drivers/net/netconsole.c	2004-06-25 13:51:54.000000000 -0400
@@ -54,6 +54,10 @@ static char config[256];
 module_param_string(netconsole, config, 256, 0);
 MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]\n");
 
+static int do_syslog;
+module_param(do_syslog, bool, 000);
+MODULE_PARM_DESC(do_syslog, " do_syslog=<yes|no>\n");
+
 static struct netpoll np = {
 	.name = "netconsole",
 	.dev_name = "eth0",
@@ -64,10 +68,36 @@ static struct netpoll np = {
 static int configured = 0;
 
 #define MAX_PRINT_CHUNK 1000
+#define SYSLOG_HEADER_LEN 4
+
+static int syslog_chars = SYSLOG_HEADER_LEN;
+static unsigned char syslog_line [MAX_PRINT_CHUNK + 10] = {
+	'<',
+	'5',
+	'>',
+	' ',
+	[4 ... MAX_PRINT_CHUNK+5] = '\0',
+};
+
+/*
+ * We feed kernel messages char by char, and send the UDP packet
+ * one linefeed. We buffer all characters received.
+ */
+static inline void feed_syslog_char(const unsigned char c)
+{
+	if (syslog_chars == MAX_PRINT_CHUNK)
+		syslog_chars--;
+	syslog_line[syslog_chars] = c;
+	syslog_chars++;
+	if (c == '\n') {
+		netpoll_send_udp(&np, syslog_line, syslog_chars);
+		syslog_chars = SYSLOG_HEADER_LEN;
+	}
+}
 
 static void write_msg(struct console *con, const char *msg, unsigned int len)
 {
-	int frag, left;
+	int frag, left, i;
 	unsigned long flags;
 
 	if (!np.dev)
@@ -75,11 +105,16 @@ static void write_msg(struct console *co
 
 	local_irq_save(flags);
 
-	for(left = len; left; ) {
-		frag = min(left, MAX_PRINT_CHUNK);
-		netpoll_send_udp(&np, msg, frag);
-		msg += frag;
-		left -= frag;
+	if (do_syslog) {
+		for (i = 0; i < len; i++)
+			feed_syslog_char(msg[i]);
+	} else {
+		for(left = len; left; ) {
+			frag = min(left, MAX_PRINT_CHUNK);
+			netpoll_send_udp(&np, msg, frag);
+			msg += frag;
+			left -= frag;
+		}
 	}
 
 	local_irq_restore(flags);

Here is an upgrade of the patch for Kerrighed on Linux 2.6.30 :

From 8ddcad8c7db7b89c6b6289ca4c6fe44072939eeb Mon Sep 17 00:00:00 2001
From: Alexandre Lissy <alexandre.lissy@etu.univ-tours.fr>
Date: Mon, 29 Mar 2010 18:55:58 +0200
Subject: [PATCH] Applying the di_syslog patch to netconsole module.

This patch is coming from LKML post:
http://lkml.org/lkml/diff/2004/6/25/143/1

The main idea is to provide a way to correctly format netconsole
output for syslog.

Signed-off-by: Alexandre Lissy <alexandre.lissy@etu.univ-tours.fr>
---
 drivers/net/netconsole.c |   50 +++++++++++++++++++++++++++++++++++++++------
 1 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index bf4af52..bfc9f1c 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -50,11 +50,25 @@ MODULE_LICENSE("GPL");
 
 #define MAX_PARAM_LENGTH	256
 #define MAX_PRINT_CHUNK		1000
+#define SYSLOG_HEADER_LEN 4
+
+static int syslog_chars = SYSLOG_HEADER_LEN;
+static unsigned char syslog_line [MAX_PRINT_CHUNK + 10] = {
+	'<',
+	'5',
+	'>',
+	' ',
+	[4 ... MAX_PRINT_CHUNK+5] = '\0',
+};
 
 static char config[MAX_PARAM_LENGTH];
 module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0);
 MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");
 
+static int do_syslog;
+module_param(do_syslog, bool, 000);
+MODULE_PARM_DESC(do_syslog, " do_syslog=<yes|no>\n");
+
 #ifndef	MODULE
 static int __init option_setup(char *opt)
 {
@@ -64,6 +78,23 @@ static int __init option_setup(char *opt)
 __setup("netconsole=", option_setup);
 #endif	/* MODULE */
 
+/*
+ * We feed kernel messages char by char, and send the UDP packet
+ * one linefeed. We buffer all characters received.
+ */
+static inline void feed_syslog_char(struct netpoll *np, const unsigned char c)
+{
+	if (syslog_chars == MAX_PRINT_CHUNK)
+		syslog_chars--;
+
+	syslog_line[syslog_chars] = c;
+	syslog_chars++;
+	if (c == '\n') {
+		netpoll_send_udp(np, syslog_line, syslog_chars);
+		syslog_chars = SYSLOG_HEADER_LEN;
+	}
+}
+
 /* Linked list of all configured targets */
 static LIST_HEAD(target_list);
 
@@ -700,7 +731,7 @@ static struct notifier_block netconsole_netdev_notifier = {
 
 static void write_msg(struct console *con, const char *msg, unsigned int len)
 {
-	int frag, left;
+	int frag, left, i;
 	unsigned long flags;
 	struct netconsole_target *nt;
 	const char *tmp;
@@ -719,12 +750,17 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
 			 * at least one target if we die inside here, instead
 			 * of unnecessarily keeping all targets in lock-step.
 			 */
-			tmp = msg;
-			for (left = len; left;) {
-				frag = min(left, MAX_PRINT_CHUNK);
-				netpoll_send_udp(&nt->np, tmp, frag);
-				tmp += frag;
-				left -= frag;
+			if (do_syslog) {
+				for (i = 0; i < len; i++)
+					feed_syslog_char(&nt->np, msg[i]);
+			} else {
+				tmp = msg;
+				for (left = len; left;) {
+					frag = min(left, MAX_PRINT_CHUNK);
+					netpoll_send_udp(&nt->np, tmp, frag);
+					tmp += frag;
+					left -= frag;
+				}
 			}
 		}
 		netconsole_target_put(nt);
-- 
1.7.0.3

Installing a central Syslog Server

As stated, this part deals with the installation and configuration of Syslog-ng to have a centralized syslog server to collect logs from nodes.

Install Syslog-NG

First, install syslog-ng on your Syslog server. This can be achieved under debian by issuing :

# aptitude install syslog-ng

Configure the Syslog-NG Server

Now, we have to configure the server to handle the logs of our nodes. Syslog-ng needs three components to handle logs : a source, a destination, and a log statement.

Defining the new Syslog Source

So, to retrieve our logs, we're going to add a new source. This source will collect everything that comes to the udp port 6666 at 192.168.5.1.

# Source for the logs sent by the nodes: listens on 192.168.5.1, UDP port 6666.
source s_krg_nodes {
        # Collect whatever arrives over UDP
        udp(ip("192.168.5.1"), port(6666));
};

Defining the new Syslog Destination

Now, we tell Syslog-NG how and where it should put our logs. To do so, we define a new destination :

# Destination for the nodes' logs: one file per sender, named after the
# $FULLHOST macro, created with permissions 0644.
destination d_krg_nodes {
        file("/var/log/krg_nodes/$FULLHOST.log", perm(0644));
};

This will create a log file per node (per IP address) under /var/log/krg_nodes/. Make sure that your default configuration file has the "create_dirs(yes);" statement, otherwise you'll have to create the 'krg_nodes' directory yourself.

Add the logging directives

We just have to create the log action that will take our source to our destination. This is pretty straightforward :

# Route everything received on s_krg_nodes to d_krg_nodes.
log {
        source(s_krg_nodes);
        destination(d_krg_nodes);
};

Setting up Syslog for the nodes

Now, we need to install and configure the syslog server that's running on each node. This one will have to collect local logs from software (syslog(3)), and bring them back to the server. We'll first take a look at the installation, then configure it.

Installing Syslog-NG on the nodes

You need to ensure that you're inside the CHROOT. The installation is just the same for the nodes as for the server :

# aptitude install syslog-ng

That's it.

Configuring Syslog-NG for the nodes

The default configuration logs into /var/log. We could remove this part, but it's not necessary regarding our objectives, so we'll let you do it if you want.

As stated before, Syslog-NG needs a source, a destination and a log statement to work (several are possible, for sure ;)). Our goal here is to send data. So we just need two new things : a new destination, and modifying the current logs behavior.

Adding the new destination

We just need to define the syslog server to which we want to log :

# Send all messages to the NFS/syslog server (192.168.5.1, UDP port 6666)
destination d_nfs_syslog { udp("192.168.5.1", port(6666)); };

Altering default logging behavior

Now, you need to find the 'log' statement that manages /var/log/syslog. In the default Debian configuration file, you can find it easily by searching for "# *.*;auth,authpriv.none -/var/log/syslog".

Once you have found this statement, you need to add the destination we just defined, by adding "destination(d_nfs_syslog);" to the statement.

This should give you something like :

# Messages from s_all that pass f_syslog go to two destinations: df_syslog
# and d_nfs_syslog.
log {
        source(s_all);
        filter(f_syslog);
        destination(df_syslog);
        destination(d_nfs_syslog);
};

If you prefer, you can remove the "destination(df_syslog);" line so as not to log to "local" files. You can do the same for all other log statements.

Remote Kernel Logging with NetConsole

Last part, we now want to redirect kernel console to the syslog server. This will help us, as we can get the stacktrace in a plain file !

All our manipulations still take place inside the chroot.

Prerequisites

Before we continue, you need to ensure that NetConsole is present and compiled as a kernel module. Check either the /boot/config-? file corresponding to your Kerrighed Kernel or the output of 'cat /proc/config.gz|gunzip', and verify that you have CONFIG_NETCONSOLE=m. If you have CONFIG_NETCONSOLE=y, it's not good: change it, then recompile and reinstall your kernel.

The NetConsole.sh Kernel Module Loading Script

Copy the following to your /etc/init.d/netconsole.sh :

#!/bin/sh

# Copyright (c) 2009 - Alexandre Lissy <alexandre.lissy@etu.univ-tours.fr>
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

# extract_param PREFIX VARNAME
#
# Look up the kernel command-line parameter that starts with PREFIX
# (e.g. "logdest=") in /proc/cmdline and store its value (the text up to
# the next space) in the shell variable named VARNAME.
#
# If the parameter appears several times, the last occurrence wins.
# If it does not appear at all, VARNAME is set to the empty string.
#
# Uses the global scratch variables CMD, DELIM and VAR.
extract_param()
{
	CMD=$(cat /proc/cmdline)
	DELIM=$1
	# Pad with a leading space so that PREFIX only matches at the start
	# of a word, never in the middle of another parameter's name.
	VAR=" $CMD"
	case "$VAR" in
	*" $DELIM"*)
		VAR=${VAR##*" $DELIM"}
		VAR=${VAR%% *}
		;;
	*)
		# Parameter absent: without this branch the expansions above
		# would leave the first word of the command line in VAR.
		VAR=
		;;
	esac
	# \$VAR defers expansion to eval, so the value is assigned verbatim
	# instead of being re-parsed as shell code.
	eval "$2=\$VAR"
}

# Read the log destination ("ip:port") and the network device to log
# through from the kernel command line.
extract_param "logdest=" DEST
extract_param "logdev=" LOGDEV

# Both parameters are needed to build the netconsole= option below.
if [ -z "${DEST}" ] || [ -z "${LOGDEV}" ]; then
	echo "netconsole: logdest=<ip>:<port> and logdev=<dev> are required on the kernel command line" >&2
	exit 1
fi

# DEST must contain a colon, otherwise the ip/port split below would
# yield the same string for both parts.
case "${DEST}" in
*:*)
	;;
*)
	echo "netconsole: logdest must be <ip>:<port>, got '${DEST}'" >&2
	exit 1
	;;
esac

# IPv4 address of the logging device. sed keeps only the dotted address of
# the first "inet" line and then quits, so a device carrying several
# addresses still yields a single value.
IPROUTE=$(ip -f inet addr show dev "${LOGDEV}"|sed -n '/^ *inet /{s/^ *inet *\([.0-9]*\).*/\1/p;q;}')

REMOTEIP="${DEST%%:*}"
REMOTEPORT="${DEST##*:}"
LOCALIP="${IPROUTE%%/*}"
# The same UDP port number is used as source and target port.
LOCALPORT=${REMOTEPORT}

MODPROBE=modprobe

echo "Loading NetConsole Kernel module ... "
echo "${MODPROBE} netconsole do_syslog=yes netconsole=\"${LOCALPORT}@${LOCALIP}/${LOGDEV},${REMOTEPORT}@${REMOTEIP}/\""
${MODPROBE} netconsole do_syslog=yes netconsole="${LOCALPORT}@${LOCALIP}/${LOGDEV},${REMOTEPORT}@${REMOTEIP}/"

Don't forget to chmod +x it ... This script mainly depends on the iproute2 software. If it's not installed, you should install it right now.

Now that it's installed, configure your distribution to launch it as early as possible. Under Debian, issue :

# update-rc.d netconsole.sh defaults 05

Notify the Kernel

We have all we need. The one remaining step is to tell the kernel /where/ to send logs. This can be done by adding the following to your kernel command line (grub.conf if you're using PXEGrub, or pxelinux.cfg/default for PXELinux users) :

logdest=192.168.5.1:6666 logdev=eth0

This will be parsed by the previous script. One can try with NetConsole compiled in (CONFIG_NETCONSOLE=y), but issues have been reported with this in combination with a DHCP setup.