[format] order the formats based on sample matches

pull/196/head
Timothy Stack 9 years ago
parent fd55176df8
commit a300ce9977

@ -31,6 +31,10 @@ lnav v0.7.3:
specially on OS X. If you come across this error, use the
'--with-readline=prefix' switch to specify the path to the correct
location of libreadline.
* The order that log formats are tried against a log file is now
automatically determined so that more specific formats are tested
before more general ones. The order is determined on startup based on
how each format matches each other formats sample lines.
lnav v0.7.2:
* Added log formats for vdsm, openstack, and the vmkernel.

@ -185,3 +185,21 @@ Executing the format file should then install it automatically::
$ chmod ugo+rx myformat.json
$ ./myformat.json
info: installed: /home/example/.lnav/formats/installed/myformat_log.json
Format Order When Scanning a File
---------------------------------
When **lnav** loads a file, it tries each log format against the first ~1000
lines of the file trying to find a match. When a match is found, that log
format will be locked in and used for the rest of the lines in that file.
Since there may be overlap between formats, **lnav** performs a test on
startup to determine which formats match each others sample lines. Using
this information it will create an ordering of the formats so that the more
specific formats are tried before the more generic ones. For example, a
format that matches certain syslog messages will match its own sample lines,
but not the ones in the syslog samples. On the other hand, the syslog format
will match its own samples and those in the more specific format. You can
see the order of the format by enabling debugging and checking the **lnav**
log file for the "Format order" message::
$ lnav -d /tmp/lnav.log

@ -1213,8 +1213,9 @@ void external_log_format::build(std::vector<std::string> &errors)
++pat_iter) {
pattern &pat = *(*pat_iter);
if (!pat.p_pcre)
if (!pat.p_pcre) {
continue;
}
std::string line_partial = iter->s_line;
@ -1243,6 +1244,32 @@ void external_log_format::build(std::vector<std::string> &errors)
}
}
bool external_log_format::match_samples(const vector<sample> &samples) const
{
for (vector<sample>::const_iterator sample_iter = samples.begin();
sample_iter != samples.end();
++sample_iter) {
for (std::vector<external_log_format::pattern *>::const_iterator pat_iter = this->elf_pattern_order.begin();
pat_iter != this->elf_pattern_order.end();
++pat_iter) {
pattern &pat = *(*pat_iter);
if (!pat.p_pcre) {
continue;
}
pcre_context_static<128> pc;
pcre_input pi(sample_iter->s_line);
if (pat.p_pcre->match(pc, pi)) {
return true;
}
}
}
return false;
}
class external_log_table : public log_vtab_impl {
public:
external_log_table(const external_log_format &elf) :

@ -40,6 +40,7 @@
#include <sys/types.h>
#include <set>
#include <list>
#include <string>
#include <vector>
#include <memory>
@ -688,6 +689,8 @@ public:
void build(std::vector<std::string> &errors);
bool match_samples(const std::vector<sample> &samples) const;
std::auto_ptr<log_format> specialized() {
external_log_format *elf = new external_log_format(*this);
std::auto_ptr<log_format> retval((log_format *)elf);
@ -744,6 +747,7 @@ public:
};
std::set<std::string> elf_source_path;
std::list<std::string> elf_collision;
std::string elf_file_pattern;
pcrepp *elf_filename_pcre;
std::map<std::string, pattern> elf_patterns;

@ -541,13 +541,78 @@ void load_formats(const std::vector<std::string> &extra_paths,
load_from_path(*path_iter, errors);
}
vector<external_log_format *> alpha_ordered_formats;
for (map<string, external_log_format *>::iterator iter = LOG_FORMATS.begin();
iter != LOG_FORMATS.end();
++iter) {
iter->second->build(errors);
external_log_format *elf = iter->second;
elf->build(errors);
for (map<string, external_log_format *>::iterator check_iter = LOG_FORMATS.begin();
check_iter != LOG_FORMATS.end();
++check_iter) {
if (iter->first == check_iter->first) {
continue;
}
external_log_format *check_elf = check_iter->second;
if (elf->match_samples(check_elf->elf_samples)) {
log_warning("Format collision, format '%s' matches sample from '%s'",
elf->get_name().c_str(),
check_elf->get_name().c_str());
elf->elf_collision.push_back(check_elf->get_name());
}
}
if (errors.empty()) {
log_format::get_root_formats().insert(log_format::get_root_formats().begin(), iter->second);
alpha_ordered_formats.push_back(elf);
}
}
vector<external_log_format *> graph_ordered_formats;
while (!alpha_ordered_formats.empty()) {
vector<string> popped_formats;
for (vector<external_log_format *>::iterator iter = alpha_ordered_formats.begin();
iter != alpha_ordered_formats.end();) {
external_log_format *elf = *iter;
if (elf->elf_collision.empty()) {
iter = alpha_ordered_formats.erase(iter);
popped_formats.push_back(elf->get_name());
graph_ordered_formats.push_back(elf);
}
else {
++iter;
}
}
if (popped_formats.empty() && !alpha_ordered_formats.empty()) {
external_log_format *elf = alpha_ordered_formats.front();
log_warning("Detected a cycle, breaking by picking %s",
elf->get_name().c_str());
elf->elf_collision.clear();
}
for (vector<external_log_format *>::iterator iter = alpha_ordered_formats.begin();
iter != alpha_ordered_formats.end();
++iter) {
external_log_format *elf = *iter;
for (vector<string>::iterator pop_iter = popped_formats.begin();
pop_iter != popped_formats.end();
++pop_iter) {
elf->elf_collision.remove(*pop_iter);
}
}
}
log_info("Format order:")
for (vector<external_log_format *>::iterator iter = graph_ordered_formats.begin();
iter != graph_ordered_formats.end();
++iter) {
log_info(" %s", (*iter)->get_name().c_str());
}
vector<log_format *> &roots = log_format::get_root_formats();
roots.insert(roots.begin(), graph_ordered_formats.begin(), graph_ordered_formats.end());
}

@ -222,6 +222,7 @@ dist_noinst_DATA = \
listview_output.6 \
logfile_access_log.0 \
logfile_access_log.1 \
logfile_blued.0 \
logfile_empty.0 \
logfile_filter.0 \
logfile_for_join.0 \
@ -247,6 +248,7 @@ dist_noinst_DATA = \
vt52_curses_input.1 \
vt52_curses_output.0 \
vt52_curses_output.1 \
formats/collision/format.json \
formats/jsontest/format.json \
log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt \
log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt \

@ -67,6 +67,7 @@ int main(int argc, char *argv[])
{
std::vector<std::string> paths, errors;
paths.push_back(getenv("test_dir"));
load_formats(paths, errors);
}

@ -0,0 +1,47 @@
{
"zblued_log" : {
"title" : "blued",
"regex" : {
"std" : {
"pattern" : "^(?<timestamp>\\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?<log_hostname>[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))? blued(?:\\[(?<log_pid>\\d+)])?:(?<body>(?:.|\\n)*)$"
}
},
"level-field" : "body",
"level" : {
"error" : "(?:failed|failure|error)",
"warning" : "(?:warn|not responding|init: cannot execute)"
},
"value" : {
"log_hostname" : {
"kind" : "string",
"collate" : "ipaddress",
"identifier" : true
}
},
"sample" : [
{
"line" : "Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock"
}
]
},
"xerror_log" : {
"title" : "Common Error Log",
"description" : "The default web error log format for servers like Apache.",
"regex" : {
"cups" : {
"pattern" : "^(?<level>\\w) \\[(?<timestamp>[^\\]]+)\\] (?<body>.*)"
}
},
"level-field": "level",
"level" : {
"error" : "E",
"warning" : "W",
"info" : "I"
},
"sample" : [
{
"line" : "E [08/Jun/2013:11:28:58 -0700] Unknown directive BrowseOrder on line 22 of /private/etc/cups/cupsd.conf."
}
]
}
}

@ -0,0 +1,6 @@
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock

@ -16,6 +16,10 @@ run_test ./drive_logfile -f strace_log ${srcdir}/logfile_strace_log.0
on_error_fail_with "Didn't infer strace_log log format?"
run_test ./drive_logfile -f zblued_log ${srcdir}/logfile_blued.0
on_error_fail_with "Didn't infer blued_log that collides with syslog?"
run_test ./drive_logfile ${srcdir}/logfile_empty.0

Loading…
Cancel
Save