diff --git a/NEWS b/NEWS index 659eaa23..c1a706b6 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,10 @@ lnav v0.7.3: specially on OS X. If you come across this error, use the '--with-readline=prefix' switch to specify the path to the correct location of libreadline. + * The order that log formats are tried against a log file is now + automatically determined so that more specific formats are tested + before more general ones. The order is determined on startup based on + how each format matches each other formats sample lines. lnav v0.7.2: * Added log formats for vdsm, openstack, and the vmkernel. diff --git a/docs/source/formats.rst b/docs/source/formats.rst index 55d9f4a9..27b7bef4 100644 --- a/docs/source/formats.rst +++ b/docs/source/formats.rst @@ -185,3 +185,21 @@ Executing the format file should then install it automatically:: $ chmod ugo+rx myformat.json $ ./myformat.json info: installed: /home/example/.lnav/formats/installed/myformat_log.json + +Format Order When Scanning a File +--------------------------------- + +When **lnav** loads a file, it tries each log format against the first ~1000 +lines of the file trying to find a match. When a match is found, that log +format will be locked in and used for the rest of the lines in that file. +Since there may be overlap between formats, **lnav** performs a test on +startup to determine which formats match each others sample lines. Using +this information it will create an ordering of the formats so that the more +specific formats are tried before the more generic ones. For example, a +format that matches certain syslog messages will match its own sample lines, +but not the ones in the syslog samples. On the other hand, the syslog format +will match its own samples and those in the more specific format. You can +see the order of the format by enabling debugging and checking the **lnav** +log file for the "Format order" message:: + + $ lnav -d /tmp/lnav.log diff --git a/src/log_format.cc b/src/log_format.cc index 35c4d9e2..bbf0cd73 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -1213,8 +1213,9 @@ void external_log_format::build(std::vector &errors) ++pat_iter) { pattern &pat = *(*pat_iter); - if (!pat.p_pcre) + if (!pat.p_pcre) { continue; + } std::string line_partial = iter->s_line; @@ -1243,6 +1244,32 @@ void external_log_format::build(std::vector &errors) } } +bool external_log_format::match_samples(const vector &samples) const +{ + for (vector::const_iterator sample_iter = samples.begin(); + sample_iter != samples.end(); + ++sample_iter) { + for (std::vector::const_iterator pat_iter = this->elf_pattern_order.begin(); + pat_iter != this->elf_pattern_order.end(); + ++pat_iter) { + pattern &pat = *(*pat_iter); + + if (!pat.p_pcre) { + continue; + } + + pcre_context_static<128> pc; + pcre_input pi(sample_iter->s_line); + + if (pat.p_pcre->match(pc, pi)) { + return true; + } + } + } + + return false; +} + class external_log_table : public log_vtab_impl { public: external_log_table(const external_log_format &elf) : diff --git a/src/log_format.hh b/src/log_format.hh index 3f17a09a..b45072c9 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -688,6 +689,8 @@ public: void build(std::vector &errors); + bool match_samples(const std::vector &samples) const; + std::auto_ptr specialized() { external_log_format *elf = new external_log_format(*this); std::auto_ptr retval((log_format *)elf); @@ -744,6 +747,7 @@ public: }; std::set elf_source_path; + std::list elf_collision; std::string elf_file_pattern; pcrepp *elf_filename_pcre; std::map elf_patterns; diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index 5d42a305..6c9ba6e9 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -541,13 +541,78 @@ void load_formats(const std::vector &extra_paths, load_from_path(*path_iter, errors); } + vector alpha_ordered_formats; for (map::iterator iter = LOG_FORMATS.begin(); iter != LOG_FORMATS.end(); ++iter) { - iter->second->build(errors); + external_log_format *elf = iter->second; + elf->build(errors); + + for (map::iterator check_iter = LOG_FORMATS.begin(); + check_iter != LOG_FORMATS.end(); + ++check_iter) { + if (iter->first == check_iter->first) { + continue; + } + + external_log_format *check_elf = check_iter->second; + if (elf->match_samples(check_elf->elf_samples)) { + log_warning("Format collision, format '%s' matches sample from '%s'", + elf->get_name().c_str(), + check_elf->get_name().c_str()); + elf->elf_collision.push_back(check_elf->get_name()); + } + } if (errors.empty()) { - log_format::get_root_formats().insert(log_format::get_root_formats().begin(), iter->second); + alpha_ordered_formats.push_back(elf); + } + } + + vector graph_ordered_formats; + + while (!alpha_ordered_formats.empty()) { + vector popped_formats; + + for (vector::iterator iter = alpha_ordered_formats.begin(); + iter != alpha_ordered_formats.end();) { + external_log_format *elf = *iter; + if (elf->elf_collision.empty()) { + iter = alpha_ordered_formats.erase(iter); + popped_formats.push_back(elf->get_name()); + graph_ordered_formats.push_back(elf); + } + else { + ++iter; + } + } + + if (popped_formats.empty() && !alpha_ordered_formats.empty()) { + external_log_format *elf = alpha_ordered_formats.front(); + log_warning("Detected a cycle, breaking by picking %s", + elf->get_name().c_str()); + elf->elf_collision.clear(); + } + + for (vector::iterator iter = alpha_ordered_formats.begin(); + iter != alpha_ordered_formats.end(); + ++iter) { + external_log_format *elf = *iter; + for (vector::iterator pop_iter = popped_formats.begin(); + pop_iter != popped_formats.end(); + ++pop_iter) { + elf->elf_collision.remove(*pop_iter); + } } } + + log_info("Format order:") + for (vector::iterator iter = graph_ordered_formats.begin(); + iter != graph_ordered_formats.end(); + ++iter) { + log_info(" %s", (*iter)->get_name().c_str()); + } + + vector &roots = log_format::get_root_formats(); + roots.insert(roots.begin(), graph_ordered_formats.begin(), graph_ordered_formats.end()); } diff --git a/test/Makefile.am b/test/Makefile.am index a5a1090a..a6429494 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -222,6 +222,7 @@ dist_noinst_DATA = \ listview_output.6 \ logfile_access_log.0 \ logfile_access_log.1 \ + logfile_blued.0 \ logfile_empty.0 \ logfile_filter.0 \ logfile_for_join.0 \ @@ -247,6 +248,7 @@ dist_noinst_DATA = \ vt52_curses_input.1 \ vt52_curses_output.0 \ vt52_curses_output.1 \ + formats/collision/format.json \ formats/jsontest/format.json \ log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt \ log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt \ diff --git a/test/drive_logfile.cc b/test/drive_logfile.cc index dc6bde35..449ce620 100644 --- a/test/drive_logfile.cc +++ b/test/drive_logfile.cc @@ -67,6 +67,7 @@ int main(int argc, char *argv[]) { std::vector paths, errors; + paths.push_back(getenv("test_dir")); load_formats(paths, errors); } diff --git a/test/formats/collision/format.json b/test/formats/collision/format.json new file mode 100644 index 00000000..824da8a0 --- /dev/null +++ b/test/formats/collision/format.json @@ -0,0 +1,47 @@ +{ + "zblued_log" : { + "title" : "blued", + "regex" : { + "std" : { + "pattern" : "^(?\\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))? blued(?:\\[(?\\d+)])?:(?(?:.|\\n)*)$" + } + }, + "level-field" : "body", + "level" : { + "error" : "(?:failed|failure|error)", + "warning" : "(?:warn|not responding|init: cannot execute)" + }, + "value" : { + "log_hostname" : { + "kind" : "string", + "collate" : "ipaddress", + "identifier" : true + } + }, + "sample" : [ + { + "line" : "Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock" + } + ] + }, + "xerror_log" : { + "title" : "Common Error Log", + "description" : "The default web error log format for servers like Apache.", + "regex" : { + "cups" : { + "pattern" : "^(?\\w) \\[(?[^\\]]+)\\] (?.*)" + } + }, + "level-field": "level", + "level" : { + "error" : "E", + "warning" : "W", + "info" : "I" + }, + "sample" : [ + { + "line" : "E [08/Jun/2013:11:28:58 -0700] Unknown directive BrowseOrder on line 22 of /private/etc/cups/cupsd.conf." + } + ] + } +} diff --git a/test/logfile_blued.0 b/test/logfile_blued.0 new file mode 100644 index 00000000..14226155 --- /dev/null +++ b/test/logfile_blued.0 @@ -0,0 +1,6 @@ +Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock +Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock +Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock +Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock +Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock +Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock diff --git a/test/test_logfile.sh b/test/test_logfile.sh index 0f3f5b60..674b3449 100644 --- a/test/test_logfile.sh +++ b/test/test_logfile.sh @@ -16,6 +16,10 @@ run_test ./drive_logfile -f strace_log ${srcdir}/logfile_strace_log.0 on_error_fail_with "Didn't infer strace_log log format?" +run_test ./drive_logfile -f zblued_log ${srcdir}/logfile_blued.0 + +on_error_fail_with "Didn't infer blued_log that collides with syslog?" + run_test ./drive_logfile ${srcdir}/logfile_empty.0