From 4172b8dc145cc9bc719f7c84955873a9a06554e7 Mon Sep 17 00:00:00 2001 From: Jason Orendorff Date: Tue, 21 Nov 2017 12:42:02 -0600 Subject: [PATCH] More comments. --- src/main.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/main.rs b/src/main.rs index 2b83054..eec7757 100644 --- a/src/main.rs +++ b/src/main.rs @@ -134,6 +134,19 @@ fn start_file_indexing_thread(texts: Receiver) (receiver, handle) } +/// Start a thread that merges in-memory indexes. +/// +/// `file_indexes` receives a stream of indexes from the file indexing thread. +/// These indexes typically vary a lot in size, since the input documents +/// are usually all different sizes. +/// +/// The thread created by this function merges those indexes into "large" +/// indexes and passes these large indexes on to a new channel. +/// +/// This returns a pair: a receiver that yields the large indexes produced by +/// merging the input indexes; and a `JoinHandle` that can be used to wait for +/// this thread to exit. This stage of the pipeline is infallible (it performs +/// no I/O). fn start_in_memory_merge_thread(file_indexes: Receiver) -> (Receiver, JoinHandle<()>) { @@ -158,6 +171,14 @@ fn start_in_memory_merge_thread(file_indexes: Receiver) (receiver, handle) } +/// Start a thread that saves large indexes to temporary files. +/// +/// This thread generates a meaningless unique filename for each index in +/// `big_indexes`, saves the data, and passes the filename on to a new channel. +/// +/// This returns a pair: a receiver that receives the filenames; and a +/// `JoinHandle` that can be used to wait for this thread to exit and receive +/// any I/O errors it encountered. 
fn start_index_writer_thread(big_indexes: Receiver, output_dir: &Path) -> (Receiver, JoinHandle>) @@ -178,6 +199,8 @@ fn start_index_writer_thread(big_indexes: Receiver, (receiver, handle) } +/// Given a sequence of filenames of index data files, merge all the files +/// into a single index data file. fn merge_index_files(files: Receiver, output_dir: &Path) -> io::Result<()> { @@ -218,6 +241,13 @@ fn run_pipeline(documents: Vec, output_dir: PathBuf) result } +/// Given some paths, generate the complete list of text files to index. We check +/// on disk whether each path names a file or a directory; for +/// directories, all .txt files immediately under the directory are indexed. +/// Relative paths are fine. +/// +/// It's an error if any of the `args` is not a valid path to an existing file +/// or directory. fn expand_filename_arguments(args: Vec) -> io::Result> { let mut filenames = vec![]; for arg in args { @@ -236,6 +266,7 @@ fn expand_filename_arguments(args: Vec) -> io::Result> { Ok(filenames) } +/// Generate an index for a bunch of text files. fn run(filenames: Vec, single_threaded: bool) -> io::Result<()> { let output_dir = PathBuf::from("."); let documents = expand_filename_arguments(filenames)?;