chore: make code more idiomatic

Signed-off-by: simonsan <14062932+simonsan@users.noreply.github.com>
pull/5/head
simonsan 3 months ago
parent e538dfe98b
commit 12031aa1ee
No known key found for this signature in database
GPG Key ID: E11D13668EC3B71B

@ -32,8 +32,8 @@ fn main() {
"filenames",
Collect,
"Names of files/directories to index. \
For directories, all .txt files immediately \
under the directory are indexed.",
For directories, all .txt files immediately \
under the directory are indexed.",
);
ap.parse_args_or_exit();
}

@ -21,6 +21,7 @@ fn tokenize(text: &str) -> Vec<&str> {
/// answer simple search queries. And you can use the `read`, `write`, and
/// `merge` modules to save an in-memory index to disk and merge it with other
/// indices, producing a large index.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct InMemoryIndex {
/// The total number of words in the indexed documents.
pub word_count: usize,
@ -48,10 +49,7 @@ pub type Hit = Vec<u8>;
impl InMemoryIndex {
/// Create a new, empty index.
pub fn new() -> Self {
Self {
word_count: 0,
map: HashMap::new(),
}
Self::default()
}
/// Index a single document.

@ -1,22 +1,39 @@
use std::fs::{self, File};
use std::io::{self, BufWriter};
use std::mem;
use std::path::{Path, PathBuf};
use std::{fmt, mem};
use std::{
fmt::Debug,
fs::{self, File},
};
use std::{
fmt::Formatter,
io::{self, BufWriter},
};
use crate::read::IndexFileReader;
use crate::tmp::TmpDir;
use crate::write::IndexFileWriter;
/// Tuning constants for the file-merge phase of index building.
pub(crate) mod constants {
// How many files to merge at a time, at most.
pub const NSTREAMS: usize = 8;
// Name of the final merged index file placed in the output directory.
pub const MERGED_FILENAME: &str = "index.dat";
}
/// Merges many on-disk index files into a single output file, combining
/// at most `constants::NSTREAMS` files per merge pass.
#[derive(Clone)]
pub struct FileMerge {
/// Directory that receives the final merged file
/// (named `constants::MERGED_FILENAME`).
output_dir: PathBuf,
/// Source of uniquely named temporary files for intermediate merge results.
tmp_dir: TmpDir,
/// `stacks[level]` holds files waiting to be merged at that level; once a
/// level accumulates `NSTREAMS` files they are merged into one file that
/// is pushed to the next level.
stacks: Vec<Vec<PathBuf>>,
}
// How many files to merge at a time, at most.
const NSTREAMS: usize = 8;
const MERGED_FILENAME: &str = "index.dat";
// Manual `Debug` impl that reports only `output_dir` and `stacks`.
// NOTE(review): `tmp_dir` is omitted from the output — presumably because
// `TmpDir` did not implement `Debug` when this was written; this commit
// appears to derive `Debug` on `TmpDir`, so confirm whether a plain
// `#[derive(Debug)]` on `FileMerge` could replace this impl.
impl Debug for FileMerge {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_struct("FileMerge")
.field("output_dir", &self.output_dir)
.field("stacks", &self.stacks)
.finish()
}
}
impl FileMerge {
pub fn new(output_dir: &Path) -> Self {
@ -34,7 +51,7 @@ impl FileMerge {
self.stacks.push(vec![]);
}
self.stacks[level].push(file);
if self.stacks[level].len() < NSTREAMS {
if self.stacks[level].len() < constants::NSTREAMS {
break;
}
let (filename, out) = self.tmp_dir.create()?;
@ -48,11 +65,11 @@ impl FileMerge {
}
pub fn finish(mut self) -> io::Result<()> {
let mut tmp = Vec::with_capacity(NSTREAMS);
let mut tmp = Vec::with_capacity(constants::NSTREAMS);
for stack in self.stacks {
for file in stack.into_iter().rev() {
tmp.push(file);
if tmp.len() == NSTREAMS {
if tmp.len() == constants::NSTREAMS {
merge_reversed(&mut tmp, &mut self.tmp_dir)?;
}
}
@ -63,7 +80,9 @@ impl FileMerge {
}
assert!(tmp.len() <= 1);
match tmp.pop() {
Some(last_file) => fs::rename(last_file, self.output_dir.join(MERGED_FILENAME)),
Some(last_file) => {
fs::rename(last_file, self.output_dir.join(constants::MERGED_FILENAME))
}
None => Err(io::Error::new(
io::ErrorKind::Other,
"no documents were parsed or none contained any words",
@ -122,7 +141,7 @@ fn merge_streams(files: Vec<PathBuf>, out: BufWriter<File>) -> io::Result<()> {
fn merge_reversed(filenames: &mut Vec<PathBuf>, tmp_dir: &mut TmpDir) -> io::Result<()> {
filenames.reverse();
let (merged_filename, out) = tmp_dir.create()?;
let mut to_merge = Vec::with_capacity(NSTREAMS);
let mut to_merge = Vec::with_capacity(constants::NSTREAMS);
mem::swap(filenames, &mut to_merge);
merge_streams(to_merge, out)?;
filenames.push(merged_filename);

@ -38,6 +38,7 @@ pub struct IndexFileReader {
/// Each entry in the table of contents is small. It consists of a string, the
/// `term`; summary information about that term, as used in the corpus (`df`);
/// and a pointer to bulkier data that tells more (`offset` and `nbytes`).
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)]
pub struct Entry {
/// The term is a word that appears in one or more documents in the corpus.
/// The index file contains information about the documents that use this

@ -1,39 +1,41 @@
use std::io::{self, BufWriter};
use std::fs::{self, File};
use std::io::{self, BufWriter};
use std::path::{Path, PathBuf};
#[derive(Clone)]
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct TmpDir {
dir: PathBuf,
n: usize
n: usize,
}
impl TmpDir {
pub fn new<P: AsRef<Path>>(dir: P) -> Self {
Self {
dir: dir.as_ref().to_owned(),
n: 1
n: 1,
}
}
pub fn create(&mut self) -> io::Result<(PathBuf, BufWriter<File>)> {
let mut r#try = 1;
loop {
let filename = self.dir.join(PathBuf::from(format!("tmp{:08x}.dat", self.n)));
let filename = self
.dir
.join(PathBuf::from(format!("tmp{:08x}.dat", self.n)));
self.n += 1;
match fs::OpenOptions::new()
.write(true)
.create_new(true)
.open(&filename)
.write(true)
.create_new(true)
.open(&filename)
{
Ok(f) =>
return Ok((filename, BufWriter::new(f))),
Err(exc) =>
Ok(f) => return Ok((filename, BufWriter::new(f))),
Err(exc) => {
if r#try < 999 && exc.kind() == io::ErrorKind::AlreadyExists {
// keep going
} else {
return Err(exc);
}
}
}
r#try += 1;
}

@ -15,6 +15,7 @@ use std::path::PathBuf;
/// An index file has two parts. The main part of the file is a sequence of
/// entries, stored back-to-back; the
#[derive(Debug)]
pub struct IndexFileWriter {
/// The number of bytes written so far.
offset: u64,

Loading…
Cancel
Save