From 5bca12406b8c594b79775f70e7fcb71dc005ceac Mon Sep 17 00:00:00 2001 From: Olivia Brooks <109807080+Cutieguwu@users.noreply.github.com> Date: Wed, 21 Jan 2026 11:07:09 -0500 Subject: [PATCH] Replace that POS defrag, update Algo names, fix tests, rework initial algo. --- README.md | 9 +-- src/io.rs | 7 +++ src/mapping/cluster.rs | 2 + src/mapping/map.rs | 139 +++++++++++++++++++++-------------------- src/mapping/stage.rs | 8 ++- src/recovery.rs | 109 +++++++++++++++++++------------- 6 files changed, 152 insertions(+), 122 deletions(-) diff --git a/README.md b/README.md index 9639c40..7736187 100644 --- a/README.md +++ b/README.md @@ -41,13 +41,10 @@ This is still in very early development, so expect old maps to no longer work. ## Recovery Strategy -### Initial Pass (Stage::Untested) +### Initial Pass / Patchworking -Tries to read clusters of `max_buffer_size`, marking clusters with errors as -`ForIsolation` (note that the name has not yet be updated to -`Patchwork{ depth }`). - -### Patchworking +Tries to read clusters of `max_buffer_size`, marking clusters with errors with +an increasing `level`. This works by halving the length of the read buffer until one of two conditions is met: diff --git a/src/io.rs b/src/io.rs index 2a17623..d88b29e 100644 --- a/src/io.rs +++ b/src/io.rs @@ -97,9 +97,16 @@ fn backup>(path: P) -> std::io::Result<()> { ) } +#[derive(Debug)] #[repr(C, align(512))] pub struct DirectIOBuffer(pub [u8; crate::MAX_BUFFER_SIZE]); +impl DirectIOBuffer { + pub fn new() -> Self { + Self::default() + } +} + impl Default for DirectIOBuffer { fn default() -> Self { Self([crate::FB_NULL_VALUE; _]) diff --git a/src/mapping/cluster.rs b/src/mapping/cluster.rs index 47b38c7..a2fcaaf 100644 --- a/src/mapping/cluster.rs +++ b/src/mapping/cluster.rs @@ -3,6 +3,8 @@ use super::{Domain, Stage}; use serde::{Deserialize, Serialize}; /// A map for data stored in memory for processing and saving to disk. +// derived Ord impl *should* use self.domain.start to sort? Not sure. +// Use `sort_by_key()` to be safe. #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub struct Cluster { pub domain: Domain, diff --git a/src/mapping/map.rs b/src/mapping/map.rs index d8cba29..6b2eabb 100644 --- a/src/mapping/map.rs +++ b/src/mapping/map.rs @@ -1,6 +1,8 @@ use std::fs::File; use std::io::Write; +use crate::mapping::cluster; + use super::{Cluster, Domain, DomainOverlap, Stage}; use anyhow; @@ -30,7 +32,7 @@ impl Default for MapFile { domain: Domain::default(), map: vec![Cluster { domain: Domain::default(), - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }], } } @@ -81,18 +83,26 @@ impl MapFile { let mut recover_stage = Stage::Damaged; for cluster in self.map.iter() { + /* match cluster.stage { - Stage::Untested => return Stage::Untested, - Stage::ForIsolation { .. } => { + Stage::Patchwork { depth } => {if recover_stage > (Stage::Patchwork { depth })}, + Stage::Isolate => { if recover_stage == Stage::Damaged || cluster.stage < recover_stage { // Note that recover_stage after first condition is // only ever Stage::ForIsolation(_), thus PartialEq, // PartialOrd are useful for comparing the internal value. recover_stage = cluster.stage } + }, + Stage::BruteForceAndDesperation => { + if recover_stage > Stage::BruteForceAndDesperation || } - Stage::Damaged => (), - Stage::Intact => (), + Stage::Damaged | Stage::Intact => (), + } + */ + + if cluster.stage < recover_stage { + recover_stage = cluster.stage; } } @@ -116,52 +126,26 @@ impl MapFile { /// Defragments cluster groups. /// I.E. check forwards every cluster from current until stage changes, /// then group at once. - pub fn defrag(&mut self) -> &mut Self { + pub fn defrag(&mut self) { + self.map.sort_by_key(|c| c.domain.start); + let mut new_map: Vec = vec![]; + let mut idx = 0; + let mut master; + while idx < self.map.len() - 1 { + master = self.map[idx]; - // Fetch first cluster. - let mut start_cluster = self.map.iter().find(|c| c.domain.start == 0).unwrap(); - - // Even though this would be initialized by its first read, - // the compiler won't stop whining, and idk how to assert that to it. - let mut end_cluster = Cluster::default(); - let mut new_cluster: Cluster; - - let mut stage_common: bool; - let mut is_finished = false; - - while !is_finished { - stage_common = true; - - // Start a new cluster based on the cluster following - // the end of last new_cluster. - new_cluster = start_cluster.to_owned(); - - // While stage is common, and not finished, - // find each trailing cluster. - while stage_common && !is_finished { - end_cluster = start_cluster.to_owned(); - - if end_cluster.domain.end != self.domain.end { - start_cluster = self - .map - .iter() - .find(|c| end_cluster.domain.end == c.domain.start) - .unwrap(); - - stage_common = new_cluster.stage == start_cluster.stage - } else { - is_finished = true; + for c in self.map[idx + 1..self.map.len()].into_iter() { + if c.stage != master.stage { + break; } + + idx += 1; } - // Set the new ending, encapsulating any clusters of common stage. - new_cluster.domain.end = end_cluster.domain.end; - new_map.push(new_cluster); + master.domain.end = self.map[idx].domain.end; + new_map.push(master); } - - self.map = new_map; - self } /// Extend the domain of the MapFile. @@ -622,7 +606,7 @@ mod tests { let mut map = MapFile { map: vec![Cluster { domain: Domain { start: 0, end: 3 }, - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }], ..Default::default() }; @@ -726,17 +710,16 @@ mod tests { // If this fails here, there's something SERIOUSLY wrong. assert!( - mf_stage == Stage::Untested, + mf_stage == Stage::Patchwork { depth: 0 }, "Determined stage to be {:?}, when {:?} was expeccted.", mf_stage, - Stage::Untested + Stage::Patchwork { depth: 0 } ); let stages = vec![ Stage::Damaged, - Stage::ForIsolation { level: 1 }, - Stage::ForIsolation { level: 0 }, - Stage::Untested, + Stage::Patchwork { depth: 1 }, + Stage::Patchwork { depth: 0 }, ]; mf.map = vec![]; @@ -762,20 +745,17 @@ mod tests { mf.map = vec![ *Cluster::default().set_stage(Stage::Damaged), - *Cluster::default().set_stage(Stage::ForIsolation { level: 0 }), - *Cluster::default().set_stage(Stage::ForIsolation { level: 1 }), + *Cluster::default().set_stage(Stage::Patchwork { depth: 1 }), Cluster::default(), Cluster::default(), - *Cluster::default().set_stage(Stage::ForIsolation { level: 1 }), - *Cluster::default().set_stage(Stage::ForIsolation { level: 0 }), + *Cluster::default().set_stage(Stage::Patchwork { depth: 1 }), *Cluster::default().set_stage(Stage::Damaged), ]; let stages = vec![ Stage::Damaged, - Stage::ForIsolation { level: 1 }, - Stage::ForIsolation { level: 0 }, - Stage::Untested, + Stage::Patchwork { depth: 1 }, + Stage::Patchwork { depth: 0 }, ]; for stage in stages { @@ -803,63 +783,84 @@ mod tests { map: vec![ Cluster { domain: Domain { start: 0, end: 1 }, - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 1, end: 2 }, - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 2, end: 3 }, - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 3, end: 4 }, - stage: Stage::ForIsolation { level: 0 }, + stage: Stage::Isolate, }, Cluster { domain: Domain { start: 4, end: 5 }, - stage: Stage::ForIsolation { level: 0 }, + stage: Stage::Isolate, }, Cluster { domain: Domain { start: 5, end: 6 }, - stage: Stage::ForIsolation { level: 1 }, + stage: Stage::Patchwork { depth: 1 }, }, Cluster { domain: Domain { start: 6, end: 7 }, - stage: Stage::ForIsolation { level: 0 }, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 7, end: 8 }, stage: Stage::Damaged, }, + Cluster { + domain: Domain { start: 8, end: 10 }, + stage: Stage::Intact, + }, + Cluster { + domain: Domain { start: 10, end: 11 }, + stage: Stage::BruteForceAndDesperation, + }, + Cluster { + domain: Domain { start: 11, end: 12 }, + stage: Stage::BruteForceAndDesperation, + }, ], }; let expected = vec![ Cluster { domain: Domain { start: 0, end: 3 }, - stage: Stage::Untested, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 3, end: 5 }, - stage: Stage::ForIsolation { level: 0 }, + stage: Stage::Isolate, }, Cluster { domain: Domain { start: 5, end: 6 }, - stage: Stage::ForIsolation { level: 1 }, + stage: Stage::Patchwork { depth: 1 }, }, Cluster { domain: Domain { start: 6, end: 7 }, - stage: Stage::ForIsolation { level: 0 }, + stage: Stage::Patchwork { depth: 0 }, }, Cluster { domain: Domain { start: 7, end: 8 }, stage: Stage::Damaged, }, + Cluster { + domain: Domain { start: 8, end: 10 }, + stage: Stage::Intact, + }, + Cluster { + domain: Domain { start: 10, end: 12 }, + stage: Stage::BruteForceAndDesperation, + }, ]; mf.defrag(); + mf.map.sort_by_key(|c| c.domain.start); let received = mf.map; diff --git a/src/mapping/stage.rs b/src/mapping/stage.rs index 85813c9..7e40f64 100644 --- a/src/mapping/stage.rs +++ b/src/mapping/stage.rs @@ -2,15 +2,17 @@ use serde::{Deserialize, Serialize}; #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] pub enum Stage { - Untested, - ForIsolation { level: u8 }, + // Don't mess with the order. + Patchwork { depth: usize }, + Isolate, + BruteForceAndDesperation, Damaged, Intact, } impl Default for Stage { fn default() -> Self { - Stage::Untested + Stage::Patchwork { depth: 0 } } } diff --git a/src/recovery.rs b/src/recovery.rs index 46a7262..d770d8a 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -49,8 +49,9 @@ impl Recover { self.map.defrag(); match self.map.get_stage() { - Stage::Untested => self.copy_untested()?, - Stage::ForIsolation { .. } => todo!(), + Stage::Patchwork { depth } => self.copy_patchwork(depth)?, + Stage::Isolate => todo!(), + Stage::BruteForceAndDesperation => todo!(), Stage::Damaged | Stage::Intact => { println!("Cannot recover further."); @@ -84,39 +85,52 @@ impl Recover { } /// Attempt to copy all untested blocks. - fn copy_untested(&mut self) -> anyhow::Result<()> { - let mut buf = DirectIOBuffer::default(); + fn copy_patchwork(&mut self, mut depth: usize) -> anyhow::Result<()> { + let mut buf = DirectIOBuffer::new(); + let mut buf_capacity = self.get_buf_capacity() as usize; - for untested in self.map.get_clusters(Stage::Untested) { - // Caching. - let mut read_position: usize; - let mut cluster: Cluster; - let mut buf_capacity = self.get_buf_capacity() as usize; + while self.map.get_stage() == (Stage::Patchwork { depth }) { + // Order of these two expressions matters, stupid. + buf_capacity /= depth; + depth += 1; - dbg!(untested.domain); - read_position = untested.domain.start; + for cluster in self.map.get_clusters(Stage::Patchwork { depth }) { + self.read_domain(buf.as_mut(), cluster.domain, buf_capacity, Stage::Isolate)?; + } + } - while read_position < untested.domain.end { - dbg!(read_position); + Ok(()) + } - buf_capacity = buf_capacity.min(untested.domain.end - read_position); + fn read_domain( + &mut self, + buf: &mut [u8], + domain: Domain, + mut buf_capacity: usize, + next_stage: Stage, + ) -> anyhow::Result<()> { + let mut cluster; + let mut read_position = domain.start; - cluster = Cluster { - domain: Domain { - start: read_position, - end: read_position + buf_capacity, - }, - stage: Stage::Intact, - }; + while read_position < domain.end { + buf_capacity = buf_capacity.min(domain.end - read_position); - if let Err(err) = self.input.read_exact(&mut buf.as_mut()) { - // If buf were zeroed out before every read, one could theoretically recover - // part of that read given the assumption that all null values from the end to - // the first non-null value are unread, and some further padding from the last - // values are potentially invalid. - // - // That padding should have a cli arg to control it. + cluster = Cluster { + domain: Domain { + start: read_position, + end: read_position + buf_capacity, + }, + stage: Stage::Intact, + }; + match self.read_sectors(buf.as_mut()) { + Ok(bytes) => { + self.output + .write_all(&buf[0..bytes]) + .context("Failed to write data to output file")?; + read_position += bytes; + } + Err(err) => { println!("Hit error: {:?}", err); if CONFIG.reopen_on_error { self.reload_input() @@ -130,26 +144,14 @@ impl Recover { .seek_relative(buf_capacity as i64) .context("Failed to seek output by buf_capacity to skip previous error")?; - // I don't remember what level was for. - cluster.stage = Stage::ForIsolation { level: 1 }; + cluster.stage = next_stage.clone(); } - - if cluster.stage == Stage::Intact { - self.output - .write_all(&buf[0..buf_capacity]) - .context("Failed to write data to output file")?; - } - - self.map.update(cluster); - self.map.write_to(&mut crate::io::load_map_write()?)?; - read_position += buf_capacity; } + + self.map.update(cluster); + self.map.write_to(&mut crate::io::load_map_write()?)?; } - drop(buf); - - self.map.write_to(&mut crate::io::load_map_write()?)?; - Ok(()) } @@ -167,6 +169,25 @@ impl Recover { self.input.seek(SeekFrom::Start(seek_pos))?; Ok(()) } + + fn read_sectors(&mut self, mut buf: &mut [u8]) -> std::io::Result { + let mut raw_buf = vec![crate::FB_NULL_VALUE; buf.len()]; + let result = self.input.read(&mut raw_buf); + + if result.is_err() { + return result; + } else if let Ok(mut bytes) = result + && bytes >= CONFIG.sector_size + { + // Remember that this is integer division (floor division) + bytes = (bytes / CONFIG.sector_size) * CONFIG.sector_size; + buf.write_all(&raw_buf[..bytes]).unwrap(); + + return Ok(bytes); + } else { + return Ok(0); + } + } } #[cfg(test)]