Replace that POS defrag, update Algo names, fix tests, rework initial algo.
This commit is contained in:
Olivia Brooks
2026-01-21 11:07:09 -05:00
parent d71f6fd8d8
commit 5bca12406b
6 changed files with 152 additions and 122 deletions

View File

@@ -41,13 +41,10 @@ This is still in very early development, so expect old maps to no longer work.
## Recovery Strategy ## Recovery Strategy
### Initial Pass (Stage::Untested) ### Initial Pass / Patchworking
Tries to read clusters of `max_buffer_size`, marking clusters with errors as Tries to read clusters of `max_buffer_size`, marking clusters with errors with
`ForIsolation` (note that the name has not yet been updated to an increasing `level`.
`Patchwork{ depth }`).
### Patchworking
This works by halving the length of the read buffer until one of two This works by halving the length of the read buffer until one of two
conditions is met: conditions is met:

View File

@@ -97,9 +97,16 @@ fn backup<P: AsRef<Path>>(path: P) -> std::io::Result<()> {
) )
} }
#[derive(Debug)]
#[repr(C, align(512))] #[repr(C, align(512))]
pub struct DirectIOBuffer(pub [u8; crate::MAX_BUFFER_SIZE]); pub struct DirectIOBuffer(pub [u8; crate::MAX_BUFFER_SIZE]);
impl DirectIOBuffer {
    /// Creates a buffer in its default state.
    ///
    /// Equivalent to [`DirectIOBuffer::default`]; provided as a conventional constructor.
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}
impl Default for DirectIOBuffer { impl Default for DirectIOBuffer {
fn default() -> Self { fn default() -> Self {
Self([crate::FB_NULL_VALUE; _]) Self([crate::FB_NULL_VALUE; _])

View File

@@ -3,6 +3,8 @@ use super::{Domain, Stage};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
/// A map for data stored in memory for processing and saving to disk. /// A map for data stored in memory for processing and saving to disk.
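// The derived `Ord` compares fields in declaration order, so `domain` is compared
// first; whether that amounts to sorting by `domain.start` depends on `Domain`'s own `Ord`.
// Use `sort_by_key()` to be safe.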
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct Cluster { pub struct Cluster {
pub domain: Domain, pub domain: Domain,

View File

@@ -1,6 +1,8 @@
use std::fs::File; use std::fs::File;
use std::io::Write; use std::io::Write;
use crate::mapping::cluster;
use super::{Cluster, Domain, DomainOverlap, Stage}; use super::{Cluster, Domain, DomainOverlap, Stage};
use anyhow; use anyhow;
@@ -30,7 +32,7 @@ impl Default for MapFile {
domain: Domain::default(), domain: Domain::default(),
map: vec![Cluster { map: vec![Cluster {
domain: Domain::default(), domain: Domain::default(),
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}], }],
} }
} }
@@ -81,18 +83,26 @@ impl MapFile {
let mut recover_stage = Stage::Damaged; let mut recover_stage = Stage::Damaged;
for cluster in self.map.iter() { for cluster in self.map.iter() {
/*
match cluster.stage { match cluster.stage {
Stage::Untested => return Stage::Untested, Stage::Patchwork { depth } => {if recover_stage > (Stage::Patchwork { depth })},
Stage::ForIsolation { .. } => { Stage::Isolate => {
if recover_stage == Stage::Damaged || cluster.stage < recover_stage { if recover_stage == Stage::Damaged || cluster.stage < recover_stage {
// Note that recover_stage after first condition is // Note that recover_stage after first condition is
// only ever Stage::ForIsolation(_), thus PartialEq, // only ever Stage::ForIsolation(_), thus PartialEq,
// PartialOrd are useful for comparing the internal value. // PartialOrd are useful for comparing the internal value.
recover_stage = cluster.stage recover_stage = cluster.stage
} }
},
Stage::BruteForceAndDesperation => {
if recover_stage > Stage::BruteForceAndDesperation ||
} }
Stage::Damaged => (), Stage::Damaged | Stage::Intact => (),
Stage::Intact => (), }
*/
if cluster.stage < recover_stage {
recover_stage = cluster.stage;
} }
} }
@@ -116,52 +126,26 @@ impl MapFile {
/// Defragments cluster groups. /// Defragments cluster groups.
/// I.E. check forwards every cluster from current until stage changes, /// I.E. check forwards every cluster from current until stage changes,
/// then group at once. /// then group at once.
pub fn defrag(&mut self) -> &mut Self { pub fn defrag(&mut self) {
self.map.sort_by_key(|c| c.domain.start);
let mut new_map: Vec<Cluster> = vec![]; let mut new_map: Vec<Cluster> = vec![];
let mut idx = 0;
let mut master;
while idx < self.map.len() - 1 {
master = self.map[idx];
// Fetch first cluster. for c in self.map[idx + 1..self.map.len()].into_iter() {
let mut start_cluster = self.map.iter().find(|c| c.domain.start == 0).unwrap(); if c.stage != master.stage {
break;
// Even though this would be initialized by its first read,
// the compiler won't stop whining, and idk how to assert that to it.
let mut end_cluster = Cluster::default();
let mut new_cluster: Cluster;
let mut stage_common: bool;
let mut is_finished = false;
while !is_finished {
stage_common = true;
// Start a new cluster based on the cluster following
// the end of last new_cluster.
new_cluster = start_cluster.to_owned();
// While stage is common, and not finished,
// find each trailing cluster.
while stage_common && !is_finished {
end_cluster = start_cluster.to_owned();
if end_cluster.domain.end != self.domain.end {
start_cluster = self
.map
.iter()
.find(|c| end_cluster.domain.end == c.domain.start)
.unwrap();
stage_common = new_cluster.stage == start_cluster.stage
} else {
is_finished = true;
} }
idx += 1;
} }
// Set the new ending, encapsulating any clusters of common stage. master.domain.end = self.map[idx].domain.end;
new_cluster.domain.end = end_cluster.domain.end; new_map.push(master);
new_map.push(new_cluster);
} }
self.map = new_map;
self
} }
/// Extend the domain of the MapFile. /// Extend the domain of the MapFile.
@@ -622,7 +606,7 @@ mod tests {
let mut map = MapFile { let mut map = MapFile {
map: vec![Cluster { map: vec![Cluster {
domain: Domain { start: 0, end: 3 }, domain: Domain { start: 0, end: 3 },
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}], }],
..Default::default() ..Default::default()
}; };
@@ -726,17 +710,16 @@ mod tests {
// If this fails here, there's something SERIOUSLY wrong. // If this fails here, there's something SERIOUSLY wrong.
assert!( assert!(
mf_stage == Stage::Untested, mf_stage == Stage::Patchwork { depth: 0 },
"Determined stage to be {:?}, when {:?} was expeccted.", "Determined stage to be {:?}, when {:?} was expeccted.",
mf_stage, mf_stage,
Stage::Untested Stage::Patchwork { depth: 0 }
); );
let stages = vec![ let stages = vec![
Stage::Damaged, Stage::Damaged,
Stage::ForIsolation { level: 1 }, Stage::Patchwork { depth: 1 },
Stage::ForIsolation { level: 0 }, Stage::Patchwork { depth: 0 },
Stage::Untested,
]; ];
mf.map = vec![]; mf.map = vec![];
@@ -762,20 +745,17 @@ mod tests {
mf.map = vec![ mf.map = vec![
*Cluster::default().set_stage(Stage::Damaged), *Cluster::default().set_stage(Stage::Damaged),
*Cluster::default().set_stage(Stage::ForIsolation { level: 0 }), *Cluster::default().set_stage(Stage::Patchwork { depth: 1 }),
*Cluster::default().set_stage(Stage::ForIsolation { level: 1 }),
Cluster::default(), Cluster::default(),
Cluster::default(), Cluster::default(),
*Cluster::default().set_stage(Stage::ForIsolation { level: 1 }), *Cluster::default().set_stage(Stage::Patchwork { depth: 1 }),
*Cluster::default().set_stage(Stage::ForIsolation { level: 0 }),
*Cluster::default().set_stage(Stage::Damaged), *Cluster::default().set_stage(Stage::Damaged),
]; ];
let stages = vec![ let stages = vec![
Stage::Damaged, Stage::Damaged,
Stage::ForIsolation { level: 1 }, Stage::Patchwork { depth: 1 },
Stage::ForIsolation { level: 0 }, Stage::Patchwork { depth: 0 },
Stage::Untested,
]; ];
for stage in stages { for stage in stages {
@@ -803,63 +783,84 @@ mod tests {
map: vec![ map: vec![
Cluster { Cluster {
domain: Domain { start: 0, end: 1 }, domain: Domain { start: 0, end: 1 },
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 1, end: 2 }, domain: Domain { start: 1, end: 2 },
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 2, end: 3 }, domain: Domain { start: 2, end: 3 },
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 3, end: 4 }, domain: Domain { start: 3, end: 4 },
stage: Stage::ForIsolation { level: 0 }, stage: Stage::Isolate,
}, },
Cluster { Cluster {
domain: Domain { start: 4, end: 5 }, domain: Domain { start: 4, end: 5 },
stage: Stage::ForIsolation { level: 0 }, stage: Stage::Isolate,
}, },
Cluster { Cluster {
domain: Domain { start: 5, end: 6 }, domain: Domain { start: 5, end: 6 },
stage: Stage::ForIsolation { level: 1 }, stage: Stage::Patchwork { depth: 1 },
}, },
Cluster { Cluster {
domain: Domain { start: 6, end: 7 }, domain: Domain { start: 6, end: 7 },
stage: Stage::ForIsolation { level: 0 }, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 7, end: 8 }, domain: Domain { start: 7, end: 8 },
stage: Stage::Damaged, stage: Stage::Damaged,
}, },
Cluster {
domain: Domain { start: 8, end: 10 },
stage: Stage::Intact,
},
Cluster {
domain: Domain { start: 10, end: 11 },
stage: Stage::BruteForceAndDesperation,
},
Cluster {
domain: Domain { start: 11, end: 12 },
stage: Stage::BruteForceAndDesperation,
},
], ],
}; };
let expected = vec![ let expected = vec![
Cluster { Cluster {
domain: Domain { start: 0, end: 3 }, domain: Domain { start: 0, end: 3 },
stage: Stage::Untested, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 3, end: 5 }, domain: Domain { start: 3, end: 5 },
stage: Stage::ForIsolation { level: 0 }, stage: Stage::Isolate,
}, },
Cluster { Cluster {
domain: Domain { start: 5, end: 6 }, domain: Domain { start: 5, end: 6 },
stage: Stage::ForIsolation { level: 1 }, stage: Stage::Patchwork { depth: 1 },
}, },
Cluster { Cluster {
domain: Domain { start: 6, end: 7 }, domain: Domain { start: 6, end: 7 },
stage: Stage::ForIsolation { level: 0 }, stage: Stage::Patchwork { depth: 0 },
}, },
Cluster { Cluster {
domain: Domain { start: 7, end: 8 }, domain: Domain { start: 7, end: 8 },
stage: Stage::Damaged, stage: Stage::Damaged,
}, },
Cluster {
domain: Domain { start: 8, end: 10 },
stage: Stage::Intact,
},
Cluster {
domain: Domain { start: 10, end: 12 },
stage: Stage::BruteForceAndDesperation,
},
]; ];
mf.defrag(); mf.defrag();
mf.map.sort_by_key(|c| c.domain.start);
let received = mf.map; let received = mf.map;

View File

@@ -2,15 +2,17 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum Stage { pub enum Stage {
Untested, // Don't mess with the order.
ForIsolation { level: u8 }, Patchwork { depth: usize },
Isolate,
BruteForceAndDesperation,
Damaged, Damaged,
Intact, Intact,
} }
impl Default for Stage { impl Default for Stage {
fn default() -> Self { fn default() -> Self {
Stage::Untested Stage::Patchwork { depth: 0 }
} }
} }

View File

@@ -49,8 +49,9 @@ impl Recover {
self.map.defrag(); self.map.defrag();
match self.map.get_stage() { match self.map.get_stage() {
Stage::Untested => self.copy_untested()?, Stage::Patchwork { depth } => self.copy_patchwork(depth)?,
Stage::ForIsolation { .. } => todo!(), Stage::Isolate => todo!(),
Stage::BruteForceAndDesperation => todo!(),
Stage::Damaged | Stage::Intact => { Stage::Damaged | Stage::Intact => {
println!("Cannot recover further."); println!("Cannot recover further.");
@@ -84,39 +85,52 @@ impl Recover {
} }
/// Attempt to copy all untested blocks. /// Attempt to copy all untested blocks.
fn copy_untested(&mut self) -> anyhow::Result<()> { fn copy_patchwork(&mut self, mut depth: usize) -> anyhow::Result<()> {
let mut buf = DirectIOBuffer::default(); let mut buf = DirectIOBuffer::new();
let mut buf_capacity = self.get_buf_capacity() as usize;
for untested in self.map.get_clusters(Stage::Untested) { while self.map.get_stage() == (Stage::Patchwork { depth }) {
// Caching. // Order of these two expressions matters, stupid.
let mut read_position: usize; buf_capacity /= depth;
let mut cluster: Cluster; depth += 1;
let mut buf_capacity = self.get_buf_capacity() as usize;
dbg!(untested.domain); for cluster in self.map.get_clusters(Stage::Patchwork { depth }) {
read_position = untested.domain.start; self.read_domain(buf.as_mut(), cluster.domain, buf_capacity, Stage::Isolate)?;
}
}
while read_position < untested.domain.end { Ok(())
dbg!(read_position); }
buf_capacity = buf_capacity.min(untested.domain.end - read_position); fn read_domain(
&mut self,
buf: &mut [u8],
domain: Domain,
mut buf_capacity: usize,
next_stage: Stage,
) -> anyhow::Result<()> {
let mut cluster;
let mut read_position = domain.start;
cluster = Cluster { while read_position < domain.end {
domain: Domain { buf_capacity = buf_capacity.min(domain.end - read_position);
start: read_position,
end: read_position + buf_capacity,
},
stage: Stage::Intact,
};
if let Err(err) = self.input.read_exact(&mut buf.as_mut()) { cluster = Cluster {
// If buf were zeroed out before every read, one could theoretically recover domain: Domain {
// part of that read given the assumption that all null values from the end to start: read_position,
// the first non-null value are unread, and some further padding from the last end: read_position + buf_capacity,
// values are potentially invalid. },
// stage: Stage::Intact,
// That padding should have a cli arg to control it. };
match self.read_sectors(buf.as_mut()) {
Ok(bytes) => {
self.output
.write_all(&buf[0..bytes])
.context("Failed to write data to output file")?;
read_position += bytes;
}
Err(err) => {
println!("Hit error: {:?}", err); println!("Hit error: {:?}", err);
if CONFIG.reopen_on_error { if CONFIG.reopen_on_error {
self.reload_input() self.reload_input()
@@ -130,26 +144,14 @@ impl Recover {
.seek_relative(buf_capacity as i64) .seek_relative(buf_capacity as i64)
.context("Failed to seek output by buf_capacity to skip previous error")?; .context("Failed to seek output by buf_capacity to skip previous error")?;
// I don't remember what level was for. cluster.stage = next_stage.clone();
cluster.stage = Stage::ForIsolation { level: 1 };
} }
if cluster.stage == Stage::Intact {
self.output
.write_all(&buf[0..buf_capacity])
.context("Failed to write data to output file")?;
}
self.map.update(cluster);
self.map.write_to(&mut crate::io::load_map_write()?)?;
read_position += buf_capacity;
} }
self.map.update(cluster);
self.map.write_to(&mut crate::io::load_map_write()?)?;
} }
drop(buf);
self.map.write_to(&mut crate::io::load_map_write()?)?;
Ok(()) Ok(())
} }
@@ -167,6 +169,25 @@ impl Recover {
self.input.seek(SeekFrom::Start(seek_pos))?; self.input.seek(SeekFrom::Start(seek_pos))?;
Ok(()) Ok(())
} }
fn read_sectors(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
let mut raw_buf = vec![crate::FB_NULL_VALUE; buf.len()];
let result = self.input.read(&mut raw_buf);
if result.is_err() {
return result;
} else if let Ok(mut bytes) = result
&& bytes >= CONFIG.sector_size
{
// Remember that this is integer division (floor division)
bytes = (bytes / CONFIG.sector_size) * CONFIG.sector_size;
buf.write_all(&raw_buf[..bytes]).unwrap();
return Ok(bytes);
} else {
return Ok(0);
}
}
} }
#[cfg(test)] #[cfg(test)]