Replace that POS defrag, update Algo names, fix tests, rework initial algo.
This commit is contained in:
Olivia Brooks
2026-01-21 11:07:09 -05:00
parent d71f6fd8d8
commit 5bca12406b
6 changed files with 152 additions and 122 deletions

View File

@@ -41,13 +41,10 @@ This is still in very early development, so expect old maps to no longer work.
## Recovery Strategy
### Initial Pass (Stage::Untested)
### Initial Pass / Patchworking
Tries to read clusters of `max_buffer_size`, marking clusters with errors as
`ForIsolation` (note that the name has not yet been updated to
`Patchwork{ depth }`).
### Patchworking
Tries to read clusters of `max_buffer_size`, marking clusters with errors with
an increasing `level`.
This works by halving the length of the read buffer until one of two
conditions is met:

View File

@@ -97,9 +97,16 @@ fn backup<P: AsRef<Path>>(path: P) -> std::io::Result<()> {
)
}
#[derive(Debug)]
#[repr(C, align(512))]
pub struct DirectIOBuffer(pub [u8; crate::MAX_BUFFER_SIZE]);
impl DirectIOBuffer {
pub fn new() -> Self {
Self::default()
}
}
impl Default for DirectIOBuffer {
fn default() -> Self {
Self([crate::FB_NULL_VALUE; _])

View File

@@ -3,6 +3,8 @@ use super::{Domain, Stage};
use serde::{Deserialize, Serialize};
/// A map for data stored in memory for processing and saving to disk.
// derived Ord impl *should* use self.domain.start to sort? Not sure.
// Use `sort_by_key()` to be safe.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct Cluster {
pub domain: Domain,

View File

@@ -1,6 +1,8 @@
use std::fs::File;
use std::io::Write;
use crate::mapping::cluster;
use super::{Cluster, Domain, DomainOverlap, Stage};
use anyhow;
@@ -30,7 +32,7 @@ impl Default for MapFile {
domain: Domain::default(),
map: vec![Cluster {
domain: Domain::default(),
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
}],
}
}
@@ -81,18 +83,26 @@ impl MapFile {
let mut recover_stage = Stage::Damaged;
for cluster in self.map.iter() {
/*
match cluster.stage {
Stage::Untested => return Stage::Untested,
Stage::ForIsolation { .. } => {
Stage::Patchwork { depth } => {if recover_stage > (Stage::Patchwork { depth })},
Stage::Isolate => {
if recover_stage == Stage::Damaged || cluster.stage < recover_stage {
// Note that recover_stage after first condition is
// only ever Stage::ForIsolation(_), thus PartialEq,
// PartialOrd are useful for comparing the internal value.
recover_stage = cluster.stage
}
},
Stage::BruteForceAndDesperation => {
if recover_stage > Stage::BruteForceAndDesperation ||
}
Stage::Damaged => (),
Stage::Intact => (),
Stage::Damaged | Stage::Intact => (),
}
*/
if cluster.stage < recover_stage {
recover_stage = cluster.stage;
}
}
@@ -116,52 +126,26 @@ impl MapFile {
/// Defragments cluster groups.
/// I.E. check forwards every cluster from current until stage changes,
/// then group at once.
pub fn defrag(&mut self) -> &mut Self {
pub fn defrag(&mut self) {
self.map.sort_by_key(|c| c.domain.start);
let mut new_map: Vec<Cluster> = vec![];
let mut idx = 0;
let mut master;
while idx < self.map.len() - 1 {
master = self.map[idx];
// Fetch first cluster.
let mut start_cluster = self.map.iter().find(|c| c.domain.start == 0).unwrap();
// Even though this would be initialized by its first read,
// the compiler won't stop whining, and idk how to assert that to it.
let mut end_cluster = Cluster::default();
let mut new_cluster: Cluster;
let mut stage_common: bool;
let mut is_finished = false;
while !is_finished {
stage_common = true;
// Start a new cluster based on the cluster following
// the end of last new_cluster.
new_cluster = start_cluster.to_owned();
// While stage is common, and not finished,
// find each trailing cluster.
while stage_common && !is_finished {
end_cluster = start_cluster.to_owned();
if end_cluster.domain.end != self.domain.end {
start_cluster = self
.map
.iter()
.find(|c| end_cluster.domain.end == c.domain.start)
.unwrap();
stage_common = new_cluster.stage == start_cluster.stage
} else {
is_finished = true;
}
for c in self.map[idx + 1..self.map.len()].into_iter() {
if c.stage != master.stage {
break;
}
// Set the new ending, encapsulating any clusters of common stage.
new_cluster.domain.end = end_cluster.domain.end;
new_map.push(new_cluster);
idx += 1;
}
self.map = new_map;
self
master.domain.end = self.map[idx].domain.end;
new_map.push(master);
}
}
/// Extend the domain of the MapFile.
@@ -622,7 +606,7 @@ mod tests {
let mut map = MapFile {
map: vec![Cluster {
domain: Domain { start: 0, end: 3 },
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
}],
..Default::default()
};
@@ -726,17 +710,16 @@ mod tests {
// If this fails here, there's something SERIOUSLY wrong.
assert!(
mf_stage == Stage::Untested,
mf_stage == Stage::Patchwork { depth: 0 },
"Determined stage to be {:?}, when {:?} was expeccted.",
mf_stage,
Stage::Untested
Stage::Patchwork { depth: 0 }
);
let stages = vec![
Stage::Damaged,
Stage::ForIsolation { level: 1 },
Stage::ForIsolation { level: 0 },
Stage::Untested,
Stage::Patchwork { depth: 1 },
Stage::Patchwork { depth: 0 },
];
mf.map = vec![];
@@ -762,20 +745,17 @@ mod tests {
mf.map = vec![
*Cluster::default().set_stage(Stage::Damaged),
*Cluster::default().set_stage(Stage::ForIsolation { level: 0 }),
*Cluster::default().set_stage(Stage::ForIsolation { level: 1 }),
*Cluster::default().set_stage(Stage::Patchwork { depth: 1 }),
Cluster::default(),
Cluster::default(),
*Cluster::default().set_stage(Stage::ForIsolation { level: 1 }),
*Cluster::default().set_stage(Stage::ForIsolation { level: 0 }),
*Cluster::default().set_stage(Stage::Patchwork { depth: 1 }),
*Cluster::default().set_stage(Stage::Damaged),
];
let stages = vec![
Stage::Damaged,
Stage::ForIsolation { level: 1 },
Stage::ForIsolation { level: 0 },
Stage::Untested,
Stage::Patchwork { depth: 1 },
Stage::Patchwork { depth: 0 },
];
for stage in stages {
@@ -803,63 +783,84 @@ mod tests {
map: vec![
Cluster {
domain: Domain { start: 0, end: 1 },
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 1, end: 2 },
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 2, end: 3 },
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 3, end: 4 },
stage: Stage::ForIsolation { level: 0 },
stage: Stage::Isolate,
},
Cluster {
domain: Domain { start: 4, end: 5 },
stage: Stage::ForIsolation { level: 0 },
stage: Stage::Isolate,
},
Cluster {
domain: Domain { start: 5, end: 6 },
stage: Stage::ForIsolation { level: 1 },
stage: Stage::Patchwork { depth: 1 },
},
Cluster {
domain: Domain { start: 6, end: 7 },
stage: Stage::ForIsolation { level: 0 },
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 7, end: 8 },
stage: Stage::Damaged,
},
Cluster {
domain: Domain { start: 8, end: 10 },
stage: Stage::Intact,
},
Cluster {
domain: Domain { start: 10, end: 11 },
stage: Stage::BruteForceAndDesperation,
},
Cluster {
domain: Domain { start: 11, end: 12 },
stage: Stage::BruteForceAndDesperation,
},
],
};
let expected = vec![
Cluster {
domain: Domain { start: 0, end: 3 },
stage: Stage::Untested,
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 3, end: 5 },
stage: Stage::ForIsolation { level: 0 },
stage: Stage::Isolate,
},
Cluster {
domain: Domain { start: 5, end: 6 },
stage: Stage::ForIsolation { level: 1 },
stage: Stage::Patchwork { depth: 1 },
},
Cluster {
domain: Domain { start: 6, end: 7 },
stage: Stage::ForIsolation { level: 0 },
stage: Stage::Patchwork { depth: 0 },
},
Cluster {
domain: Domain { start: 7, end: 8 },
stage: Stage::Damaged,
},
Cluster {
domain: Domain { start: 8, end: 10 },
stage: Stage::Intact,
},
Cluster {
domain: Domain { start: 10, end: 12 },
stage: Stage::BruteForceAndDesperation,
},
];
mf.defrag();
mf.map.sort_by_key(|c| c.domain.start);
let received = mf.map;

View File

@@ -2,15 +2,17 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum Stage {
Untested,
ForIsolation { level: u8 },
// Don't mess with the order.
Patchwork { depth: usize },
Isolate,
BruteForceAndDesperation,
Damaged,
Intact,
}
impl Default for Stage {
fn default() -> Self {
Stage::Untested
Stage::Patchwork { depth: 0 }
}
}

View File

@@ -49,8 +49,9 @@ impl Recover {
self.map.defrag();
match self.map.get_stage() {
Stage::Untested => self.copy_untested()?,
Stage::ForIsolation { .. } => todo!(),
Stage::Patchwork { depth } => self.copy_patchwork(depth)?,
Stage::Isolate => todo!(),
Stage::BruteForceAndDesperation => todo!(),
Stage::Damaged | Stage::Intact => {
println!("Cannot recover further.");
@@ -84,22 +85,35 @@ impl Recover {
}
/// Attempt to copy all untested blocks.
fn copy_untested(&mut self) -> anyhow::Result<()> {
let mut buf = DirectIOBuffer::default();
for untested in self.map.get_clusters(Stage::Untested) {
// Caching.
let mut read_position: usize;
let mut cluster: Cluster;
fn copy_patchwork(&mut self, mut depth: usize) -> anyhow::Result<()> {
let mut buf = DirectIOBuffer::new();
let mut buf_capacity = self.get_buf_capacity() as usize;
dbg!(untested.domain);
read_position = untested.domain.start;
while self.map.get_stage() == (Stage::Patchwork { depth }) {
// Order of these two expressions matters, stupid.
buf_capacity /= depth;
depth += 1;
while read_position < untested.domain.end {
dbg!(read_position);
for cluster in self.map.get_clusters(Stage::Patchwork { depth }) {
self.read_domain(buf.as_mut(), cluster.domain, buf_capacity, Stage::Isolate)?;
}
}
buf_capacity = buf_capacity.min(untested.domain.end - read_position);
Ok(())
}
fn read_domain(
&mut self,
buf: &mut [u8],
domain: Domain,
mut buf_capacity: usize,
next_stage: Stage,
) -> anyhow::Result<()> {
let mut cluster;
let mut read_position = domain.start;
while read_position < domain.end {
buf_capacity = buf_capacity.min(domain.end - read_position);
cluster = Cluster {
domain: Domain {
@@ -109,14 +123,14 @@ impl Recover {
stage: Stage::Intact,
};
if let Err(err) = self.input.read_exact(&mut buf.as_mut()) {
// If buf were zeroed out before every read, one could theoretically recover
// part of that read given the assumption that all null values from the end to
// the first non-null value are unread, and some further padding from the last
// values are potentially invalid.
//
// That padding should have a cli arg to control it.
match self.read_sectors(buf.as_mut()) {
Ok(bytes) => {
self.output
.write_all(&buf[0..bytes])
.context("Failed to write data to output file")?;
read_position += bytes;
}
Err(err) => {
println!("Hit error: {:?}", err);
if CONFIG.reopen_on_error {
self.reload_input()
@@ -130,25 +144,13 @@ impl Recover {
.seek_relative(buf_capacity as i64)
.context("Failed to seek output by buf_capacity to skip previous error")?;
// I don't remember what level was for.
cluster.stage = Stage::ForIsolation { level: 1 };
cluster.stage = next_stage.clone();
}
if cluster.stage == Stage::Intact {
self.output
.write_all(&buf[0..buf_capacity])
.context("Failed to write data to output file")?;
}
self.map.update(cluster);
self.map.write_to(&mut crate::io::load_map_write()?)?;
read_position += buf_capacity;
}
}
drop(buf);
self.map.write_to(&mut crate::io::load_map_write()?)?;
Ok(())
}
@@ -167,6 +169,25 @@ impl Recover {
self.input.seek(SeekFrom::Start(seek_pos))?;
Ok(())
}
fn read_sectors(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
let mut raw_buf = vec![crate::FB_NULL_VALUE; buf.len()];
let result = self.input.read(&mut raw_buf);
if result.is_err() {
return result;
} else if let Ok(mut bytes) = result
&& bytes >= CONFIG.sector_size
{
// Remember that this is integer division (floor division)
bytes = (bytes / CONFIG.sector_size) * CONFIG.sector_size;
buf.write_all(&raw_buf[..bytes]).unwrap();
return Ok(bytes);
} else {
return Ok(0);
}
}
}
#[cfg(test)]