commit 79fddd7802c1863309db9a402974ef2db0550357 Author: Cutieguwu Date: Wed Feb 19 08:32:14 2025 -0500 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..bbc3eb5 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,158 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +dependencies = [ + "serde", +] + +[[package]] +name = "clap" +version = "4.5.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" +dependencies = [ + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "kramer" +version = "0.1.0" +dependencies = [ + "clap", + "libc", + "ron", + "serde", +] + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ron" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" +dependencies = [ + "base64", + "bitflags", + "serde", + "serde_derive", +] + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7ea04bc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "kramer" +version = "0.1.0" +edition = "2021" + +[dependencies] +# For clap info, see [dependencies.clap] +libc = "0.2.169" +ron = "0.8.1" +serde = { version = "1.0.217", features = ["derive"] } + +[dependencies.clap] +version = "4.5.27" +default-features = false +features = [ + # From default features collection + "error-context", + "help", + "std", + "suggestions", + "usage", + + # Optional features + "derive", +] diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..1804fb2 --- /dev/null +++ b/README.adoc @@ -0,0 +1,40 @@ += kramer +:toc: + +// Hello people reading the README source :) + +== Prelude + +I needed a program to efficiently repair the data on optical discs. + + +== Goals + +* [ ] CLI Args +** [ ] Input device +** [ ] Output file (ISO 9660) +** [ ] Repair map file +** [ ] sequence_length +** [ ] brute_passes +** [ ] Sector size override? + +* Repair Algorithm +** Stage 1: Trial +*** [ ] 1 - From first sector, parse forward to error. +*** [ ] 2 - From last sector, parse backwards to error. +*** [ ] 3 - From center of data for trial, parse forward to error or end of remaining trial domain. +*** [ ] 4 - Stripe-skip remaining data, attempting to read largest trial domains first. 
+**** [ ] If data keeps reading good, no skip will occur until an error is reached.
+** Stage 2: Isolation
+*** [ ] From largest to smallest untrustworthy sequence, attempt to read each sequence at half sequence_length.
+*** [ ] Same, but at quarter sequence_length.
+*** [ ] Same, but at eighth sequence_length.
+*** [ ] By sector, parse untrustworthy sequences from start to error, and end to error. Mark mid section for brute force.
+** Stage 3: Brute Force
+*** [ ] Desperately attempt to recover data from marked sections.
+*** [ ] Attempt for brute_passes, retrying all failed sectors.
+
+* [ ] Repair Map
+** [ ] I'll figure out some kind of language for this...
+
+== License
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..a1e2283
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,161 @@
+mod recovery;
+mod mapping;
+
+use clap::Parser;
+use libc::O_DIRECT;
+use mapping::MapFile;
+use recovery::Recover;
+use std::{
+    fs::{File, OpenOptions},
+    io::{self, Seek, SeekFrom},
+    os::unix::fs::OpenOptionsExt,
+    path::PathBuf,
+};
+
+
+/// Fallback sector size in bytes; the default for `--sector-size`.
+const FB_SECTOR_SIZE: u16 = 2048;
+
+
+// Command-line arguments. The field doc comments double as the --help text.
+#[derive(Parser, Debug)]
+struct Args {
+    /// Path to source file or block device
+    #[arg(short, long, value_hint = clap::ValueHint::FilePath)]
+    input_file: PathBuf,
+
+    /// Path to output file. Defaults to {input_file}.iso
+    #[arg(short, long, value_hint = clap::ValueHint::FilePath)]
+    output_file: Option<PathBuf>,
+
+    /// Path to rescue map. Defaults to {input_file}.map
+    #[arg(short, long, value_hint = clap::ValueHint::FilePath)]
+    map_file: Option<PathBuf>,
+
+    /// Max number of consecutive sectors to test as a group
+    #[arg(short, long, default_value_t = 128)]
+    cluster_length: u16,
+
+    /// Number of brute force read passes
+    #[arg(short, long, default_value_t = 2)]
+    brute_passes: usize,
+
+    /// Sector size
+    #[arg(short, long, default_value_t = FB_SECTOR_SIZE)]
+    sector_size: u16,
+}
+
+
+fn main() {
+    let config = Args::parse();
+
+    // Live with it, prefer to use expect() here.
+    // I'm lazy and don't want to mess around with comparing error types.
+    // Thus, any error in I/O here should be treated as fatal.
+
+    let mut input_file: File = {
+        match OpenOptions::new()
+            .custom_flags(O_DIRECT)
+            .read(true)
+            .write(false)
+            .append(false)
+            .create(false)
+            .open(&config.input_file)
+        {
+            Ok(f) => f,
+            Err(err) => panic!("Failed to open input file: {:?}", err)
+        }
+    };
+
+    let mut output_file: File = {
+        // Keep this clean, make a short-lived binding.
+        // Lossy conversion: a non-UTF-8 input name must not abort the run.
+        let path = get_path(
+            &config.output_file,
+            &config.input_file.to_string_lossy(),
+            "iso"
+        );
+
+        match OpenOptions::new()
+            .custom_flags(O_DIRECT)
+            .read(true)
+            .write(true)
+            .create(true)
+            .open(path)
+        {
+            Ok(f) => f,
+            Err(err) => panic!("Failed to open/create output file. {:?}", err)
+        }
+    };
+
+    // Check if output file is shorter than input.
+    // If so, autoextend the output file.
+    {
+        let input_len = get_stream_length(&mut input_file)
+            .expect("Failed to get the length of the input data.");
+        let output_len = get_stream_length(&mut output_file)
+            .expect("Failed to get the length of the output file.");
+
+        if output_len < input_len {
+            output_file.set_len(input_len)
+                .expect("Failed to autofill output file.")
+        }
+    }
+
+    let map: MapFile = {
+        // Use the map path if one was given, else fall back to {input_file}.map.
+        // (Keying this on output_file would point the map at the image itself.)
+        let path = get_path(
+            &config.map_file,
+            &config.input_file.to_string_lossy(),
+            "map"
+        );
+
+        let file = match OpenOptions::new()
+            .read(true)
+            // create(true) is rejected unless write or append access is requested.
+            .write(true)
+            .create(true)
+            .open(path)
+        {
+            Ok(f) => f,
+            Err(err) => panic!("Failed to open/create mapping file. {:?}", err)
+        };
+
+        // A map that cannot be parsed (e.g. one just created) means a fresh start.
+        MapFile::try_from(file).unwrap_or_else(|_| MapFile::new(config.sector_size))
+    };
+
+    let recover_tool = Recover::new(config, input_file, output_file, map);
+
+    recover_tool.run_full();
+
+    todo!("Recovery, Map saving, and closure of all files.");
+}
+
+/// Generates a file path if one not provided.
+/// source_file for fallback name: `{source_file}.{extension}`.
+fn get_path(
+    output_file: &Option<PathBuf>,
+    source_file: &str,
+    extension: &str
+) -> PathBuf {
+    // `{}` and not `{:?}`: Debug-formatting a `&str` wraps it in quotes, which
+    // would produce a file literally named `"input".iso`.
+    output_file
+        .clone()
+        .unwrap_or_else(|| PathBuf::from(format!("{}.{}", source_file, extension)))
+}
+
+/// Get length of data stream.
+/// Physical length of data stream in bytes
+/// (multiple of sector_size, rather than actual).
+/// Leaves the stream positioned at its start.
+fn get_stream_length<S: Seek>(file: &mut S) -> io::Result<u64> {
+    let len = file.seek(SeekFrom::End(0))?;
+
+    // Report a failed rewind instead of silently leaving the cursor at the end.
+    file.seek(SeekFrom::Start(0))?;
+
+    Ok(len)
+}
\ No newline at end of file
diff --git a/src/mapping.rs b/src/mapping.rs
new file mode 100644
index 0000000..53f656a
--- /dev/null
+++ b/src/mapping.rs
@@ -0,0 +1,310 @@
+use ron::de::{from_reader, SpannedError};
+use serde::Deserialize;
+use std::fs::File;
+
+use crate::FB_SECTOR_SIZE;
+
+
+/// Domain, in sectors.
+/// Requires sector_size to be provided elsewhere for conversion to bytes.
+#[derive(Clone, Copy, Debug, Deserialize)]
+pub struct Domain {
+    /// First sector of the domain (inclusive).
+    pub start: usize,
+    /// Sector just past the last one of the domain (exclusive).
+    pub end: usize,
+}
+
+impl Default for Domain {
+    /// A domain holding only sector 0.
+    fn default() -> Self {
+        Domain { start: 0, end: 1 }
+    }
+}
+
+impl Domain {
+    /// Return length of domain in sectors.
+    /// An inverted domain (`end < start`) counts as empty instead of underflowing.
+    pub fn len(self) -> usize {
+        self.end.saturating_sub(self.start)
+    }
+
+    /// Whether the half-open ranges of the two domains intersect (touching ends do not).
+    fn overlaps(self, other: Domain) -> bool {
+        self.start < other.end && other.start < self.end
+    }
+}
+
+/// A map for data stored in memory for processing and saving to disk.
+#[allow(unused)]
+#[derive(Clone, Debug, Deserialize)]
+pub struct Cluster {
+    // NOTE(review): the element type was lost when this dump was extracted;
+    // `Vec<u8>` (raw sector bytes) is assumed. Confirm against the repository.
+    data: Option<Vec<u8>>,
+    /// Sectors this cluster covers.
+    domain: Domain,
+    /// Recovery status of those sectors.
+    status: Status,
+}
+
+impl Default for Cluster {
+    /// No data, the default domain, and the default status.
+    fn default() -> Self {
+        Cluster {
+            data: None,
+            domain: Domain::default(),
+            status: Status::default()
+        }
+    }
+}
+
+
+/// Map for data stored on disk.
+/// Rather have a second cluster type than inflating size
+/// of output map by defining Option::None constantly.
+#[derive(Clone, Copy, Debug, Deserialize)]
+pub struct MapCluster {
+    /// Sectors this cluster covers.
+    pub domain: Domain,
+    /// Recovery status of those sectors.
+    pub status: Status,
+}
+
+impl Default for MapCluster {
+    /// The default domain with the default status.
+    fn default() -> Self {
+        MapCluster { domain: Domain::default(), status: Status::default() }
+    }
+}
+
+impl From<Cluster> for MapCluster {
+    /// Keep the domain and status of `cluster`, dropping its in-memory data.
+    fn from(cluster: Cluster) -> Self {
+        MapCluster {
+            domain: cluster.domain,
+            status: cluster.status,
+        }
+    }
+}
+
+
+/// Recovery status of a cluster.
+#[derive(Clone, Copy, Debug, Deserialize, PartialEq)]
+pub enum Status {
+    /// Default status of a cluster.
+    Untested,
+    /// Carries a stage number; `MapFile::get_state` reports the lowest one present.
+    ForIsolation(u8),
+    /// What `MapFile::get_state` reports once nothing is `Untested` or `ForIsolation`.
+    Damaged,
+}
+
+impl Default for Status {
+    /// Clusters start out untested.
+    fn default() -> Self {
+        Status::Untested
+    }
+}
+
+
+/// Recovery map, deserialized from RON (see the `TryFrom<File>` impl).
+#[allow(unused)]
+#[derive(Clone, Debug, Deserialize)]
+pub struct MapFile {
+    /// Sector size in bytes.
+    pub sector_size: u16,
+    /// Domain of the map as a whole (presumably the full range being recovered).
+    pub domain: Domain,
+    /// Status of each cluster of sectors.
+    pub map: Vec<MapCluster>,
+}
+
+impl TryFrom<File> for MapFile {
+    type Error = SpannedError;
+
+    /// Parse a map from `file` as RON.
+    ///
+    /// # Errors
+    /// Returns the `SpannedError` from `ron` if `file` cannot be read or parsed.
+    fn try_from(file: File) -> Result<Self, Self::Error> {
+        from_reader(file)
+    }
+}
+
+impl Default for MapFile {
+    /// A map with the fallback sector size and a single untested default cluster.
+    fn default() -> Self {
+        MapFile {
+            sector_size: FB_SECTOR_SIZE,
+            domain: Domain::default(),
+            map: vec![MapCluster {
+                domain: Domain::default(),
+                status: Status::Untested,
+            }],
+        }
+    }
+}
+
+#[allow(dead_code)]
+impl MapFile {
+    /// Create the default map, but with the given sector size.
+    pub fn new(sector_size: u16) -> Self {
+        MapFile {
+            sector_size,
+            ..MapFile::default()
+        }
+    }
+
+    /// Set the sector size, returning `self` so calls can be chained.
+    pub fn set_sector_size(&mut self, sector_size: u16) -> &mut Self {
+        self.sector_size = sector_size;
+        self
+    }
+
+    /// Recalculate cluster mappings.
+    ///
+    /// Overlays `new_cluster` on the map: every existing cluster is cropped,
+    /// split, or dropped so that nothing overlaps the new cluster's domain.
+    /// Domains are half-open (`start..end`). Returns the updated map, sorted
+    /// by starting sector.
+    ///
+    /// E.g. overlaying `4..6` on a lone cluster `0..10` leaves `0..4`, `4..6`
+    /// (the new cluster), and `6..10`.
+    fn update(mut self, new_cluster: Cluster) -> Self {
+        // Copy the domain out first: new_cluster itself is moved into the map below.
+        let new_domain = new_cluster.domain;
+        let mut new_map: Vec<MapCluster> = Vec::with_capacity(self.map.len() + 2);
+
+        for map_cluster in self.map.iter().copied() {
+            let old_domain = map_cluster.domain;
+
+            if !old_domain.overlaps(new_domain) {
+                /*
+                    No overlap.
+
+                    ACTION: Transfer
+                */
+
+                new_map.push(map_cluster);
+                continue;
+            }
+
+            if old_domain.start < new_domain.start {
+                /*
+                    map_cluster starts ahead of new_cluster.
+
+                    ACTION: Keep the leading part, cropped to end at start of new_cluster.
+                */
+
+                new_map.push(MapCluster {
+                    domain: Domain {
+                        start: old_domain.start,
+                        end: new_domain.start,
+                    },
+                    status: map_cluster.status,
+                });
+            }
+
+            if new_domain.end < old_domain.end {
+                /*
+                    map_cluster ends after new_cluster.
+
+                    ACTION: Keep the trailing part, cropped to start at end of new_cluster.
+                    NOTE: If both parts are kept, map_cluster has been fractured.
+                */
+
+                new_map.push(MapCluster {
+                    domain: Domain {
+                        start: new_domain.end,
+                        end: old_domain.end,
+                    },
+                    status: map_cluster.status,
+                });
+            }
+
+            // Whatever part of map_cluster lies inside new_cluster is forgotten.
+        }
+
+        new_map.push(MapCluster::from(new_cluster));
+        // Keep the map ordered by sector so neighbouring clusters stay adjacent.
+        new_map.sort_by_key(|mc| mc.domain.start);
+
+        self.map = new_map;
+        self
+    }
+
+    /// Get current recovery status.
+    ///
+    /// `Untested` if any cluster is untested; otherwise the `ForIsolation`
+    /// status with the lowest stage; `Damaged` if there is neither.
+    /// Consumes the map.
+    pub fn get_state(self) -> Status {
+        // Lowest isolation stage seen so far.
+        let mut lowest_stage: Option<u8> = None;
+
+        for cluster in self.map {
+            match cluster.status {
+                Status::Untested => return Status::Untested,
+                Status::ForIsolation(stage) => {
+                    lowest_stage = Some(lowest_stage.map_or(stage, |low| low.min(stage)));
+                },
+                Status::Damaged => (),
+            }
+        }
+
+        lowest_stage.map_or(Status::Damaged, Status::ForIsolation)
+    }
+
+    /// Get clusters of common status.
+    ///
+    /// Returns the clusters whose status equals `state`, in map order.
+    /// Consumes the map.
+    pub fn get_clusters(self, state: Status) -> Vec<MapCluster> {
+        self.map
+            .into_iter()
+            .filter(|mc| mc.status == state)
+            .collect()
+    }
+
+    /// Defragments cluster groups.
+    ///
+    /// Sorts the clusters by starting sector, then, in a single pass, merges
+    /// every run of clusters that touch (`end == next.start`) and share a
+    /// status into one cluster. Returns the defragmented map.
+    ///
+    /// E.g. untested `0..4` and `4..8` followed by damaged `8..9` become
+    /// untested `0..8` and damaged `8..9`.
+    fn defrag(mut self) -> Self {
+        // Merging only looks at the previous cluster, so order matters.
+        self.map.sort_by_key(|mc| mc.domain.start);
+
+        let mut new_map: Vec<MapCluster> = Vec::with_capacity(self.map.len());
+
+        for current_cluster in self.map.iter().copied() {
+            if let Some(previous) = new_map.last_mut() {
+                // Share common status and are contiguous; Defrag clusters.
+                if previous.status == current_cluster.status
+                    && previous.domain.end == current_cluster.domain.start
+                {
+                    previous.domain.end = current_cluster.domain.end;
+                    continue;
+                }
+            }
+
+            // Otherwise, can't defrag this portion.
+            // Transfer current cluster to new_map
+            new_map.push(current_cluster);
+        }
+
+        self.map = new_map;
+        self
+    }
+
+    /// Alias of `MapFile::defrag`, which already performs the sorted,
+    /// single-pass merge this function was meant to introduce.
+    /// Returns the defragmented map.
+    fn defrag_new(self) -> Self {
+        self.defrag()
+    }
+}
\ No newline at end of file
diff --git a/src/recovery.rs b/src/recovery.rs
new file mode 100644
index 0000000..f471ec0
--- /dev/null
+++ b/src/recovery.rs
@@ -0,0 +1,70 @@
+use std::{
+    io::{BufReader, BufWriter},
+    fs::File,
+};
+
+use crate::{
+    Args,
+    mapping::{MapFile, Status},
+};
+
+
+/// State of one recovery run: the parsed arguments, the buffered source and
+/// destination files, the map, and the current stage.
+#[allow(unused)]
+#[derive(Debug)]
+pub struct Recover {
+    buf_capacity: usize,
+    config: Args,
+    input: BufReader<File>,
+    output: BufWriter<File>,
+    map: MapFile,
+    stage: Status,
+}
+
+#[allow(dead_code)]
+impl Recover {
+    /// Wrap `input` and `output` in one-sector buffers and start at `Status::Untested`.
+    pub fn new(config: Args, input: File, output: File, map: MapFile) -> Self {
+        // Temporarily make buffer length one sector.
+        let buf_capacity = config.sector_size as usize;
+        let mut r = Recover {
+            buf_capacity,
+            config,
+            input: BufReader::with_capacity(
+                buf_capacity,
+                input,
+            ),
+            output: BufWriter::with_capacity(
+                buf_capacity,
+                output,
+            ),
+            map,
+            stage: Status::Untested,
+        };
+
+        // Ensure that buffer capacity is adjusted based on progress.
+        // NOTE(review): this only updates the `buf_capacity` field; the reader and
+        // writer built above keep their one-sector buffers.
+        r.set_buf_capacity();
+        r
+    }
+
+    /// Recover media from blank slate.
+    pub fn run_full(self) {}
+
+    /// Recover media given a partial recovery.
+    pub fn run_limited(self) {}
+
+    /// Attempt to copy all untested blocks.
+    fn copy_untested(self) {}
+
+    /// Set buffer capacities as cluster length in bytes.
+    /// Varies depending on the recovery stage.
+    fn set_buf_capacity(&mut self) {
+        // Widen before multiplying: 2048 * 128 (the defaults) already overflows u16.
+        self.buf_capacity =
+            usize::from(self.config.sector_size) * usize::from(self.config.cluster_length);
+    }
+}
+