diff --git a/Cargo.lock b/Cargo.lock index b0785f2..b59fa8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -122,27 +122,6 @@ dependencies = [ "serde", ] -[[package]] -name = "byte-strings" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "252ec070bc6db225f08a3cf614d2ce361e3d0d0ca0aa798ef66b116afd01ffe4" -dependencies = [ - "byte-strings-proc-macro", - "cfg-if 0.1.10", -] - -[[package]] -name = "byte-strings-proc-macro" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7085f61338b897099839049301aa125aae59365e7ac4d71593ca9a1812c8d55" -dependencies = [ - "cfg-if 0.1.10", - "quote 0.6.13", - "syn 0.15.44", -] - [[package]] name = "bytecount" version = "0.6.2" @@ -155,12 +134,6 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -173,7 +146,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -183,7 +156,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] @@ -194,7 +167,7 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", "lazy_static", "memoffset", @@ -207,7 +180,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "lazy_static", ] @@ -245,9 +218,9 @@ version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -271,9 +244,9 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -309,7 +282,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi", ] @@ -321,9 +294,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24b328c01a4d71d2d8173daa93562a73ab0fe85616876f02500f53d82948c504" dependencies = [ "proc-macro-error", - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -389,7 +362,6 @@ name = "krust" version = "0.1.0" dependencies = [ "bio", - "byte-strings", "rayon", ] @@ -468,9 +440,9 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -583,9 +555,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", "version_check", ] @@ -595,36 +567,18 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", + "proc-macro2", + "quote", "version_check", ] -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid 0.1.0", -] - [[package]] name = "proc-macro2" version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" dependencies = [ - "unicode-xid 0.2.2", -] - -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", + "unicode-xid", ] [[package]] @@ -633,7 +587,7 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ - "proc-macro2 1.0.29", + "proc-macro2", ] [[package]] @@ -782,9 +736,9 @@ version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -825,9 +779,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149" dependencies = [ "heck", - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -837,20 +791,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec" dependencies = [ "heck", - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", -] - -[[package]] -name = "syn" -version = "0.15.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid 0.1.0", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -859,9 +802,9 @@ version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6f107db402c2c2055242dbf4d2af0e69197202e9faacbef9571bbe47f5a1b84" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "unicode-xid 0.2.2", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] @@ -879,9 +822,9 @@ version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bad553cc2c78e8de258400763a647e80e6d1b31ee237275d756f6836d204494c" dependencies = [ - "proc-macro2 1.0.29", - "quote 1.0.9", - "syn 1.0.76", + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -902,12 +845,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - [[package]] name = "unicode-xid" version = "0.2.2" diff --git a/src/lib.rs b/src/lib.rs index 4316758..c716eeb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ use bio::{alignment::sparse::hash_kmers, alphabets::dna::revcomp, io::fasta}; use rayon::prelude::*; -use std::{collections::HashMap, env, error::Error, fs::File, io::Write, str, time::Instant}; +use std::{collections::{HashMap, HashSet}, env, error::Error, fs::File, io::Write, str, time::Instant}; pub struct Config { pub kmer_len: usize, @@ -31,7 +31,7 @@ pub fn hash_fasta_rec( let mut new_hashmap = HashMap::new(); - for (kmer, kmer_pos) in hash_kmers(result_data.seq(), k) { + for (kmer, kmer_pos) in hash_kmers(result_data.seq(), k) { // rust-bio's hash_kmers function, returns iterator of tuples (&[u8], Vec), the Vec being a list of indices of positions of kmer. new_hashmap.insert(kmer, kmer_pos.len()); } new_hashmap @@ -49,19 +49,36 @@ pub fn run(config: Config) -> Result<(), Box> { let fasta_records: Vec> = reader.records().collect(); - let hash_vec: Vec> = fasta_records + let mut hash_vec: Vec> = fasta_records .par_iter() .map(|result| hash_fasta_rec(result, k)) .collect(); - + let hash_duration = start.elapsed(); eprintln!( "Time elapsed creating hashmaps of all kmers in all sequences: {:?}\n", hash_duration ); + // merging hashmaps + //eprintln!("length of hash_vec now: {}", hash_vec.len()); + + let mut hash_len_vec = HashSet::new(); // create set of number of kmers + + for h in &hash_vec { + hash_len_vec.insert(h.len()); + } + //eprintln!("hashmap lengths: {:?}", hash_len_vec); + + let longest_len = hash_len_vec.iter().max().unwrap(); + + let i = &hash_vec.iter().position(|h| h.len() == *longest_len).unwrap(); + + let mut final_hash = hash_vec.remove(*i); + + //eprintln!("this is the hash we're basing off: {:?}", final_hash); - let mut final_hash = HashMap::new(); + //eprintln!("length of hash_vec post removal: {}", hash_vec.len()); hash_vec.into_iter().for_each(|h| { for (kmer, freq) in h {