Skip to content

Commit

Permalink
We need an owned type to update a single DashMap in parallel, but we …
Browse files Browse the repository at this point in the history
…can use a Box<[u8]> for the hashmap keys instead of Vec<u8>.
  • Loading branch information
“suchapalaver” committed Oct 7, 2021
1 parent 7d9ff74 commit 8d6c976
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 27 deletions.
28 changes: 14 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 12 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,29 @@ pub fn run(config: Config) -> Result<(), Box<dyn Error>> {
let k: usize = config.kmer_len;

// 'N'
let n: u8 = 78;
const N: u8 = b'N';

// Create fasta file reader
let reader: fasta::Reader<std::io::BufReader<File>> =
fasta::Reader::from_file(&filepath).unwrap();

// Create a DashMap
let fasta_hash: DashMap<Vec<u8>, u32> = DashMap::new();
let fasta_hash: DashMap<Box<[u8]>, u32> = DashMap::new();

// Read FASTA records into a DashMap, a hash map that can be mutated concurrently from multiple threads
reader
.records()
.into_iter()
.par_bridge()
.for_each(|result| {
let seq: &[u8] = result.as_ref().unwrap().seq();
.for_each(|record| {
let seq: &[u8] = record.as_ref().unwrap().seq();

for i in 0..(seq.len() + 1).saturating_sub(k) {
// Eradicate kmers containing 'N'
if !seq[i..i + k].contains(&n) {
// Make output kmers the lexicographically smaller of
// (kmer, reverse-complement)
if !seq[i..i + k].contains(&N) {
// Canonicalize by lexicographically smaller of kmer/reverse-complement
*fasta_hash
.entry(min(seq[i..i + k].to_vec(), revcomp(&seq[i..i + k])))
.entry(Box::from(min(&seq[i..i + k], &revcomp(&seq[i..i + k]))))
.or_insert(0) += 1;
} else {
}
Expand All @@ -73,14 +72,14 @@ pub fn run(config: Config) -> Result<(), Box<dyn Error>> {

let mut buf = BufWriter::new(handle);

fasta_hash.into_iter().for_each(|(k, f)| {
fasta_hash.into_iter().for_each(|(kmer, f)| {
// Write:
// >frequency across fasta file for both kmer and its reverse complement
// k-mer (lexicographically smaller of k-mer, reverse complement pair)
writeln!(buf, ">{}\n{}", f, str::from_utf8(&k).unwrap()).expect("Unable to write data");
// canonical k-mer
writeln!(buf, ">{}\n{}", f, str::from_utf8(&kmer).unwrap()).expect("Unable to write data");
});

buf.flush().unwrap();

Ok(())
}

0 comments on commit 8d6c976

Please sign in to comment.