Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions rust/bioscript-cli/src/report_html_observations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ fn render_observation_table(
let show_facets = observations
.iter()
.any(|observation| !json_field_as_tsv(observation.get("facets")).is_empty());
let show_imputed_reference_note = observations.iter().any(observation_is_imputed_vcf_reference);
let headers = all_headers
.iter()
.copied()
Expand All @@ -66,6 +67,9 @@ fn render_observation_table(
out.push_str("</tr>");
}
out.push_str("</tbody></table></div>");
if show_imputed_reference_note {
out.push_str("<p class=\"muted observation-note\">* In variant-only VCF inputs, absent queried variant rows are shown as imputed reference genotypes. This is usually appropriate for variant-only VCFs, but it may be wrong if the VCF omits loci for another reason.</p>");
}
}

fn observation_filter_group(observation: &serde_json::Value) -> &'static str {
Expand Down Expand Up @@ -125,6 +129,14 @@ fn observation_row_class(observation: &serde_json::Value) -> &'static str {

fn render_observation_cell(out: &mut String, observation: &serde_json::Value, header: &str) {
let cell_class = table_column_class(header);
if header == "outcome" {
let mut value = json_field_as_tsv(observation.get(header));
if value == "reference" && observation_is_imputed_vcf_reference(observation) {
value.push('*');
}
let _ = write!(out, "<td class=\"{}\">{}</td>", cell_class, html_escape(&value));
return;
}
if header == "ref_alt" {
class_cell(out, &observation_ref_alt(observation), "mono");
return;
Expand Down Expand Up @@ -184,6 +196,15 @@ fn render_observation_cell(out: &mut String, observation: &serde_json::Value, he
);
}

/// Reports whether an observation is flagged as an imputed reference call.
///
/// Returns `true` only when the observation has an `evidence_raw` field that is
/// a JSON string containing the imputed-reference marker text. A missing field,
/// or a field of any other JSON type, yields `false`.
fn observation_is_imputed_vcf_reference(observation: &serde_json::Value) -> bool {
    match observation.get("evidence_raw") {
        Some(serde_json::Value::String(raw_evidence)) => raw_evidence
            .contains("imputed reference genotype from absent variant-only VCF record"),
        _ => false,
    }
}

fn observation_ref_alt(observation: &serde_json::Value) -> String {
let ref_allele = observation
.get("ref")
Expand Down
16 changes: 15 additions & 1 deletion rust/bioscript-cli/src/report_html_sections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ fn render_input_debug(out: &mut String, reports: &[serde_json::Value], show_part
"Source",
"Assembly",
"Inferred Sex",
"VCF Ref Imputation",
"Evidence",
]);
for (idx, header) in headers.iter().enumerate() {
Expand Down Expand Up @@ -217,6 +218,7 @@ fn render_input_debug(out: &mut String, reports: &[serde_json::Value], show_part
value_str(sex, "method"),
]),
);
table_cell(out, input_debug_vcf_imputation(debug));
table_cell(out, &input_debug_evidence(debug));
out.push_str("</tr>");
}
Expand Down Expand Up @@ -256,6 +258,7 @@ fn render_input_debug_key_values(out: &mut String, report: &serde_json::Value) {
value_str(sex, "method"),
]),
);
input_debug_kv(out, "VCF ref imputation", input_debug_vcf_imputation(debug));
input_debug_kv(out, "Evidence", &input_debug_evidence(debug));
out.push_str("</dl>");
}
Expand All @@ -278,6 +281,18 @@ fn compact_join(values: &[&str]) -> String {
.join(" / ")
}

/// Produces the text for the "VCF ref imputation" debug field.
///
/// Returns `"used"` when `debug` has a `vcf_missing_reference_imputation` field
/// holding the JSON boolean `true`. Returns an empty string in every other case
/// (field absent, `false`, or not a boolean).
fn input_debug_vcf_imputation(debug: &serde_json::Value) -> &'static str {
    match debug.get("vcf_missing_reference_imputation") {
        Some(serde_json::Value::Bool(true)) => "used",
        _ => "",
    }
}

fn input_debug_evidence(debug: &serde_json::Value) -> String {
let mut evidence = Vec::new();
collect_string_array(debug.get("evidence"), &mut evidence);
Expand All @@ -300,4 +315,3 @@ fn collect_string_array(value: Option<&serde_json::Value>, out: &mut Vec<String>
}
}
}

27 changes: 26 additions & 1 deletion rust/bioscript-cli/src/report_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@ fn app_report_json(input: AppReportJsonInput<'_>) -> serde_json::Value {
item.get("call_status").and_then(serde_json::Value::as_str) == Some("called")
})
.count();
let input_debug = input
.input_inspection
.map(|inspection| {
let mut value = input_inspection_json(inspection);
if observations_have_imputed_vcf_references(input.observations)
&& let Some(object) = value.as_object_mut()
{
object.insert(
"vcf_missing_reference_imputation".to_owned(),
serde_json::Value::Bool(true),
);
}
value
});
serde_json::json!({
"schema": "bioscript:report:1.0",
"version": "1.0",
Expand All @@ -29,7 +43,7 @@ fn app_report_json(input: AppReportJsonInput<'_>) -> serde_json::Value {
"input": {
"file_name": input.input_file.file_name().and_then(|value| value.to_str()).unwrap_or_default(),
"file_path": input.input_file.display().to_string(),
"debug": input.input_inspection.map(input_inspection_json),
"debug": input_debug,
},
"report_status": if called == input.observations.len() { "complete" } else { "partial" },
"derived_from": input.observations.iter().filter_map(|item| item.get("variant_key").cloned()).collect::<Vec<_>>(),
Expand All @@ -46,6 +60,17 @@ fn app_report_json(input: AppReportJsonInput<'_>) -> serde_json::Value {
})
}

fn observations_have_imputed_vcf_references(observations: &[serde_json::Value]) -> bool {
observations.iter().any(|observation| {
observation
.get("evidence_raw")
.and_then(serde_json::Value::as_str)
.is_some_and(|evidence| {
evidence.contains("imputed reference genotype from absent variant-only VCF record")
})
})
}

fn report_manifest_metadata(path: &Path) -> Result<serde_json::Value, String> {
let text = fs::read_to_string(path)
.map_err(|err| format!("failed to read manifest metadata {}: {err}", path.display()))?;
Expand Down
18 changes: 18 additions & 0 deletions rust/bioscript-formats/src/genotype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ impl GenotypeStore {
GenotypeSourceFormat::Zip => Self::from_zip_file(path),
GenotypeSourceFormat::Vcf => Ok(Self::from_vcf_file(path, options)),
GenotypeSourceFormat::Cram => Self::from_cram_file(path, options),
GenotypeSourceFormat::Bam => Err(RuntimeError::Unsupported(format!(
"BAM alignment lookup is not implemented yet for {}",
path.display()
))),
}
}

Expand Down Expand Up @@ -361,6 +365,7 @@ impl RsidMapBackend {
GenotypeSourceFormat::Zip => "zip",
GenotypeSourceFormat::Vcf => "vcf",
GenotypeSourceFormat::Cram => "cram",
GenotypeSourceFormat::Bam => "bam",
}
}

Expand Down Expand Up @@ -392,6 +397,7 @@ impl DelimitedBackend {
GenotypeSourceFormat::Zip => "zip",
GenotypeSourceFormat::Vcf => "vcf",
GenotypeSourceFormat::Cram => "cram",
GenotypeSourceFormat::Bam => "bam",
}
}

Expand Down Expand Up @@ -939,6 +945,18 @@ mod tests {
detect_source_format(&text, Some(GenotypeSourceFormat::Cram)).unwrap(),
GenotypeSourceFormat::Cram
));
let bam = dir.join("sample.bam");
fs::write(&bam, b"BAM").unwrap();
assert!(matches!(
detect_source_format(&bam, None).unwrap(),
GenotypeSourceFormat::Bam
));
assert!(
GenotypeStore::from_file(&bam)
.unwrap_err()
.to_string()
.contains("BAM alignment lookup is not implemented yet")
);
assert!(!looks_like_vcf_lines(&["rsid\tgenotype".to_owned()]));

let backend = DelimitedBackend {
Expand Down
1 change: 1 addition & 0 deletions rust/bioscript-formats/src/genotype/cram_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use crate::alignment;
use super::GenotypeLoadOptions;

mod indel;
mod observation;
mod reader;
mod store;

Expand Down
Loading
Loading