Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rust/bioscript-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ bioscript-schema = { path = "../bioscript-schema" }
monty = { path = "../../monty/crates/monty" }
serde_json = { version = "1.0.133", features = ["preserve_order"] }
serde_yaml = "0.9.34"
sha2 = "0.10"
zip = { version = "2.2.0", default-features = false, features = ["deflate"] }

[lints.clippy]
Expand Down
2 changes: 1 addition & 1 deletion rust/bioscript-cli/src/cli_bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn run_cli() -> Result<(), String> {
Ok(())
}

const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.zip|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n bioscript report <manifest.yaml|package.zip|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|cram] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n bioscript review <manifest.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n bioscript import-package <package.zip|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n bioscript validate-variants <path> [--report <file>]\n bioscript validate-panels <path> [--report <file>]\n bioscript validate-assays <path> [--report <file>]\n bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|cram] [--cache-dir <path>]\n bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]";
const USAGE: &str = "usage: bioscript <script.py|manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--input-file <path>] [--output-file <path>] [--participant-id <id>] [--trace-report <path>] [--timing-report <path>] [--filter key=value] [--input-format auto|text|zip|vcf|cram] [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--auto-index] [--cache-dir <path>] [--max-duration-ms N] [--max-memory-bytes N] [--max-allocations N] [--max-recursion-depth N]\n bioscript report <manifest.yaml|package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> --input-file <path> [--input-file <path>...] --output-dir <dir> [--html] [--open] [--root <dir>] [--input-format auto|text|zip|vcf|cram] [--detect-sex] [--sample-sex male|female|unknown] [--analysis-max-duration-ms N]\n bioscript review <manifest.yaml|package.yaml|package.zip> --cases <cases.yaml> --output-dir <dir> [--html] [--root <dir>] [--filter key=value]\n bioscript import-package <package.yaml|package.zip|https://.../package.yaml|https://.../package.zip> [--root <dir>] [--output-dir <dir>]\n bioscript validate-variants <path> [--report <file>]\n bioscript validate-panels <path> [--report <file>]\n bioscript validate-assays <path> [--report <file>]\n bioscript prepare [--root <dir>] [--input-file <path>] [--reference-file <path>] [--input-format auto|text|zip|vcf|cram] [--cache-dir <path>]\n bioscript inspect <path> [--input-index <path>] [--reference-file <path>] [--reference-index <path>] [--detect-sex]";

struct CliOptions {
script_path: Option<PathBuf>,
Expand Down
67 changes: 50 additions & 17 deletions rust/bioscript-cli/src/manifest_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ fn run_panel_manifest_with_store(
participant_id: Option<&str>,
filters: &[String],
) -> Result<Vec<BTreeMap<String, String>>, String> {
let mut rows = Vec::new();
let mut rows_by_member: Vec<Vec<BTreeMap<String, String>>> = vec![Vec::new(); panel.members.len()];
let mut variant_entries = Vec::new();

for member in &panel.members {
for (member_index, member) in panel.members.iter().enumerate() {
let Some(path) = &member.path else {
return Err("remote panel members are not executable yet".to_owned());
};
Expand All @@ -134,21 +135,16 @@ fn run_panel_manifest_with_store(
if !matches_filters(&manifest, &resolved, filters) {
continue;
}
rows.push(run_variant_manifest_with_store(
runtime_root,
&manifest,
store,
participant_id,
)?);
variant_entries.push((member_index, resolved, manifest));
} else if member.kind == "assay" {
let assay = load_assay_manifest(&resolved)?;
rows.extend(run_assay_manifest_with_store(
rows_by_member[member_index] = run_assay_manifest_with_store(
runtime_root,
&assay,
store,
participant_id,
filters,
)?);
)?;
} else {
return Err(format!(
"panel member kind '{}' is not executable",
Expand All @@ -157,6 +153,29 @@ fn run_panel_manifest_with_store(
}
}

let observations = store
.lookup_variants(
&variant_entries
.iter()
.map(|(_, _, manifest)| manifest.spec.clone())
.collect::<Vec<_>>(),
)
.map_err(|err| err.to_string())?;

for ((member_index, resolved, manifest), observation) in
variant_entries.into_iter().zip(observations)
{
rows_by_member[member_index].push(variant_row(
runtime_root,
&resolved,
&manifest.name,
&manifest.tags,
&observation,
participant_id,
));
}

let rows = rows_by_member.into_iter().flatten().collect();
Ok(rows)
}

Expand All @@ -181,7 +200,7 @@ fn run_assay_manifest_with_store(
participant_id: Option<&str>,
filters: &[String],
) -> Result<Vec<BTreeMap<String, String>>, String> {
let mut rows = Vec::new();
let mut entries = Vec::new();

for member in &assay.members {
if member.kind != "variant" {
Expand All @@ -198,18 +217,32 @@ fn run_assay_manifest_with_store(
if !matches_filters(&manifest, &resolved, filters) {
continue;
}
let observation = store
.lookup_variant(&manifest.spec)
.map_err(|err| err.to_string())?;
rows.push(variant_row(
entries.push((resolved, manifest));
}

let observations = store
.lookup_variants(
&entries
.iter()
.map(|(_, manifest)| manifest.spec.clone())
.collect::<Vec<_>>(),
)
.map_err(|err| err.to_string())?;

let rows = entries
.into_iter()
.zip(observations)
.map(|((resolved, manifest), observation)| {
variant_row(
runtime_root,
&resolved,
&manifest.name,
&manifest.tags,
&observation,
participant_id,
));
}
)
})
.collect();

Ok(rows)
}
Expand Down
65 changes: 57 additions & 8 deletions rust/bioscript-cli/src/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,35 @@ const MAX_PACKAGE_FILES: usize = 1000;
const MAX_PACKAGE_FILE_BYTES: u64 = 16 * 1024 * 1024;
const MAX_PACKAGE_TOTAL_BYTES: u64 = 64 * 1024 * 1024;

include!("package_release.rs");

fn prepare_package_entrypoint_from_arg(
runtime_root: &Path,
source: &Path,
) -> Result<PathBuf, String> {
let source_text = source.to_string_lossy();
let package_path = if is_package_url(&source_text) {
download_package_url(runtime_root, &source_text)?
let source_url = if is_package_url(&source_text) {
Some(source_text.to_string())
} else {
None
};
let package_path = if let Some(url) = &source_url {
download_package_url(runtime_root, url)?
} else {
source.to_path_buf()
};
if is_package_zip_path(&package_path) {
let imported = import_package_zip(runtime_root, &package_path, None)?;
Ok(imported.entrypoint)
} else if is_package_release_path(&package_path) {
match package_zip_from_release_manifest(runtime_root, &package_path, source_url.as_deref())?
{
Some(zip_path) => {
let imported = import_package_zip(runtime_root, &zip_path, None)?;
Ok(imported.entrypoint)
}
None => Ok(package_path),
}
} else {
Ok(package_path)
}
Expand Down Expand Up @@ -47,11 +63,19 @@ fn run_import_package(args: Vec<String>) -> Result<(), String> {
.map_or_else(env::current_dir, Ok)
.map_err(|err| format!("failed to get current directory: {err}"))?;
let source_text = source.to_string_lossy();
let package_path = if is_package_url(&source_text) {
download_package_url(&runtime_root, &source_text)?
let source_url = if is_package_url(&source_text) {
Some(source_text.to_string())
} else {
None
};
let package_path = if let Some(url) = &source_url {
download_package_url(&runtime_root, url)?
} else {
absolutize(&runtime_root, &source)
};
let package_path =
package_zip_from_release_manifest(&runtime_root, &package_path, source_url.as_deref())?
.unwrap_or(package_path);
let imported = import_package_zip(&runtime_root, &package_path, output_dir.as_deref())?;
println!("root\t{}", imported.root.display());
println!("entrypoint\t{}", imported.entrypoint.display());
Expand Down Expand Up @@ -170,6 +194,23 @@ fn load_package_descriptor(root: &Path) -> Result<PackageDescriptor, String> {
.ok_or_else(|| {
format!("package descriptor {} is missing schema", path.display())
})?;
if matches!(
schema,
"bioscript:panel:1.0"
| "bioscript:assay:1.0"
| "bioscript:variant:1.0"
| "bioscript:variant"
) {
let package_name = value
.as_mapping()
.and_then(|mapping| mapping.get(serde_yaml::Value::String("name".to_owned())))
.and_then(serde_yaml::Value::as_str)
.map(ToOwned::to_owned);
return Ok(PackageDescriptor {
entrypoint: PathBuf::from(PACKAGE_DESCRIPTOR),
name: package_name,
});
}
if schema != "bioscript:package:1.0" {
return Err(format!(
"package descriptor {} has unsupported schema '{schema}'",
Expand Down Expand Up @@ -352,11 +393,13 @@ fn download_package_url(runtime_root: &Path, url: &str) -> Result<PathBuf, Strin
return Err("package URLs must use https://".to_owned());
}
let url_path = url.split('?').next().unwrap_or(url);
if !Path::new(url_path)
let extension = Path::new(url_path)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
{
return Err("package URL must point to a .zip file".to_owned());
.and_then(|ext| ext.to_str())
.unwrap_or_default()
.to_ascii_lowercase();
if !matches!(extension.as_str(), "zip" | "yaml" | "yml") {
return Err("package URL must point to a .zip, .yaml, or .yml file".to_owned());
}
let downloads = runtime_root.join(PACKAGE_DOWNLOAD_DIR);
fs::create_dir_all(&downloads).map_err(|err| {
Expand Down Expand Up @@ -408,3 +451,9 @@ fn is_package_zip_path(path: &Path) -> bool {
.and_then(|ext| ext.to_str())
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
}

/// Reports whether `path` ends in a `.yaml` or `.yml` extension.
///
/// The comparison ignores ASCII case; paths without an extension, or whose
/// extension is not valid UTF-8, are rejected.
fn is_package_release_path(path: &Path) -> bool {
    match path.extension().and_then(std::ffi::OsStr::to_str) {
        Some(ext) => ext.eq_ignore_ascii_case("yaml") || ext.eq_ignore_ascii_case("yml"),
        None => false,
    }
}
94 changes: 94 additions & 0 deletions rust/bioscript-cli/src/package_release.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/// Resolves a package release manifest to the zip artifact it describes.
///
/// Returns `Ok(None)` when `path` is not a `.yaml`/`.yml` file, does not exist,
/// or its top-level `schema` is not `bioscript:package-release:1.0`; callers
/// then treat `path` as something other than a release manifest.
///
/// The artifact location is taken from the manifest's `artifact` mapping:
/// * `artifact.url` is fetched through `download_package_url`;
/// * otherwise `artifact.path` is joined onto `source_url` and fetched when the
///   manifest itself came from a URL, or resolved next to the manifest on disk
///   (after passing through `checked_relative_package_path`) when it did not.
///
/// When `artifact.sha256` is present, the artifact's SHA-256 digest must match.
/// The comparison ignores ASCII case and surrounding whitespace, because
/// `sha256_file` always produces lowercase hex while published digests are
/// frequently uppercase.
///
/// # Errors
///
/// Returns a message when the manifest cannot be read or parsed, has no
/// `artifact` mapping, names neither `path` nor `url`, when fetching or hashing
/// the artifact fails, or when the digest does not match.
fn package_zip_from_release_manifest(
    runtime_root: &Path,
    path: &Path,
    source_url: Option<&str>,
) -> Result<Option<PathBuf>, String> {
    if !is_package_release_path(path) || !path.exists() {
        return Ok(None);
    }
    let text = fs::read_to_string(path)
        .map_err(|err| format!("failed to read package release {}: {err}", path.display()))?;
    let value: serde_yaml::Value = serde_yaml::from_str(&text)
        .map_err(|err| format!("failed to parse package release {}: {err}", path.display()))?;
    // Any other YAML document (panel, assay, plain manifest, ...) is not ours to handle.
    if yaml_string(&value, "schema").as_deref() != Some("bioscript:package-release:1.0") {
        return Ok(None);
    }
    let artifact = value
        .as_mapping()
        .and_then(|mapping| mapping.get(serde_yaml::Value::String("artifact".to_owned())))
        .and_then(serde_yaml::Value::as_mapping)
        .ok_or_else(|| format!("package release {} is missing artifact", path.display()))?;
    // String-valued lookup inside the `artifact` mapping; non-string values read as absent.
    let artifact_str = |key: &str| {
        artifact
            .get(serde_yaml::Value::String(key.to_owned()))
            .and_then(serde_yaml::Value::as_str)
    };
    let zip_path = if let Some(url) = artifact_str("url") {
        download_package_url(runtime_root, url)?
    } else if let Some(relative) = artifact_str("path") {
        if let Some(base_url) = source_url {
            // Manifest was fetched remotely, so its relative artifact lives beside it online.
            download_package_url(runtime_root, &join_url(base_url, relative))?
        } else {
            path.parent()
                .ok_or_else(|| format!("package release has no parent: {}", path.display()))?
                .join(checked_relative_package_path(relative)?)
        }
    } else {
        return Err(format!(
            "package release {} artifact needs path or url",
            path.display()
        ));
    };
    if let Some(expected) = artifact_str("sha256") {
        let expected = expected.trim();
        let actual = sha256_file(&zip_path)?;
        // Hex digests are case-insensitive; `actual` is always lowercase.
        if !actual.eq_ignore_ascii_case(expected) {
            return Err(format!(
                "package artifact sha256 mismatch for {}: expected {expected}, got {actual}",
                zip_path.display()
            ));
        }
    }
    Ok(Some(zip_path))
}

/// Looks up `key` in a top-level YAML mapping and returns its value as an owned string.
///
/// Yields `None` when `value` is not a mapping, the key is absent, or the
/// entry is not a YAML string.
fn yaml_string(value: &serde_yaml::Value, key: &str) -> Option<String> {
    let mapping = value.as_mapping()?;
    let entry = mapping.get(serde_yaml::Value::String(key.to_owned()))?;
    entry.as_str().map(str::to_owned)
}

/// Computes the SHA-256 digest of the file at `path` as lowercase hex.
///
/// The file is streamed through a 64 KiB buffer, so it is never held in
/// memory in full.
///
/// # Errors
///
/// Returns a message naming `path` when the file cannot be opened or a read
/// fails. Reads that fail with `ErrorKind::Interrupted` are retried rather
/// than reported, as the `Read::read` contract asks.
fn sha256_file(path: &Path) -> Result<String, String> {
    use sha2::{Digest, Sha256};
    use std::io::{ErrorKind, Read};

    let mut file = fs::File::open(path)
        .map_err(|err| format!("failed to open artifact {}: {err}", path.display()))?;
    let mut hasher = Sha256::new();
    let mut buffer = vec![0_u8; 64 * 1024];
    loop {
        let filled = match file.read(&mut buffer) {
            // Zero bytes means end of file.
            Ok(0) => break,
            Ok(filled) => filled,
            // A signal interrupted the syscall before any data arrived; retry.
            Err(err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => {
                return Err(format!(
                    "failed to read artifact {}: {err}",
                    path.display()
                ));
            }
        };
        hasher.update(&buffer[..filled]);
    }
    Ok(format!("{:x}", hasher.finalize()))
}

/// Resolves `relative` against the location of `base_url`.
///
/// * A `relative` that already starts with `http://` or `https://` is returned
///   unchanged.
/// * A `relative` that starts with `/` replaces the whole path of `base_url`,
///   keeping only its `scheme://authority` part.
/// * Anything else replaces the last path segment of `base_url`.
///
/// The query string and fragment of `base_url` are dropped before resolving.
/// A base with no path at all (`https://host`) gets `relative` appended after
/// a single `/`. A base with neither a scheme nor a `/` yields `relative`
/// unchanged.
fn join_url(base_url: &str, relative: &str) -> String {
    if relative.starts_with("https://") || relative.starts_with("http://") {
        return relative.to_owned();
    }
    // Neither the query nor the fragment takes part in reference resolution.
    let base = base_url.split(['?', '#']).next().unwrap_or(base_url);
    // Separate `scheme://authority` from the path so the slashes inside `://`
    // are never mistaken for path separators.
    let origin_end = match base.find("://") {
        Some(scheme_end) => {
            let authority_start = scheme_end + 3;
            base[authority_start..]
                .find('/')
                .map_or(base.len(), |offset| authority_start + offset)
        }
        None => 0,
    };
    let (origin, path) = base.split_at(origin_end);
    if relative.starts_with('/') {
        // Root-relative reference: keep the origin, replace the entire path.
        return format!("{origin}{relative}");
    }
    match path.rsplit_once('/') {
        Some((directory, _)) => format!("{origin}{directory}/{relative}"),
        None if origin.is_empty() => relative.to_owned(),
        None => format!("{origin}/{relative}"),
    }
}
19 changes: 17 additions & 2 deletions rust/bioscript-cli/src/report_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,13 @@ fn generate_app_report(options: &AppReportOptions) -> Result<(), String> {
if let Some(sample_sex) = options.sample_sex {
input_inspection.inferred_sex = Some(explicit_sample_sex_inference(sample_sex));
}
let input_loader = loader_with_inspection(&options.loader, &input_inspection);
let rows = run_manifest_rows_for_report(
&options.root,
&options.manifest_path,
input_file,
&participant_id,
&options.loader,
&input_loader,
&options.filters,
)?;
let input_observations = rows
Expand All @@ -280,7 +281,7 @@ fn generate_app_report(options: &AppReportOptions) -> Result<(), String> {
runtime_root: &options.root,
input_file,
participant_id: &participant_id,
loader: &options.loader,
loader: &input_loader,
output_dir: &options.output_dir,
filters: &options.filters,
max_duration_ms: options.analysis_max_duration_ms,
Expand Down Expand Up @@ -317,6 +318,20 @@ fn generate_app_report(options: &AppReportOptions) -> Result<(), String> {
Ok(())
}

/// Builds loader options for one input by overlaying inspection results on `base`.
///
/// The result is a clone of `base` in which `assembly` and `inferred_sex` are
/// taken from `inspection` when it provides them, and left at the values from
/// `base` otherwise. `base` itself is not modified.
fn loader_with_inspection(
    base: &GenotypeLoadOptions,
    inspection: &bioscript_formats::FileInspection,
) -> GenotypeLoadOptions {
    // Only the sex call itself is carried over from the inspection's inference record.
    let inspected_sex = inspection
        .inferred_sex
        .as_ref()
        .map(|inference| inference.sex);

    let mut options = base.clone();
    options.assembly = inspection.assembly.or(options.assembly);
    options.inferred_sex = inspected_sex.or(options.inferred_sex);
    options
}

fn open_app_html_report_if_requested(options: &AppReportOptions) {
if options.open_report
&& let Err(err) = open_html_report(&options.output_dir.join("index.html"))
Expand Down
Loading
Loading