In this vignette, we will use the biocompute package to recreate the example BioCompute Object (BCO), HCV1a.json, used in the BCO specification.

library("biocompute")

Provenance domain

# Parse an ISO-8601-like timestamp ("YYYY-MM-DDTHH:MM:SS") in the given zone.
parse_time <- function(x, tz = "EST") {
  as.POSIXct(x, format = "%Y-%m-%dT%H:%M:%S", tz = tz)
}

# Identification of the BCO.
name <- "HCV1a ledipasvir resistance SNP detection"
version <- "1.0.0"

# Review history, one row per review.
# NOTE(review): the second review date is parsed in America/Los_Angeles while
# every other timestamp in this block uses EST -- confirm this is intentional.
review_dates <- c(
  parse_time("2017-11-12T12:30:48"),
  parse_time("2017-12-12T12:30:48", tz = "America/Los_Angeles")
)
review <- data.frame(
  status = rep("approved", 2),
  reviewer_comment = c(
    "Approved by [company name] staff. Waiting for approval from FDA Reviewer",
    "The revised BCO looks fine"
  ),
  date = review_dates,
  reviewer_name = c("Jane Doe", "John Doe"),
  reviewer_affiliation = c(
    "Seven Bridges Genomics",
    "U.S. Food and Drug Administration"
  ),
  reviewer_email = c("example@sevenbridges.com", "example@fda.gov"),
  reviewer_contribution = rep("curatedBy", 2),
  reviewer_orcid = c("https://orcid.org/0000-0000-0000-0000", NA),
  stringsAsFactors = FALSE
)

# Lineage and lifetime of the object.
derived_from <- "https://github.com/biocompute-objects/BCO_Specification/blob/1.2.1-beta/HCV1a.json"
obsolete_after <- parse_time("2018-11-12T12:30:48")
embargo <- c(
  start_time = parse_time("2017-10-12T12:30:48"),
  end_time = parse_time("2017-11-12T12:30:48")
)
created <- parse_time("2017-01-20T09:40:17")
modified <- parse_time("2019-05-10T09:40:17")

# Contributors, one row per person. `contribution` is a list column because a
# person may hold several contribution types.
contributors <- data.frame(
  name = c("Jane Doe", "John Doe"),
  affiliation = c(
    "Seven Bridges Genomics",
    "U.S. Food and Drug Administration"
  ),
  email = c("example@sevenbridges.com", "example@fda.gov"),
  contribution = I(list(c("createdBy", "curatedBy"), "authoredBy")),
  orcid = c("https://orcid.org/0000-0000-0000-0000", NA),
  stringsAsFactors = FALSE
)

license <- "https://creativecommons.org/licenses/by/4.0/"

# Assemble the provenance domain and show it as JSON.
provenance <- compose_provenance(
  name,
  version,
  review,
  derived_from,
  obsolete_after,
  embargo,
  created,
  modified,
  contributors,
  license
)
convert_json(provenance)

{
  "name": "HCV1a ledipasvir resistance SNP detection",
  "version": "1.0.0",
  "review": [
    {
      "status": "approved",
      "reviewer_comment": "Approved by [company name] staff. Waiting for approval from FDA Reviewer",
      "date": 1510507848,
      "reviewer": [
        {
          "reviewer_name": "Jane Doe",
          "reviewer_affiliation": "Seven Bridges Genomics",
          "reviewer_email": "example@sevenbridges.com",
          "reviewer_contribution": "curatedBy",
          "reviewer_orcid": "https://orcid.org/0000-0000-0000-0000"
        }
      ]
    },
    {
      "status": "approved",
      "reviewer_comment": "The revised BCO looks fine",
      "date": 1513110648,
      "reviewer": [
        {
          "reviewer_name": "John Doe",
          "reviewer_affiliation": "U.S. Food and Drug Administration",
          "reviewer_email": "example@fda.gov",
          "reviewer_contribution": "curatedBy",
          "reviewer_orcid": "NA"
        }
      ]
    }
  ],
  "derived_from": "https://github.com/biocompute-objects/BCO_Specification/blob/1.2.1-beta/HCV1a.json",
  "obsolete_after": "2018-11-12T12:30:48-0500",
  "embargo": ["2017-10-12T12:30:48-0500", "2017-11-12T12:30:48-0500"],
  "created": "2017-01-20T09:40:17-0500",
  "modified": "2019-05-10T09:40:17-0500",
  "contributors": [
    {
      "name": "Jane Doe",
      "affiliation": "Seven Bridges Genomics",
      "email": "example@sevenbridges.com",
      "contribution": ["createdBy", "curatedBy"],
      "orcid": "https://orcid.org/0000-0000-0000-0000"
    },
    {
      "name": "John Doe",
      "affiliation": "U.S. Food and Drug Administration",
      "email": "example@fda.gov",
      "contribution": "authoredBy",
      "orcid": "NA"
    }
  ],
  "license": "https://creativecommons.org/licenses/by/4.0/"
}

Usability domain

# Free-text statements describing what the BCO is used for, one per element.
text <- c(
  "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]",
  "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure",
  "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus"
)

# Assemble the usability domain and show it as JSON.
usability <- compose_usability(text)
convert_json(usability)

["Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus"]

Extension domain

FHIR extension

# FHIR endpoint and version the resources below are listed against.
fhir_endpoint <- "https://fhirtest.uhn.ca/baseDstu3"
fhir_version <- "3"

# One row per FHIR resource: its id and its resource type.
fhir_ids <- c("21376", "6288583", "25544", "92440", "4588936")
fhir_types <- c(
  "Sequence",
  "DiagnosticReport",
  "ProcedureRequest",
  "Observation",
  "FamilyMemberHistory"
)
fhir_resources <- data.frame(
  id = fhir_ids,
  resource = fhir_types,
  stringsAsFactors = FALSE
)

# Assemble the FHIR extension and show it as JSON.
fhir <- compose_fhir(
  fhir_endpoint,
  fhir_version,
  fhir_resources
)
convert_json(fhir)

{
  "fhir_endpoint": "https://fhirtest.uhn.ca/baseDstu3",
  "fhir_version": "3",
  "fhir_resources": [
    [
      {
        "fhir_id": "21376",
        "fhir_resource": "Sequence"
      }
    ],
    [
      {
        "fhir_id": "6288583",
        "fhir_resource": "DiagnosticReport"
      }
    ],
    [
      {
        "fhir_id": "25544",
        "fhir_resource": "ProcedureRequest"
      }
    ],
    [
      {
        "fhir_id": "92440",
        "fhir_resource": "Observation"
      }
    ],
    [
      {
        "fhir_id": "4588936",
        "fhir_resource": "FamilyMemberHistory"
      }
    ]
  ]
}

SCM extension

# Source-control location of the workflow: repository, SCM type, the commit,
# and the path of the file inside the repository.
scm_repository <- "https://github.com/example/repo1"
scm_type <- "git"
scm_commit <- "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21"
scm_path <- "workflow/hive-viral-mutation-detection.cwl"

# Browsable URL; it is the repository, commit and path above joined together.
scm_preview <- "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl"

# Assemble the SCM extension and show it as JSON.
scm <- compose_scm(
  scm_repository,
  scm_type,
  scm_commit,
  scm_path,
  scm_preview
)
convert_json(scm)

{
  "scm_repository": "https://github.com/example/repo1",
  "scm_type": "git",
  "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21",
  "scm_path": "workflow/hive-viral-mutation-detection.cwl",
  "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl"
}

# Combine the FHIR and SCM extensions into the extension domain and show the
# result as JSON.
extension <- compose_extension(
  fhir,
  scm
)
convert_json(extension)

{
  "fhir_extension": {
    "fhir_endpoint": "https://fhirtest.uhn.ca/baseDstu3",
    "fhir_version": "3",
    "fhir_resources": [
      [
        {
          "fhir_id": "21376",
          "fhir_resource": "Sequence"
        }
      ],
      [
        {
          "fhir_id": "6288583",
          "fhir_resource": "DiagnosticReport"
        }
      ],
      [
        {
          "fhir_id": "25544",
          "fhir_resource": "ProcedureRequest"
        }
      ],
      [
        {
          "fhir_id": "92440",
          "fhir_resource": "Observation"
        }
      ],
      [
        {
          "fhir_id": "4588936",
          "fhir_resource": "FamilyMemberHistory"
        }
      ]
    ]
  },
  "scm_extension": {
    "scm_repository": "https://github.com/example/repo1",
    "scm_type": "git",
    "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21",
    "scm_path": "workflow/hive-viral-mutation-detection.cwl",
    "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl"
  }
}

Description domain

keywords <- c("HCV1a", "Ledipasvir", "antiviral resistance", "SNP", "amino acid substitutions")

# External references, one row per namespace. `ids` is a list column because a
# namespace may contribute several identifiers.
xref_access_time <- do.call(c, lapply(
  c(
    "2017-01-20T09:40:17",
    "2017-01-21T09:40:17",
    "2017-01-22T09:40:17",
    "2017-01-23T09:40:17"
  ),
  function(stamp) as.POSIXct(stamp, format = "%Y-%m-%dT%H:%M:%S", tz = "EST")
))
xref <- data.frame(
  namespace = c("pubchem.compound", "pubmed", "so", "taxonomy"),
  name = c("PubChem-compound", "PubMed", "Sequence Ontology", "Taxonomy"),
  ids = I(list(
    "67505836",
    "26508693",
    c("SO:000002", "SO:0000694", "SO:0000667", "SO:0000045"),
    "31646"
  )),
  access_time = xref_access_time,
  stringsAsFactors = FALSE
)

platform <- "Seven Bridges Platform"

# Every pipeline resource below shares the same access time.
step_access_time <- as.POSIXct(
  "2017-01-24T09:40:17",
  format = "%Y-%m-%dT%H:%M:%S", tz = "EST"
)

# Pipeline steps, one row per step.
pipeline_meta <- data.frame(
  step_number = "1",
  name = "HIVE-hexagon",
  description = "Alignment of reads to a set of references",
  version = "1.3",
  stringsAsFactors = FALSE
)

# Prerequisites, inputs and outputs are tied to a step through `step_number`.
pipeline_prerequisite <- data.frame(
  step_number = c("1", "1", "1", "1", "1"),
  name = c(
    "Hepatitis C virus genotype 1",
    "Hepatitis C virus type 1b complete genome",
    "Hepatitis C virus (isolate JFH-1) genomic RNA",
    "Hepatitis C virus clone J8CF, complete genome",
    "Hepatitis C virus S52 polyprotein gene"
  ),
  uri = c(
    "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
    "https://www.ncbi.nlm.nih.gov/nuccore/5420376",
    "https://www.ncbi.nlm.nih.gov/nuccore/13122261",
    "https://www.ncbi.nlm.nih.gov/nuccore/386646758",
    "https://www.ncbi.nlm.nih.gov/nuccore/295311559"
  ),
  access_time = c(
    step_access_time, step_access_time, step_access_time,
    step_access_time, step_access_time
  ),
  stringsAsFactors = FALSE
)

pipeline_input <- data.frame(
  step_number = c("1", "1"),
  uri = c(
    "https://example.com/dna.cgi?cmd=objFile&ids=514683",
    "https://example.com/dna.cgi?cmd=objFile&ids=514682"
  ),
  access_time = c(step_access_time, step_access_time),
  stringsAsFactors = FALSE
)

pipeline_output <- data.frame(
  step_number = c("1", "1"),
  uri = c(
    "https://example.com/data/514769/allCount-aligned.csv",
    "https://example.com/data/514801/SNPProfile*.csv"
  ),
  access_time = c(step_access_time, step_access_time),
  stringsAsFactors = FALSE
)

# Assemble the description domain and show it as JSON.
description <- compose_description(
  keywords,
  xref,
  platform,
  pipeline_meta,
  pipeline_prerequisite,
  pipeline_input,
  pipeline_output
)
convert_json(description)

{
  "keywords": ["HCV1a", "Ledipasvir", "antiviral resistance", "SNP", "amino acid substitutions"],
  "xref": [
    {
      "namespace": "pubchem.compound",
      "name": "PubChem-compound",
      "ids": "67505836",
      "access_time": "2017-01-20T09:40:17-0500"
    },
    {
      "namespace": "pubmed",
      "name": "PubMed",
      "ids": "26508693",
      "access_time": "2017-01-21T09:40:17-0500"
    },
    {
      "namespace": "so",
      "name": "Sequence Ontology",
      "ids": ["SO:000002", "SO:0000694", "SO:0000667", "SO:0000045"],
      "access_time": "2017-01-22T09:40:17-0500"
    },
    {
      "namespace": "taxonomy",
      "name": "Taxonomy",
      "ids": "31646",
      "access_time": "2017-01-23T09:40:17-0500"
    }
  ],
  "platform": [
    "Seven Bridges Platform"
  ],
  "pipeline_steps": [
    {
      "step_number": "1",
      "name": "HIVE-hexagon",
      "description": "Alignment of reads to a set of references",
      "version": "1.3",
      "prerequisite": [
        {
          "name": "Hepatitis C virus genotype 1",
          "uri": {
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
            "access_time": "2017-01-24 09:40:17"
          }
        },
        {
          "name": "Hepatitis C virus type 1b complete genome",
          "uri": {
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/5420376",
            "access_time": "2017-01-24 09:40:17"
          }
        },
        {
          "name": "Hepatitis C virus (isolate JFH-1) genomic RNA",
          "uri": {
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/13122261",
            "access_time": "2017-01-24 09:40:17"
          }
        },
        {
          "name": "Hepatitis C virus clone J8CF, complete genome",
          "uri": {
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/386646758",
            "access_time": "2017-01-24 09:40:17"
          }
        },
        {
          "name": "Hepatitis C virus S52 polyprotein gene",
          "uri": {
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/295311559",
            "access_time": "2017-01-24 09:40:17"
          }
        }
      ],
      "input_list": [
        {
          "uri": "https://example.com/dna.cgi?cmd=objFile&ids=514683",
          "access_time": "2017-01-24 09:40:17"
        },
        {
          "uri": "https://example.com/dna.cgi?cmd=objFile&ids=514682",
          "access_time": "2017-01-24 09:40:17"
        }
      ],
      "output_list": [
        {
          "uri": "https://example.com/data/514769/allCount-aligned.csv",
          "access_time": "2017-01-24 09:40:17"
        },
        {
          "uri": "https://example.com/data/514801/SNPProfile*.csv",
          "access_time": "2017-01-24 09:40:17"
        }
      ]
    }
  ]
}

Execution domain

# Script that runs the pipeline and the driver used to execute it.
script <- "https://example.com/workflows/antiviral_resistance_detection_hive.py"
script_driver <- "shell"

# Software prerequisites, one row per tool: name, version, where and when it
# was accessed, and a SHA-1 checksum (NA when none is recorded).
software_prerequisites <- data.frame(
  "name" = c("HIVE-hexagon", "HIVE-heptagon"),
  "version" = c("babajanian.1", "albinoni.2"),
  "uri" = c(
    "https://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-",
    "https://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-"
  ),
  "access_time" = c(
    as.POSIXct("2017-01-24T09:40:17", format = "%Y-%m-%dT%H:%M:%S", tz = "EST"),
    as.POSIXct("2017-01-24T09:40:17", format = "%Y-%m-%dT%H:%M:%S", tz = "EST")
  ),
  "sha1_chksum" = c("d60f506cddac09e9e816531e7905ca1ca6641e3c", NA),
  stringsAsFactors = FALSE
)

# External endpoints, one row per named URL.
external_data_endpoints <- data.frame(
  "name" = c("generic name", "access to ftp server", "access to e-utils web service"),
  "url" = c(
    "protocol://domain:port/application/path",
    "ftp://data.example.com:21/",
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  ),
  stringsAsFactors = FALSE
)

# Environment variables as key/value pairs. stringsAsFactors = FALSE keeps both
# columns as character on R < 4.0 as well, matching every other data frame in
# this vignette.
environment_variables <- data.frame(
  "key" = c("HOSTTYPE", "EDITOR"),
  "value" = c("x86_64-linux", "vim"),
  stringsAsFactors = FALSE
)

# Assemble the execution domain and show it as JSON.
execution <- compose_execution(
  script, script_driver, software_prerequisites, external_data_endpoints, environment_variables
)
execution %>% convert_json()

{
  "script": [
    "https://example.com/workflows/antiviral_resistance_detection_hive.py"
  ],
  "script_driver": "shell",
  "software_prerequisites": [
    {
      "name": "HIVE-hexagon",
      "version": "babajanian.1",
      "uri": [
        {
          "uri": "https://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-",
          "access_time": "2017-01-24 09:40:17",
          "sha1_chksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c"
        }
      ]
    },
    {
      "name": "HIVE-heptagon",
      "version": "albinoni.2",
      "uri": [
        {
          "uri": "https://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-",
          "access_time": "2017-01-24 09:40:17",
          "sha1_chksum": "NA"
        }
      ]
    }
  ],
  "external_data_endpoints": [
    [
      {
        "name": "generic name",
        "url": "protocol://domain:port/application/path"
      }
    ],
    [
      {
        "name": "access to ftp server",
        "url": "ftp://data.example.com:21/"
      }
    ],
    [
      {
        "name": "access to e-utils web service",
        "url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
      }
    ]
  ],
  "environment_variables": {
    "HOSTTYPE": "x86_64-linux",
    "EDITOR": "vim"
  }
}

Parametric domain

# Parameters, one row per parameter: its name, its value (kept as text) and the
# number of the step it applies to.
param_names <- c(
  "seed",
  "minimum_match_len",
  "divergence_threshold_percent",
  "minimum_coverage",
  "freq_cutoff"
)
df_parametric <- data.frame(
  param = param_names,
  value = c("14", "66", "0.30", "15", "0.10"),
  # The first three parameters are for step 1, the last two for step 2.
  step = rep(c(1, 2), times = c(3, 2)),
  stringsAsFactors = FALSE
)

# Assemble the parametric domain and show it as JSON.
parametric <- compose_parametric(df_parametric)
convert_json(parametric)

[
  {
    "param": "seed",
    "value": "14",
    "step": "1"
  },
  {
    "param": "minimum_match_len",
    "value": "66",
    "step": "1"
  },
  {
    "param": "divergence_threshold_percent",
    "value": "0.30",
    "step": "1"
  },
  {
    "param": "minimum_coverage",
    "value": "15",
    "step": "2"
  },
  {
    "param": "freq_cutoff",
    "value": "0.10",
    "step": "2"
  }
]

I/O domain

# All input and output files in this example share one access time.
io_access_time <- as.POSIXct(
  "2017-01-24T09:40:17",
  format = "%Y-%m-%dT%H:%M:%S", tz = "EST"
)

# Inputs, one row per file: a file name, its URI and when it was accessed.
input_subdomain <- data.frame(
  filename = c(
    "Hepatitis C virus genotype 1",
    "Hepatitis C virus type 1b complete genome"
  ),
  uri = c(
    "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
    "https://www.ncbi.nlm.nih.gov/nuccore/5420376"
  ),
  access_time = c(io_access_time, io_access_time),
  stringsAsFactors = FALSE
)

# Outputs, one row per file: its media type, its URI and when it was accessed.
output_subdomain <- data.frame(
  mediatype = rep("text/csv", 2),
  uri = c(
    "https://example.com/data/514769/dnaAccessionBased.csv",
    "https://example.com/data/514801/SNPProfile*.csv"
  ),
  access_time = c(io_access_time, io_access_time),
  stringsAsFactors = FALSE
)

# Assemble the I/O domain and show it as JSON.
io <- compose_io(
  input_subdomain,
  output_subdomain
)
convert_json(io)

{
  "input_subdomain": [
    {
      "uri": [
        {
          "filename": "Hepatitis C virus genotype 1",
          "uri": "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
          "access_time": "2017-01-24T09:40:17-0500"
        }
      ]
    },
    {
      "uri": [
        {
          "filename": "Hepatitis C virus type 1b complete genome",
          "uri": "https://www.ncbi.nlm.nih.gov/nuccore/5420376",
          "access_time": "2017-01-24T09:40:17-0500"
        }
      ]
    }
  ],
  "output_subdomain": [
    {
      "mediatype": "text/csv",
      "uri": [
        {
          "uri": "https://example.com/data/514769/dnaAccessionBased.csv",
          "access_time": "2017-01-24T09:40:17-0500"
        }
      ]
    },
    {
      "mediatype": "text/csv",
      "uri": [
        {
          "uri": "https://example.com/data/514801/SNPProfile*.csv",
          "access_time": "2017-01-24T09:40:17-0500"
        }
      ]
    }
  ]
}

Error domain

# Empirical error, as key/value pairs.
empirical <- data.frame(
  key = c("false_negative_alignment_hits", "false_discovery"),
  value = c("<0.0010", "<0.05"),
  stringsAsFactors = FALSE
)

# Algorithmic error, as key/value pairs.
algorithmic <- data.frame(
  key = c("false_positive_mutation_calls", "false_discovery"),
  value = c("<0.00005", "0.005"),
  stringsAsFactors = FALSE
)

# Assemble the error domain and show it as JSON.
error <- compose_error(
  empirical,
  algorithmic
)
convert_json(error)

{
  "empirical_error": {
    "false_negative_alignment_hits": "<0.0010",
    "false_discovery": "<0.05"
  },
  "algorithmic_error": {
    "false_positive_mutation_calls": "<0.00005",
    "false_discovery": "0.005"
  }
}

Top level fields

# Build the top level fields from the eight domains composed above, then show
# them as JSON.
tlf <- compose_tlf(
  provenance,
  usability,
  extension,
  description,
  execution,
  parametric,
  io,
  error
)
convert_json(tlf)

["https://w3id.org/biocompute/1.4.2/", "https://biocompute.sbgenomics.com/bco/c9ef66e4-41cb-4a56-a6ba-cf8356fd062c", "d18deb41a97a3108e743231af5fec0055dac321b945a569ca58bf33503ec526b"]

Complete BCO

# Put the top level fields and all eight domains together into the complete
# BCO, then show it as JSON. `compose` is called with its namespace prefix.
bco <- biocompute::compose(
  tlf,
  provenance,
  usability,
  extension,
  description,
  execution,
  parametric,
  io,
  error
)
convert_json(bco)

{
  "spec_version": "https://w3id.org/biocompute/1.4.2/",
  "object_id": "https://biocompute.sbgenomics.com/bco/c9ef66e4-41cb-4a56-a6ba-cf8356fd062c",
  "etag": "d18deb41a97a3108e743231af5fec0055dac321b945a569ca58bf33503ec526b",
  "provenance_domain": {
    "name": "HCV1a ledipasvir resistance SNP detection",
    "version": "1.0.0",
    "review": [
      {
        "status": "approved",
        "reviewer_comment": "Approved by [company name] staff. Waiting for approval from FDA Reviewer",
        "date": 1510507848,
        "reviewer": [
          {
            "reviewer_name": "Jane Doe",
            "reviewer_affiliation": "Seven Bridges Genomics",
            "reviewer_email": "example@sevenbridges.com",
            "reviewer_contribution": "curatedBy",
            "reviewer_orcid": "https://orcid.org/0000-0000-0000-0000"
          }
        ]
      },
      {
        "status": "approved",
        "reviewer_comment": "The revised BCO looks fine",
        "date": 1513110648,
        "reviewer": [
          {
            "reviewer_name": "John Doe",
            "reviewer_affiliation": "U.S. Food and Drug Administration",
            "reviewer_email": "example@fda.gov",
            "reviewer_contribution": "curatedBy",
            "reviewer_orcid": "NA"
          }
        ]
      }
    ],
    "derived_from": "https://github.com/biocompute-objects/BCO_Specification/blob/1.2.1-beta/HCV1a.json",
    "obsolete_after": "2018-11-12T12:30:48-0500",
    "embargo": ["2017-10-12T12:30:48-0500", "2017-11-12T12:30:48-0500"],
    "created": "2017-01-20T09:40:17-0500",
    "modified": "2019-05-10T09:40:17-0500",
    "contributors": [
      {
        "name": "Jane Doe",
        "affiliation": "Seven Bridges Genomics",
        "email": "example@sevenbridges.com",
        "contribution": ["createdBy", "curatedBy"],
        "orcid": "https://orcid.org/0000-0000-0000-0000"
      },
      {
        "name": "John Doe",
        "affiliation": "U.S. Food and Drug Administration",
        "email": "example@fda.gov",
        "contribution": "authoredBy",
        "orcid": "NA"
      }
    ],
    "license": "https://creativecommons.org/licenses/by/4.0/"
  },
  "usability_domain": ["Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus"],
  "extension_domain": {
    "fhir_extension": {
      "fhir_endpoint": "https://fhirtest.uhn.ca/baseDstu3",
      "fhir_version": "3",
      "fhir_resources": [
        [
          {
            "fhir_id": "21376",
            "fhir_resource": "Sequence"
          }
        ],
        [
          {
            "fhir_id": "6288583",
            "fhir_resource": "DiagnosticReport"
          }
        ],
        [
          {
            "fhir_id": "25544",
            "fhir_resource": "ProcedureRequest"
          }
        ],
        [
          {
            "fhir_id": "92440",
            "fhir_resource": "Observation"
          }
        ],
        [
          {
            "fhir_id": "4588936",
            "fhir_resource": "FamilyMemberHistory"
          }
        ]
      ]
    },
    "scm_extension": {
      "scm_repository": "https://github.com/example/repo1",
      "scm_type": "git",
      "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21",
      "scm_path": "workflow/hive-viral-mutation-detection.cwl",
      "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl"
    }
  },
  "description_domain": {
    "keywords": ["HCV1a", "Ledipasvir", "antiviral resistance", "SNP", "amino acid substitutions"],
    "xref": [
      {
        "namespace": "pubchem.compound",
        "name": "PubChem-compound",
        "ids": "67505836",
        "access_time": "2017-01-20T09:40:17-0500"
      },
      {
        "namespace": "pubmed",
        "name": "PubMed",
        "ids": "26508693",
        "access_time": "2017-01-21T09:40:17-0500"
      },
      {
        "namespace": "so",
        "name": "Sequence Ontology",
        "ids": ["SO:000002", "SO:0000694", "SO:0000667", "SO:0000045"],
        "access_time": "2017-01-22T09:40:17-0500"
      },
      {
        "namespace": "taxonomy",
        "name": "Taxonomy",
        "ids": "31646",
        "access_time": "2017-01-23T09:40:17-0500"
      }
    ],
    "platform": [
      "Seven Bridges Platform"
    ],
    "pipeline_steps": [
      {
        "step_number": "1",
        "name": "HIVE-hexagon",
        "description": "Alignment of reads to a set of references",
        "version": "1.3",
        "prerequisite": [
          {
            "name": "Hepatitis C virus genotype 1",
            "uri": {
              "uri": "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
              "access_time": "2017-01-24 09:40:17"
            }
          },
          {
            "name": "Hepatitis C virus type 1b complete genome",
            "uri": {
              "uri": "https://www.ncbi.nlm.nih.gov/nuccore/5420376",
              "access_time": "2017-01-24 09:40:17"
            }
          },
          {
            "name": "Hepatitis C virus (isolate JFH-1) genomic RNA",
            "uri": {
              "uri": "https://www.ncbi.nlm.nih.gov/nuccore/13122261",
              "access_time": "2017-01-24 09:40:17"
            }
          },
          {
            "name": "Hepatitis C virus clone J8CF, complete genome",
            "uri": {
              "uri": "https://www.ncbi.nlm.nih.gov/nuccore/386646758",
              "access_time": "2017-01-24 09:40:17"
            }
          },
          {
            "name": "Hepatitis C virus S52 polyprotein gene",
            "uri": {
              "uri": "https://www.ncbi.nlm.nih.gov/nuccore/295311559",
              "access_time": "2017-01-24 09:40:17"
            }
          }
        ],
        "input_list": [
          {
            "uri": "https://example.com/dna.cgi?cmd=objFile&ids=514683",
            "access_time": "2017-01-24 09:40:17"
          },
          {
            "uri": "https://example.com/dna.cgi?cmd=objFile&ids=514682",
            "access_time": "2017-01-24 09:40:17"
          }
        ],
        "output_list": [
          {
            "uri": "https://example.com/data/514769/allCount-aligned.csv",
            "access_time": "2017-01-24 09:40:17"
          },
          {
            "uri": "https://example.com/data/514801/SNPProfile*.csv",
            "access_time": "2017-01-24 09:40:17"
          }
        ]
      }
    ]
  },
  "execution_domain": {
    "script": [
      "https://example.com/workflows/antiviral_resistance_detection_hive.py"
    ],
    "script_driver": "shell",
    "software_prerequisites": [
      {
        "name": "HIVE-hexagon",
        "version": "babajanian.1",
        "uri": [
          {
            "uri": "https://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-",
            "access_time": "2017-01-24 09:40:17",
            "sha1_chksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c"
          }
        ]
      },
      {
        "name": "HIVE-heptagon",
        "version": "albinoni.2",
        "uri": [
          {
            "uri": "https://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-",
            "access_time": "2017-01-24 09:40:17",
            "sha1_chksum": "NA"
          }
        ]
      }
    ],
    "external_data_endpoints": [
      [
        {
          "name": "generic name",
          "url": "protocol://domain:port/application/path"
        }
      ],
      [
        {
          "name": "access to ftp server",
          "url": "ftp://data.example.com:21/"
        }
      ],
      [
        {
          "name": "access to e-utils web service",
          "url": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
        }
      ]
    ],
    "environment_variables": {
      "HOSTTYPE": "x86_64-linux",
      "EDITOR": "vim"
    }
  },
  "parametric_domain": [
    {
      "param": "seed",
      "value": "14",
      "step": "1"
    },
    {
      "param": "minimum_match_len",
      "value": "66",
      "step": "1"
    },
    {
      "param": "divergence_threshold_percent",
      "value": "0.30",
      "step": "1"
    },
    {
      "param": "minimum_coverage",
      "value": "15",
      "step": "2"
    },
    {
      "param": "freq_cutoff",
      "value": "0.10",
      "step": "2"
    }
  ],
  "io_domain": {
    "input_subdomain": [
      {
        "uri": [
          {
            "filename": "Hepatitis C virus genotype 1",
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/22129792",
            "access_time": "2017-01-24T09:40:17-0500"
          }
        ]
      },
      {
        "uri": [
          {
            "filename": "Hepatitis C virus type 1b complete genome",
            "uri": "https://www.ncbi.nlm.nih.gov/nuccore/5420376",
            "access_time": "2017-01-24T09:40:17-0500"
          }
        ]
      }
    ],
    "output_subdomain": [
      {
        "mediatype": "text/csv",
        "uri": [
          {
            "uri": "https://example.com/data/514769/dnaAccessionBased.csv",
            "access_time": "2017-01-24T09:40:17-0500"
          }
        ]
      },
      {
        "mediatype": "text/csv",
        "uri": [
          {
            "uri": "https://example.com/data/514801/SNPProfile*.csv",
            "access_time": "2017-01-24T09:40:17-0500"
          }
        ]
      }
    ]
  },
  "error_domain": {
    "empirical_error": {
      "false_negative_alignment_hits": "<0.0010",
      "false_discovery": "<0.05"
    },
    "algorithmic_error": {
      "false_positive_mutation_calls": "<0.00005",
      "false_discovery": "0.005"
    }
  }
}

Authoring BioCompute Objects with R: A Case Study

2022-03-29

Provenance domain

Usability domain

Extension domain

FHIR extension

SCM extension

Description domain

Execution domain

Parametric domain

I/O domain

Error domain

Top level fields

Complete BCO