This function attempts to detect the clinical data standard used in a given R data frame.

detectStandard(data, domain = NULL, meta = NULL)

Arguments

data

A data frame in which to detect the data standard - required.

domain

The domain to evaluate - should match a value of meta$domain. Uses the first value in meta$domain if no value is provided.

meta

The metadata containing the data standards.

Value

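A list describing the detected standard, as shown in the examples below. The list elements are "details" (one entry per standard evaluated, each holding the "standard" name, a "mapping" data frame with columns "text_key", "current" and "valid", the "total_count", "valid_count", "invalid_count" and "match_percent" of the columns checked, the "match" type and a "label"), "standard" (the standard selected for the data), "label", "standard_percent" and "mapping" (the mapping data frame for the selected standard).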

Details

This function compares the columns in the provided "data" with the required columns for a given data standard/domain combination. By default, the function is designed to work with the SDTM and ADaM standards for clinical trial data from CDISC (https://www.cdisc.org/). Additional standards can be added by modifying the "meta" data set included as part of this package.

Examples

detectStandard(data=safetyData::adam_adae, meta=safetyCharts::meta_aes) 
#> $details
#> $details$adam
#> $details$adam$standard
#> [1] "adam"
#> 
#> $details$adam$mapping
#> # A tibble: 8 × 3
#> # Rowwise: 
#>   text_key     current  valid
#>   <chr>        <chr>    <lgl>
#> 1 id_col       USUBJID  TRUE 
#> 2 seq_col      AESEQ    TRUE 
#> 3 stdy_col     ASTDY    TRUE 
#> 4 endy_col     AENDY    TRUE 
#> 5 term_col     AEDECOD  TRUE 
#> 6 bodsys_col   AEBODSYS TRUE 
#> 7 severity_col AESEV    TRUE 
#> 8 serious_col  AESER    TRUE 
#> 
#> $details$adam$total_count
#> [1] 8
#> 
#> $details$adam$valid_count
#> [1] 8
#> 
#> $details$adam$invalid_count
#> [1] 0
#> 
#> $details$adam$match_percent
#> [1] 1
#> 
#> $details$adam$match
#> [1] "full"
#> 
#> $details$adam$label
#> [1] "ADaM"
#> 
#> 
#> $details$sdtm
#> $details$sdtm$standard
#> [1] "sdtm"
#> 
#> $details$sdtm$mapping
#> # A tibble: 8 × 3
#> # Rowwise: 
#>   text_key     current  valid
#>   <chr>        <chr>    <lgl>
#> 1 id_col       USUBJID  TRUE 
#> 2 seq_col      AESEQ    TRUE 
#> 3 stdy_col     NA       FALSE
#> 4 endy_col     NA       FALSE
#> 5 term_col     AEDECOD  TRUE 
#> 6 bodsys_col   AEBODSYS TRUE 
#> 7 severity_col AESEV    TRUE 
#> 8 serious_col  AESER    TRUE 
#> 
#> $details$sdtm$total_count
#> [1] 8
#> 
#> $details$sdtm$valid_count
#> [1] 6
#> 
#> $details$sdtm$invalid_count
#> [1] 2
#> 
#> $details$sdtm$match_percent
#> [1] 0.75
#> 
#> $details$sdtm$match
#> [1] "partial"
#> 
#> $details$sdtm$label
#> [1] "Partial SDTM"
#> 
#> $details$sdtm$details
#> [1] "(6/8 cols/fields matched)"
#> 
#> 
#> 
#> $standard
#> [1] "adam"
#> 
#> $label
#> [1] "ADaM"
#> 
#> $standard_percent
#> [1] 1
#> 
#> $mapping
#> # A tibble: 8 × 3
#> # Rowwise: 
#>   text_key     current  valid
#>   <chr>        <chr>    <lgl>
#> 1 id_col       USUBJID  TRUE 
#> 2 seq_col      AESEQ    TRUE 
#> 3 stdy_col     ASTDY    TRUE 
#> 4 endy_col     AENDY    TRUE 
#> 5 term_col     AEDECOD  TRUE 
#> 6 bodsys_col   AEBODSYS TRUE 
#> 7 severity_col AESEV    TRUE 
#> 8 serious_col  AESER    TRUE 
#> 
detectStandard(data=safetyData::adam_adlbc,meta=safetyCharts::meta_labs, domain="labs" ) 
#> $details
#> $details$adam
#> $details$adam$standard
#> [1] "adam"
#> 
#> $details$adam$mapping
#> # A tibble: 8 × 3
#> # Rowwise: 
#>   text_key        current  valid
#>   <chr>           <chr>    <lgl>
#> 1 id_col          USUBJID  TRUE 
#> 2 value_col       AVAL     TRUE 
#> 3 measure_col     PARAM    TRUE 
#> 4 normal_col_low  A1LO     TRUE 
#> 5 normal_col_high A1HI     TRUE 
#> 6 studyday_col    ADY      TRUE 
#> 7 visit_col       VISIT    TRUE 
#> 8 visitn_col      VISITNUM TRUE 
#> 
#> $details$adam$total_count
#> [1] 8
#> 
#> $details$adam$valid_count
#> [1] 8
#> 
#> $details$adam$invalid_count
#> [1] 0
#> 
#> $details$adam$match_percent
#> [1] 1
#> 
#> $details$adam$match
#> [1] "full"
#> 
#> $details$adam$label
#> [1] "ADaM"
#> 
#> 
#> $details$sdtm
#> $details$sdtm$standard
#> [1] "sdtm"
#> 
#> $details$sdtm$mapping
#> # A tibble: 9 × 3
#> # Rowwise: 
#>   text_key        current  valid
#>   <chr>           <chr>    <lgl>
#> 1 id_col          USUBJID  TRUE 
#> 2 value_col       LBSTRESN TRUE 
#> 3 measure_col     NA       FALSE
#> 4 normal_col_low  NA       FALSE
#> 5 normal_col_high NA       FALSE
#> 6 studyday_col    NA       FALSE
#> 7 visit_col       VISIT    TRUE 
#> 8 visitn_col      VISITNUM TRUE 
#> 9 unit_col        NA       FALSE
#> 
#> $details$sdtm$total_count
#> [1] 9
#> 
#> $details$sdtm$valid_count
#> [1] 4
#> 
#> $details$sdtm$invalid_count
#> [1] 5
#> 
#> $details$sdtm$match_percent
#> [1] 0.4444444
#> 
#> $details$sdtm$match
#> [1] "partial"
#> 
#> $details$sdtm$label
#> [1] "Partial SDTM"
#> 
#> $details$sdtm$details
#> [1] "(4/9 cols/fields matched)"
#> 
#> 
#> 
#> $standard
#> [1] "adam"
#> 
#> $label
#> [1] "ADaM"
#> 
#> $standard_percent
#> [1] 1
#> 
#> $mapping
#> # A tibble: 8 × 3
#> # Rowwise: 
#>   text_key        current  valid
#>   <chr>           <chr>    <lgl>
#> 1 id_col          USUBJID  TRUE 
#> 2 value_col       AVAL     TRUE 
#> 3 measure_col     PARAM    TRUE 
#> 4 normal_col_low  A1LO     TRUE 
#> 5 normal_col_high A1HI     TRUE 
#> 6 studyday_col    ADY      TRUE 
#> 7 visit_col       VISIT    TRUE 
#> 8 visitn_col      VISITNUM TRUE 
#>