This function determines which samples are homozygous or heterozygous, given
a data table of zygosity levels and a SNP of interest. Samples whose zygosity
level is `NA` will be returned as `FALSE` (see `na_as_false`).
Arguments
- zygosity
A data frame of zygosity levels: `"hom"` for homozygous or `"het"` for heterozygous. Each row is for a locus. The locus identity is indicated in the first column, which is named `snp`. Remaining columns are samples.
- snp
String with SNP identifier.
- na_as_false
Whether to return `FALSE` when the zygosity level is `NA`.
Examples
# Let us start by reading in an example data set with zygosity levels
zygosity <- read_snp_zygosity(file = daeqtlr_example("zygosity.csv"))
# Checking out SNP rsX005
zygosity['rsX005']
#> snp s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
#> 1: rsX005 het hom het het hom het het het hom hom hom hom het hom het hom het
#> s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 s33 s34 s35
#> 1: het hom het het hom hom hom het het het het het hom <NA> <NA> het hom hom
#> s36 s37 s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 s48 s49 s50
#> 1: hom het het hom hom hom hom hom het het het hom hom het het
is_hom(zygosity, 'rsX005')
#> [1] FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
#> [13] FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
#> [25] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE TRUE
#> [37] FALSE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
#> [49] FALSE FALSE
is_het(zygosity, 'rsX005')
#> [1] TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE
#> [13] TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE FALSE FALSE
#> [25] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
#> [37] TRUE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE
#> [49] TRUE TRUE
# Translate the logical vector to sample names
# Note that first column is excluded because it is the SNP identifier col
# Homozygous samples
(homs <- colnames(zygosity)[-1][is_hom(zygosity, 'rsX005')])
#> [1] "s02" "s05" "s09" "s10" "s11" "s12" "s14" "s16" "s19" "s22" "s23" "s24"
#> [13] "s30" "s34" "s35" "s36" "s39" "s40" "s41" "s42" "s43" "s47" "s48"
# Heterozygous samples
(hets <- colnames(zygosity)[-1][is_het(zygosity, 'rsX005')])
#> [1] "s01" "s03" "s04" "s06" "s07" "s08" "s13" "s15" "s17" "s18" "s20" "s21"
#> [13] "s25" "s26" "s27" "s28" "s29" "s33" "s37" "s38" "s44" "s45" "s46" "s49"
#> [25] "s50"
# Some samples are neither homozygous nor heterozygous because of NAs
# Note the `- 1` because the first column of `zygosity` is the SNP identifier.
(ncol(zygosity) - 1) - length(c(homs, hets))
#> [1] 2
# The samples whose zygosity is NA are:
setdiff(colnames(zygosity)[-1], c(homs, hets))
#> [1] "s31" "s32"