Load data from dryad repository and select one of the access options:
# Name of the CSV file contained in the Dryad archive. Both access options
# below end with this file being read from the working directory.
csv_file <- "VietnamWithInterpretations_20200323_dryad_v2.csv"

# Option 1 - for use once the data set is public: download the Dryad archive
# to a temporary file and extract the CSV into the working directory.
temp <- tempfile()
#download.file("https://doi.org/10.5061/dryad.7h44j0zrj",temp, mode="wb") #for use once the data set is public
# tempfile() only returns a file name and creates nothing, so extraction is
# skipped unless the download above has actually been run.
if (file.exists(temp)) {
  unzip(temp, csv_file)
}

# Option 2 - for use during PLOS One review process: download the .csv file
# here and move it to your local (working) directory -
# https://datadryad.org/stash/share/pk3wVUxFNzTuCYZ9t8haKRPmx7V8YhTDBuHpG8JJ9kU
if (!file.exists(csv_file)) {
  stop("Data file not found in the working directory: ", csv_file,
       call. = FALSE)
}

# Read the data once (one row per test result) and preview the first rows.
H1 <- read.csv(csv_file)
head(H1)
## Country SiteName District StateProv SiteLatitude
## 1 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## 2 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## 3 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## 4 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## 5 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## 6 Vietnam CPCP civet restaurant confiscation Nho Quan Ninh Binh 20.32
## SiteLongitude EventName DomesticAnimals
## 1 105.61 Cuc Phuong NP civet restaurant confiscation
## 2 105.61 Cuc Phuong NP civet restaurant confiscation
## 3 105.61 Cuc Phuong NP civet restaurant confiscation
## 4 105.61 Cuc Phuong NP civet restaurant confiscation
## 5 105.61 Cuc Phuong NP civet restaurant confiscation
## 6 105.61 Cuc Phuong NP civet restaurant confiscation
## AnimalID TaxaGroup SpeciesScientificName CommonName
## 1 VN11A0002 Carnivores Paradoxurus hermaphroditus Common Palm Civet
## 2 VN11A0002 Carnivores Paradoxurus hermaphroditus Common Palm Civet
## 3 VN11A0002 Carnivores Paradoxurus hermaphroditus Common Palm Civet
## 4 VN11A0002 Carnivores Paradoxurus hermaphroditus Common Palm Civet
## 5 VN11A0002 Carnivores Paradoxurus hermaphroditus Common Palm Civet
## 6 VN11A0001 Carnivores Paguma larvata Masked Palm Civet
## CommonNameFieldMorphology IDCertainty Order Family Genus
## 1 Common Palm Civet field ID certain CARNIVORA VIVERRIDAE Paradoxurus
## 2 Common Palm Civet field ID certain CARNIVORA VIVERRIDAE Paradoxurus
## 3 Common Palm Civet field ID certain CARNIVORA VIVERRIDAE Paradoxurus
## 4 Common Palm Civet field ID certain CARNIVORA VIVERRIDAE Paradoxurus
## 5 Common Palm Civet field ID certain CARNIVORA VIVERRIDAE Paradoxurus
## 6 Masked Palm Civet field ID certain CARNIVORA VIVERRIDAE Paguma
## SampleDate SpecimenID SpecimenType TestType TestRequested
## 1 4/22/11 HUA 52 red blood cells Conventional PCR Coronaviruses
## 2 4/22/11 HUA 55 oral swab Conventional PCR Coronaviruses
## 3 4/22/11 HUA 56 rectal swab Conventional PCR Coronaviruses
## 4 4/22/11 HUA 57 rectal swab Conventional PCR Coronaviruses
## 5 4/22/11 HUA 58 oral swab Conventional PCR Coronaviruses
## 6 4/22/11 HUA 51 red blood cells Conventional PCR Coronaviruses
## TestRequestedProtocol ConfirmationResult Sequence GenbankAccessionNumber
## 1 Quan | Watanabe Negative NULL NULL
## 2 Quan | Watanabe Negative NULL NULL
## 3 Quan | Watanabe Negative NULL NULL
## 4 Quan | Watanabe Negative NULL NULL
## 5 Quan | Watanabe Negative NULL NULL
## 6 Quan | Watanabe Negative NULL NULL
## Virus VirusGroup
## 1 NULL NULL
## 2 NULL NULL
## 3 NULL NULL
## 4 NULL NULL
## 5 NULL NULL
## 6 NULL NULL
# List the column names of the raw data frame.
names(H1)
## [1] "Country" "SiteName"
## [3] "District" "StateProv"
## [5] "SiteLatitude" "SiteLongitude"
## [7] "EventName" "DomesticAnimals"
## [9] "AnimalID" "TaxaGroup"
## [11] "SpeciesScientificName" "CommonName"
## [13] "CommonNameFieldMorphology" "IDCertainty"
## [15] "Order" "Family"
## [17] "Genus" "SampleDate"
## [19] "SpecimenID" "SpecimenType"
## [21] "TestType" "TestRequested"
## [23] "TestRequestedProtocol" "ConfirmationResult"
## [25] "Sequence" "GenbankAccessionNumber"
## [27] "Virus" "VirusGroup"
# Convert the column names to snake_case (e.g. "SampleDate" -> "sample_date").
names(H1) <- to_snake_case(names(H1))

# Parse the m/d/yy sample dates as date-times in Indochina time. The zone name
# must be spelled "Asia/Bangkok": zone names are looked up case-sensitively on
# many platforms, and a misspelled zone does not yield the intended +07 offset.
H1$sample_date <- as.POSIXct(
  strptime(as.character(H1$sample_date), "%m/%d/%y", tz = "Asia/Bangkok")
)

# Month of sampling; used further down to assign the season.
H1$sample_month <- month(H1$sample_date)
Note: Sample dates earlier than 2011 correspond to archived samples that were obtained from the Tam Dao Bear rescue center.
Establish primary interface groups based on site names.
# Site names belonging to each primary interface group. The strings must match
# the values of `site_name` in the data exactly (including the data's own
# spelling), so they are reproduced verbatim. Any site not listed here keeps
# the default group "captive facilities".
interface_sites <- list(
  "trader" = c(
    "Rat market CT - Dong Thap", "Rat market LVO - Dong Thap",
    "Rat market LVU - Dong Thap", "Rat market MX1 - Soc Trang",
    "Rat market MX2 - Soc Trang", "Rat market N5(1) - Soc Trang",
    "Rat market N5(2) - Soc Trang", "Rat market SD - Dong Thap"
  ),
  "large market" = c(
    "Rat market CLC1 - Dong Thap", "Rat market CLC2 - Dong Thap",
    "Rat market CLC3 - Dong Thap", "Rat market CLD - Dong Thap",
    "Rat market HND - Dong Thap", "Rat market HNT - Dong Thap",
    "Rat market TB1 - Dong Thap", "Rat market TB2 - Dong Thap",
    "Rat market TH - Dong Thap", "Rat market TM1 - Dong Thap",
    "Rat market TM2 - Dong Thap", "Rat market TN - Dong Thap",
    "Rat market TT - Soc Trang",
    "Wet market (Whole sale sellers) - Soc Trang"
  ),
  "restaurant" = c(
    "CPCP civet restaurant confiscation", "Rat restaurant ST - Soc Trang",
    "Restaurant - Soc Trang", "Lam Dong FPD"
  ),
  "natural roost" = c(
    "Bat pagoda - Soc Trang"
  ),
  "bat guano farm" = c(
    "Bat roost and quano farm - Soc Trang",
    "Bat roost and quano farm CL - Dong Thap",
    "Bat roost and quano farm CLD2 - Soc Trang",
    "Bat roost and quano farm CLD3 - Soc Trang",
    "Bat roost and quano farm CLD5 - Soc Trang",
    "Bat roost and quano farm CLD6 - Soc Trang",
    "Bat roost and quano farm CLD7 - Soc Trang",
    "Bat roost and quano farm CT - Dong Thap",
    "Bat roost and quano farm HN - Dong Thap",
    "Bat roost and quano farm LP - Soc Trang",
    "Bat roost and quano farm LV - Dong Thap",
    "Bat roost and quano farm TM - Dong Thap",
    "Bat roost and quano farm CLD1 - Soc Trang",
    "Bat roost and quano farm CLD4 - Soc Trang",
    "Bat roost and quano farm MX - Soc Trang",
    "Bat roost and quano farm N5(1) - Soc Trang",
    "Bat roost and quano farm N5(2) - Soc Trang"
  ),
  "wildlife farm" = c(
    "Farm 1 - Bien Hoa - DN", "Farm 10 - Vinh Cuu - DN",
    "Farm 12 - Nhon Trach - DN", "Farm 13 - Nhon Trach - DN",
    "Farm 14 - Long Thanh - DN", "Farm 15 - Long Thanh - DN",
    "Farm 16 - Long Thanh - DN", "Farm 17 - Long Thanh - DN",
    "Farm 18 - Cam My - DN", "Farm 19 - Cam My - DN",
    "Farm 2 - Bien Hoa - DN", "Farm 20 - Cam My - DN",
    "Farm 21 - Xuan Loc - DN", "Farm 22 - Xuan Loc - DN",
    "Farm 23 - Xuan Loc - DN", "Farm 24 - Xuan Loc - DN",
    "Farm 25 - Long Khanh - DN", "Farm 26 - Xuan Loc - DN",
    "Farm 28 - Dinh Quan - DN", "Farm 29 - Tan Phu - DN",
    "Farm 3 - Bien Hoa - DN", "Farm 4 - Thong Nhat - DN",
    "Farm 5 - Thong Nhat - DN", "Farm 6 - Trang Bom - DN",
    "Farm 8 - Vinh Cuu - DN", "Farm 9 - Vinh Cuu - DN",
    "Wildlife farm 1 - Dong Nai", "Wildlife farm 3 - Dong Nai",
    "Wildlife farm 4 - Dong Nai"
  )
)

# Start every row in the default group. `times` is the actual argument of
# rep(); an unknown argument such as `n` is silently ignored and yields a
# length-one vector.
H1$primary_interface_group <- rep("captive facilities", times = nrow(H1))

# Overwrite the default for the sites listed above, in the order of the list.
for (group_name in names(interface_sites)) {
  in_group <- H1$site_name %in% interface_sites[[group_name]]
  H1$primary_interface_group[in_group] <- group_name
}

# Store as a factor with a fixed level order, which is the order used when
# the groups are printed in the summaries.
H1$primary_interface_group <- factor(
  H1$primary_interface_group,
  levels = c("trader", "large market", "restaurant", "natural roost",
             "bat guano farm", "wildlife farm", "captive facilities")
)
Add the family name where the genus is missing in the data (a couple of squirrels and unidentified Viverridae). A captive facility group was also added for the bears and macaques, even though they tested negative (to avoid missing values).
# Build `new_genus` from the recorded genus, then overwrite it step by step:
#  - rows whose genus is "NULL" take a higher-level label chosen from
#    `species_scientific_name`;
#  - MURIDAE sampled at traders, large markets or restaurants are relabelled
#    "Field Rats".
# The replacements run in the order written, so a later one wins if a row
# matches more than one condition.
field_rat_groups <- c("trader", "large market", "restaurant")

H1 <- H1 %>%
  mutate(
    new_genus = as.character(genus),
    new_genus = replace(
      new_genus,
      genus == "NULL" & species_scientific_name == "Viverridae",
      "Viverridae"
    ),
    new_genus = replace(
      new_genus,
      genus == "NULL" & species_scientific_name == "Sciuridae",
      "Sciuridae"
    ),
    new_genus = replace(
      new_genus,
      genus == "NULL" & species_scientific_name == "Chiroptera",
      "Microchiroptera"
    ),
    new_genus = replace(
      new_genus,
      primary_interface_group %in% field_rat_groups & family == "MURIDAE",
      "Field Rats"
    )
  )
Each row in the data set is a test and virus sequence result, so we need to add a field that summarizes all viruses found in a single individual to establish co-infection status.
# Working copy of the data in which the virus name "NULL" is recoded as NA.
tempH1 <- as.data.frame(H1) %>%
  mutate(virus = na_if(as.character(virus), "NULL"))

# Per animal: the distinct virus groups among its rows that have a virus
# name, collapsed into one " | "-separated string.
temp1 <- tempH1 %>%
  filter(!is.na(virus)) %>%
  group_by(animal_id) %>%
  summarise(virus_ind = paste0(unique(virus_group), collapse = " | ")) %>%
  as.data.frame()

# Per specimen (within animal): the distinct virus groups over all of its
# rows, collapsed the same way. Rows without a virus name are kept here.
temp2 <- tempH1 %>%
  group_by(animal_id, specimen_id) %>%
  summarise(virus_specimen = paste(unique(virus_group), collapse = " | ")) %>%
  as.data.frame()

# Combine both summaries by animal, drop the animal key, and attach the
# result to every test row through the specimen identifier.
temp3 <- full_join(temp1, temp2, by = "animal_id")
temp4 <- select(temp3, -animal_id)
H1 <- temp4 %>%
  full_join(H1, by = "specimen_id")
The annual wet season in southern Vietnam occurs from May 1st through November 30th, and the dry season from December 1st through April 30th.
# Assign the season from the sampling month: December through April is "Dry",
# every other month is "Wet". `%in%` never returns NA, so a missing month
# would otherwise be silently labelled "Wet"; such rows are kept as NA.
dry_months <- c(12, 1:4)
H1$hseason <- ifelse(
  is.na(H1$sample_month),
  NA_character_,
  ifelse(H1$sample_month %in% dry_months, "Dry", "Wet")
)

# Cross-check the month-to-season assignment.
table(H1$sample_month, H1$hseason)
##
## Dry Wet
## 1 643 0
## 3 850 0
## 4 82 0
## 6 0 100
## 8 0 46
## 9 0 9
## 10 0 2793
## 11 0 96
## 12 3 0
# Keep only the bat and rodent/shrew rows, then drop factor levels that no
# longer occur in the remaining data.
keep_taxa <- c("Bats", "Rodents & Shrews")
H1 <- droplevels(H1[H1$taxa_group %in% keep_taxa, , drop = FALSE])
Sampling site description by interface type and province:
# Number of distinct sampling sites in each primary interface group.
H1 %>%
  group_by(primary_interface_group) %>%
  summarise(site_counts = n_distinct(site_name))
## # A tibble: 6 x 2
## primary_interface_group site_counts
## <fct> <int>
## 1 trader 8
## 2 large market 14
## 3 restaurant 2
## 4 natural roost 1
## 5 bat guano farm 17
## 6 wildlife farm 28
# Number of distinct sampling sites per interface group and province.
H1 %>%
  group_by(primary_interface_group, state_prov) %>%
  summarise(site_counts = n_distinct(site_name))
## # A tibble: 9 x 3
## # Groups: primary_interface_group [6]
## primary_interface_group state_prov site_counts
## <fct> <fct> <int>
## 1 trader Dong Thap 4
## 2 trader Soc Trang 4
## 3 large market Dong Thap 12
## 4 large market Soc Trang 2
## 5 restaurant Soc Trang 2
## 6 natural roost Soc Trang 1
## 7 bat guano farm Dong Thap 5
## 8 bat guano farm Soc Trang 12
## 9 wildlife farm Dong Nai 28
Timing of sampling:
# Number of distinct sites sampled in each season.
H1 %>%
  group_by(hseason) %>%
  summarise(site_count = n_distinct(site_name))
## # A tibble: 2 x 2
## hseason site_count
## <chr> <int>
## 1 Dry 30
## 2 Wet 41
# Earliest sampling date among the retained rows.
with(H1, min(sample_date))
## [1] "2013-01-11 +07"
# Latest sampling date among the retained rows.
with(H1, max(sample_date))
## [1] "2014-03-29 +07"
Number of visits per site (and per season):
# Number of distinct sampling events recorded at each site.
H1 %>%
  group_by(site_name) %>%
  summarise(site_visits = n_distinct(event_name))
## # A tibble: 70 x 2
## site_name site_visits
## <fct> <int>
## 1 Bat pagoda - Soc Trang 3
## 2 Bat roost and quano farm - Soc Trang 1
## 3 Bat roost and quano farm CL - Dong Thap 1
## 4 Bat roost and quano farm CLD1 - Soc Trang 1
## 5 Bat roost and quano farm CLD2 - Soc Trang 1
## 6 Bat roost and quano farm CLD3 - Soc Trang 1
## 7 Bat roost and quano farm CLD4 - Soc Trang 1
## 8 Bat roost and quano farm CLD5 - Soc Trang 1
## 9 Bat roost and quano farm CLD6 - Soc Trang 1
## 10 Bat roost and quano farm CLD7 - Soc Trang 1
## # … with 60 more rows
# Number of distinct sampling dates per site and season. The output column is
# named `vists`, as in the printed table.
H1 %>%
  group_by(site_name, hseason) %>%
  summarise(vists = n_distinct(sample_date))
## # A tibble: 71 x 3
## # Groups: site_name [70]
## site_name hseason vists
## <fct> <chr> <int>
## 1 Bat pagoda - Soc Trang Dry 5
## 2 Bat pagoda - Soc Trang Wet 1
## 3 Bat roost and quano farm - Soc Trang Dry 1
## 4 Bat roost and quano farm CL - Dong Thap Wet 1
## 5 Bat roost and quano farm CLD1 - Soc Trang Wet 1
## 6 Bat roost and quano farm CLD2 - Soc Trang Wet 1
## 7 Bat roost and quano farm CLD3 - Soc Trang Wet 1
## 8 Bat roost and quano farm CLD4 - Soc Trang Wet 1
## 9 Bat roost and quano farm CLD5 - Soc Trang Wet 1
## 10 Bat roost and quano farm CLD6 - Soc Trang Wet 1
## # … with 61 more rows
In most cases, there was only one visit per site, except for the bat pagoda. This should call for caution when looking at seasonal effect, as site and season are confounded (so season effect may just be a result of the timing of sampling across the different sites). Only the Soc Trang bat pagoda had sampling in two seasons and is suitable for assessment of seasonal effect.
Summary tables clarifying what sampling protocol was used in each interface/site.
# Specimen counts by interface group, one column per specimen type.
# Combinations that do not occur in the data appear as NA.
H1[, c("primary_interface_group", "specimen_type")] %>%
  group_by(primary_interface_group) %>%
  count(specimen_type) %>%
  pivot_wider(names_from = specimen_type, values_from = n)
## # A tibble: 6 x 12
## # Groups: primary_interface_group [6]
## primary_interfa… brain lung `oral swab` `small intestin… feces kidney
## <fct> <int> <int> <int> <int> <int> <int>
## 1 trader 50 94 308 232 NA NA
## 2 large market 90 102 589 443 26 122
## 3 restaurant NA 170 239 191 4 NA
## 4 natural roost NA NA 30 NA 90 NA
## 5 bat guano farm NA NA NA NA 624 NA
## 6 wildlife farm NA NA NA NA 796 NA
## # … with 5 more variables: `rectal swab` <int>, `urine/urogenital swab` <int>,
## # spleen <int>, urine <int>, `environmental sample` <int>
# Specimen counts by site, one column per specimen type.
# Combinations that do not occur in the data appear as NA.
H1[, c("site_name", "specimen_type")] %>%
  group_by(site_name) %>%
  count(specimen_type) %>%
  pivot_wider(names_from = specimen_type, values_from = n)
## # A tibble: 70 x 12
## # Groups: site_name [70]
## site_name feces `oral swab` `rectal swab` urine `environmental …
## <fct> <int> <int> <int> <int> <int>
## 1 Bat pago… 90 30 36 4 NA
## 2 Bat roos… 16 NA NA NA NA
## 3 Bat roos… 19 NA NA NA NA
## 4 Bat roos… 12 NA NA NA NA
## 5 Bat roos… 20 NA NA NA NA
## 6 Bat roos… 14 NA NA NA NA
## 7 Bat roos… 10 NA NA NA NA
## 8 Bat roos… 24 NA NA NA NA
## 9 Bat roos… 6 NA NA NA NA
## 10 Bat roos… 10 NA NA NA NA
## # … with 60 more rows, and 6 more variables: `urine/urogenital swab` <int>,
## # kidney <int>, `small intestine` <int>, brain <int>, lung <int>,
## # spleen <int>
List viral taxonomic units
# Recode the placeholder "NULL" as a missing value, then list every distinct
# virus name found in the data (including NA for rows without a virus).
null_rows <- which(H1$virus == "NULL")
H1$virus[null_rows] <- NA
distinct(H1, virus)
## virus
## 1 strain of Bat coronavirus 512/2005
## 2 PREDICT_CoV-35
## 3 <NA>
## 4 PREDICT_CoV-17
## 5 strain of Murine coronavirus
## 6 strain of Longquan Aa mouse coronavirus
## 7 strain of Infectious bronchitis virus (IBV)
Number of viruses obtained with Watanabe protocol:
# Distinct virus names among tests run with the modified Watanabe protocol.
watanabe_tests <- filter(
  H1,
  test_requested_protocol == "Modified Watanabe et al, RdRp gene"
)
distinct(watanabe_tests, virus)
## virus
## 1 strain of Bat coronavirus 512/2005
## 2 <NA>
## 3 PREDICT_CoV-35
## 4 PREDICT_CoV-17
## 5 strain of Murine coronavirus
## 6 strain of Longquan Aa mouse coronavirus
Number of viruses obtained with Quan protocol:
# Distinct virus names among tests run with the Quan protocol.
quan_tests <- filter(
  H1,
  test_requested_protocol == "Quan et al, RdRp gene"
)
distinct(quan_tests, virus)
## virus
## 1 strain of Bat coronavirus 512/2005
## 2 PREDICT_CoV-35
## 3 <NA>
## 4 strain of Murine coronavirus
## 5 strain of Longquan Aa mouse coronavirus
## 6 PREDICT_CoV-17
## 7 strain of Infectious bronchitis virus (IBV)
# For each animal with at least one positive test, flag whether it was
# positive by the Watanabe protocol, by the Quan protocol, and by both.
# Each count is taken within a single animal's rows.
watanabe_protocol <- "Modified Watanabe et al, RdRp gene"
quan_protocol <- "Quan et al, RdRp gene"

allpos <- H1 %>%
  filter(confirmation_result == "Positive") %>%
  group_by(animal_id) %>%
  summarise(
    ind_count = n_distinct(animal_id),
    want_pos = n_distinct(
      animal_id[test_requested_protocol == watanabe_protocol &
                  confirmation_result == "Positive"]
    ),
    quan_pos = n_distinct(
      animal_id[test_requested_protocol == quan_protocol &
                  confirmation_result == "Positive"]
    ),
    both_tests_pos = as.numeric(want_pos + quan_pos == 2)
  )

# Column totals: positive animals overall, per protocol, and by both.
colSums(select(allpos, ind_count, want_pos, quan_pos, both_tests_pos))
## ind_count want_pos quan_pos both_tests_pos
## 504 433 410 339
Viruses found in bats
# Number of distinct animals per genus with a PREDICT_CoV-17 detection.
H1 %>%
  filter(virus_group == "PREDICT_CoV-17") %>%
  group_by(new_genus) %>%
  summarise(n_Infected_with_PCov17 = n_distinct(animal_id))
## # A tibble: 2 x 2
## new_genus n_Infected_with_PCov17
## <chr> <int>
## 1 Microchiroptera 1
## 2 Pteropus 3
# Number of distinct animals per genus with a PREDICT_CoV-35 detection.
H1 %>%
  filter(virus_group == "PREDICT_CoV-35") %>%
  group_by(new_genus) %>%
  summarise(n_Infected_with_PCov35 = n_distinct(animal_id))
## # A tibble: 2 x 2
## new_genus n_Infected_with_PCov35
## <chr> <int>
## 1 Microchiroptera 38
## 2 Pteropus 1
# Number of distinct animals per genus with a Bat coronavirus 512/2005
# detection.
H1 %>%
  filter(virus_group == "Bat coronavirus 512/2005") %>%
  group_by(new_genus) %>%
  summarise(n_Infected_with_BCov5122005 = n_distinct(animal_id))
## # A tibble: 4 x 2
## new_genus n_Infected_with_BCov5122005
## <chr> <int>
## 1 Hystrix 19
## 2 Microchiroptera 216
## 3 Rattus 1
## 4 Rhizomys 5
See also Table 1.
Viruses found in rodents
# For each taxa group and genus, list every virus group together with the
# number of distinct animals it was found in, e.g. "virus (count)", joined
# into one comma-separated string per genus.
animals_per_virus <- H1 %>%
  group_by(taxa_group, new_genus, virus_group) %>%
  summarise(ind_count_virus = n_distinct(animal_id)) %>%
  mutate(virus_ind_count = paste0(virus_group, " (", ind_count_virus, ")"))

animals_per_virus %>%
  group_by(taxa_group, new_genus) %>%
  summarise(viral_species = paste0(virus_ind_count, collapse = ", ")) %>%
  arrange(desc(taxa_group))
## # A tibble: 8 x 3
## # Groups: taxa_group [2]
## taxa_group new_genus viral_species
## <fct> <chr> <chr>
## 1 Rodents & Shr… Field Rats Longquan Aa mouse coronavirus (56), Murine corona…
## 2 Rodents & Shr… Hystrix Bat coronavirus 512/2005 (19), Infectious bronchi…
## 3 Rodents & Shr… Rattus Bat coronavirus 512/2005 (1), NULL (1)
## 4 Rodents & Shr… Rhizomys Bat coronavirus 512/2005 (5), Infectious bronchit…
## 5 Rodents & Shr… Sciuridae NULL (1)
## 6 Bats Cynopterus NULL (2)
## 7 Bats Microchirop… Bat coronavirus 512/2005 (216), NULL (126), PREDI…
## 8 Bats Pteropus NULL (59), PREDICT_CoV-17 (3), PREDICT_CoV-35 (1)
and see code for Table 1 above
Details on IBV detection:
# Where infectious bronchitis virus (IBV) was detected: number of distinct
# animals by interface group, taxa group, species, site and ID certainty.
H1 %>%
  filter(virus_group == "Infectious bronchitis virus (IBV)") %>%
  group_by(
    primary_interface_group,
    taxa_group,
    species_scientific_name,
    site_name,
    id_certainty
  ) %>%
  summarise(ind_count = n_distinct(animal_id))
## # A tibble: 2 x 6
## # Groups: primary_interface_group, taxa_group, species_scientific_name,
## # site_name [2]
## primary_interfac… taxa_group species_scienti… site_name id_certainty ind_count
## <fct> <fct> <fct> <fct> <fct> <int>
## 1 wildlife farm Rodents &… Hystrix brachyu… Farm 25 … field ID ce… 1
## 2 wildlife farm Rodents &… Rhizomys sp. Farm 4 -… field ID ce… 1