# ----------------------------------------- libraries ---------------------------------------------- library(ggplot2) library(magrittr) library(ggpubr) library(plyr) library(dplyr) library(rptR) library(car) library(Matrix) library(lme4) library(stats) library(lmerTest) library(grid) library(gridExtra) library(reshape2) library(cowplot) # ------------------------------------- loading in data -------------------------------------------- remove(list=ls()) # set the working directory setwd("C:/Users/nynke/Documents/South Africa/data_archive/R") # read the sound data # write the name of the file, sep means how the numbers of the data are seperated # header means that the first line of the file are the headers sodata<-read.csv2("Sound_data.csv",sep=";",header=T, fill = T) # ------------------------------------- look at the data ------------------------------------------- names(sodata) # remove columns that you for sure do not need # sound: column 21, 22, 30, 42 # scent: column 20, 21, 29, 41 sodata <- sodata[,c(1:20, 26:30, 32:42, 44:47)] # this shows all the columns I want to keep names(sodata) # rename columns so you can easily combine the data colnames(sodata) colnames(sodata)[2] <- "Individual" colnames(sodata)[3] <- "Test_nr" colnames(sodata)[13] <- "More_20m" colnames(sodata)[14] <- "Between_10_20m" colnames(sodata)[15] <- "Less_10m" colnames(sodata)[20] <- "Min_distance" colnames(sodata)[23] <- "Time_more_20m" colnames(sodata)[24] <- "Time_10_20m" colnames(sodata)[25] <- "Time_less_10m" colnames(sodata)[37] <- "Hes_far" colnames(sodata)[38] <- "Hes_close" # make factors sodata$Area <- as.factor(sodata$Area) sodata$Individual <- as.factor(sodata$Individual) sodata$Test_nr <- as.factor(sodata$Test_nr) sodata$Sex <- as.factor(sodata$Sex) sodata$Partner <- as.factor(sodata$Partner) sodata$Test_sound <- as.factor(sodata$Test_sound) sodata$Test_model <- as.factor(sodata$Test_model) sodata$Test_temp <- as.factor(sodata$Test_temp) sodata$Daily_temp <- as.factor(sodata$Daily_temp) sodata$Date <- as.factor(sodata$Date) # remove the individuals from the dataset with only 1 test. sodata <- sodata[-c(32, 33), ] # ----------------------------------------- Repeatability ------------------------------------------- # respeatability can only be calculated with numeric or integer variables, not factor # do not use model, observer and sound in the code because they have low variables (<6) # Repeatability explained: # R is how good you can recognize an individual when comparing it to another # E.g. you have a group of all white chickens. This give a low R because there is less difference between the chickens # variation between individuals smaller, so R is smaller # in order to calculate the repeatability, make Hes present into 0 and 1 # sodata$Hes_present <- revalue(sodata$Hes_present, c("Y"=1)) # sodata$Hes_present <- revalue(sodata$Hes_present, c("N"=0)) library(rptR) ressound <- rpt(Less_10m ~ (1|Individual), grname = "Individual", data = sodata, datatype = "Binary", nboot = 1000, npermut = 0) print(ressound) # R = 0.039 # SE = 0.082 # CI = [0, 0.286] # P = 0.366 [LRT] # Original-scale approximation: # R = 0.039 # SE = 0.078 # CI = [0, 0.273] # P = 0.366 [LRT] ressound2 <- rpt(Time_less_10m ~ (1|Individual), grname = "Individual", data = sodata, datatype = "Gaussian", nboot = 1000, npermut = 0) print(ressound2) # Repeatability for Individual # R = 0.003 # SE = 0.073 # CI = [0, 0.239] # P = 1 [LRT] ressound3 <- rpt(Hes_present ~ (1|Individual), grname = "Individual", data = sodata, datatype = "Binary", nboot = 1000, npermut = 0) print(ressound3) # Link-scale approximation: # R = 0.03 # SE = 0.078 # CI = [0, 0.27] # P = 0.391 [LRT] # Original-scale approximation: # R = 0.03 # SE = 0.076 # CI = [0, 0.263] # P = 0.391 [LRT] ressound4 <- rpt(Total_hes ~ (1|Individual), grname = "Individual", data = sodata, datatype = "Poisson", nboot = 1000, npermut = 0) print(ressound4) # R individual link-scale = 0.166 # R individual original scale = 0.029 # it is still a little low, but could be due to a lack of individuals # P value is 0.21 --> not significant. Again could be due to the lack of tests # could also be that the response is not an individual trait, but that it could be due to learning, or copying # --------------------------------------- Make basic plots ------------------------------------------ # look at the start distance, where do captive individuals start in comparison to wild # sum of all cheetahs at Ashia that had a <10m start distance # 2 test of 30 started at <10m # Total of 11 individuals ashiadata1 <- sodata[1:30, ] # subset of the ashia data (1 for sound, 2 for scent) length(ashiadata1$Start_distance[which(ashiadata1$Start_distance == "<10"),]) # now look at the start distance for wild individuals # 5 tests of 33 started at <10m # Total of 8 individuals (the 4 cubs seen as one) kuzukodata1 <- sodata[31:64, ] # subset of kuzukodata (1 for sound, 2 for scent) length(kuzukodata1$Start_distance[which(kuzukodata1$Start_distance == "<10"),]) # In how many tests does an individual walk at <10m from the set-up # 18 tests in <10m of which 2 started at <10 ASHIA # 5 tests in <10m of which 1 started at <10 KUZUKO plot(sodata$Area, sodata$Less_10m) # captive individuals walk more often towards the set-up # meaning that they mostly actively walked towards the set-up since they did not start there # Test for significance. # --------------------- Are captive individuals present in <10m more often -------------------------- glm1=glmer(sodata$Less_10m~sodata$Area+(1|sodata$Individual), family = binomial(link = "logit")) summary(glm1) # P = 0.0004 *** # highly SIGNIFICANT drop1(glm1) # make a graph # do it in percentages to correct for the amount of individuals per area ! ####### ashia: 30 tests, 12 N, 18 Y --> 40% N, 60% Y ####### kuzuko: 34 tests, 29 N, 5 Y --> 85% N, 15% Y # create dataset N <- c(40,85) Yes <- c(60,15) Area <- c("Captive", "Wild") df10 <- data.frame(Yes, Area) df10_2 <- melt(df10, id.vars='Area') head(df10_2) # now you see it made 3 columns # graph it grob <- grobTree(textGrob("P < 0.001", x=0.60, y=0.85, hjust=0, gp=gpar(col="black", fontsize=20, fontface="italic"))) # to add the p value as a text in the graph p1<-ggplot(df10_2, aes(x=Area, y=value, fill=Area)) + ylim(0,70) + geom_bar(position = "dodge", stat='identity') + # the y axis is a % of all the times <10 happens within an area # geom_text(aes(label=value), vjust=1.6, color="black", # position = position_dodge(0.9), size=4.5) + scale_fill_manual(values = c("gray21","gray70")) + labs(title="Individual presence at <10m", x="Area", y = "Individuals present at <10m (%)")+ # annotation_custom(grob) + # p value theme(plot.title = element_text(hjust = 0.5, size = 30)) + theme(axis.text = element_text(size=20)) + theme(axis.title = element_text(size=25)) p1 # ---------------- Do captive individuals also spent more time close to the set up? ------------------ # Make another dataset with only the animals/observations that came <10m # in this dataset you look if the area influenced HOW LONG a cheetah stayed within 10m sodataY=subset(sodata, Less_10m=="1") Time10M_lmer=lmer(Time_less_10m~Area+(1|Individual),data=sodataY) resT10M=residuals(Time10M_lmer) shapiro.test(resT10M) # P = 0.006 # so as predicted not normally distributed at all hist(resT10M) # does not look very normal... qqnorm(resT10M) qqline(resT10M) # also does not look normal # log transform the data # This means that a short time spend <10m "counts stronger" than a long time sodataY$logTime_less_10m = log(sodataY$Time_less_10m+1) Time10M_lmer=lmer(sodataY$logTime_less_10m~Area+(1|Individual),data=sodataY) resT10M=residuals(Time10M_lmer) shapiro.test(resT10M) # P = 0.52 # so now its normally distributed summary(Time10M_lmer) # t value = -0.587 # there is a negative correlation between ashia and kuzuko # Kuzuko has a lower time spend <10m # however, P = 0.568, so NOT SIGNIFICANT # can be explained that when an individual does not know being close is dangerous # it also does not care how long it spends there # graph it grob2 <- grobTree(textGrob("P = 0.568", x=0.4, y=0.85, hjust=0, gp=gpar(col="black", fontsize=20, fontface="italic"))) # to add the p value as a text in the graph box10<-plyr::ddply(sodataY,c("Area"), summarise, N=length(Time_less_10m),mean_div=mean(Time_less_10m,na.rm=T),sd_div=sd(Time_less_10m,na.rm = T),se_div=sd_div/sqrt(N)) box10 p2<-ggplot(box10, aes(x=Area, y=mean_div, fill=Area))+ geom_bar(stat = "identity", position = "dodge")+ geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ scale_fill_manual(values = c("Gray21","Gray70")) + labs(title = "Time spent at <10m per area", x="Area", y="Mean time spent <10m (±SE)")+ # annotation_custom(grob2) + # p value theme(plot.title = element_text(hjust = 0.5, size = 30)) + theme(axis.text = element_text(size=20)) + theme(axis.title = element_text(size=25)) p2 # combine p1 (present at <10m) with p2 (time <10m) in one grid plot_grid(p1, p2, labels = c("A", "B"), ncol = 2, nrow = 1) # --------------------------------Are captive individuals more aggressive towards the set-up? ------------------------ p<-ggplot(data=sodata, aes(x=Area, y=Time_aggr, fill=Area)) + geom_bar(stat = "identity") + scale_fill_manual(values = c("Saddlebrown","Orange1")) p+labs(title="Time spend being aggressive per area", x="Area", y = "Time spend being aggressive")+ theme(plot.title = element_text(hjust = 0.5, size = 20)) + theme(axis.text = element_text(size=12)) + theme(axis.title = element_text(size=16)) # yes but since there are 0 individuals aggressive at Kuzuko # we cannot perform a P value test on this # Do captive individuals hesitate more towards the set up? ggplot(data=sodata, aes(x=Area, y=Hes_present)) + geom_bar(stat = "identity") # shows barplot of the total amount of tests where hesitation was present # captive are more hesitant, they walk closer to the set-up more often # check if this is significant # -------------------- Is hesitation more often present in captive individuals? -------------------- glm2=glmer(sodata$Hes_present~sodata$Area+(1|sodata$Individual), family = binomial(link = "logit")) summary(glm2) # highly significant # P = 0.004 # make a graph # this also has to be in percentages to control for the amount of tests done per area ##### ashia: 30 tests, 11 N, 19 Y --> 37%N, 63%Y ##### kuzuko: 34 tests, 28 N, 6 Y --> 82%N, 18%Y # create dataset N <- c(37,82) Yes <- c(63,18) Area <- c("Captive", "Wild") dfh <- data.frame(Yes, Area) dfh2 <- melt(dfh, id.vars='Area') head(dfh2) # graph it grob3 <- grobTree(textGrob("P = 0.006", x=0.6, y=0.85, hjust=0, gp=gpar(col="black", fontsize=20, fontface="italic"))) # to add the p value as a text in the graph p3<-ggplot(dfh2, aes(x=Area, y=value, fill=Area)) + ylim(0,70) + geom_bar(position = "dodge", stat='identity') + # geom_text(aes(label=value), vjust=1.6, color="black", # position = position_dodge(0.9), size=4.5) + scale_fill_manual(values = c("Gray21","Gray70")) + labs(title="Hesitation presence per area", x="Area", y = "Individuals hesitating (%)")+ # annotation_custom(grob3) + # p value theme(plot.title = element_text(hjust = 0.5, size = 30)) + theme(axis.text = element_text(size=20)) + theme(axis.title = element_text(size=25)) p3 # --------------------- Do captive individuals hesitate more in totality? -------------------------- glm4=glmer(sodata$Total_hes~sodata$Area+(1|sodata$Individual), family = poisson) summary(glm4) # highly significant # P < 0.0001 #graph it grob4 <- grobTree(textGrob("P < 0.001", x=0.6, y=0.85, hjust=0, gp=gpar(col="black", fontsize=20, fontface="italic"))) # to add the p value as a text in the graph boxhes<-plyr::ddply(sodata,c("Area"), summarise, N=length(Total_hes),mean_div=mean(Total_hes,na.rm=T),sd_div=sd(Total_hes,na.rm = T),se_div=sd_div/sqrt(N)) boxhes p4<-ggplot(boxhes, aes(x=Area, y=mean_div, fill=Area))+ geom_bar(stat = "identity", position = "dodge")+ geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ scale_fill_manual(values = c("Gray21","Gray70")) + labs(title = "Total hesitation per area", x="Area", y="Mean hesitation present (±SE)")+ # annotation_custom(grob4) + # p value theme(plot.title = element_text(hjust = 0.5, size = 30)) + theme(axis.text = element_text(size=20)) + theme(axis.title = element_text(size=25)) p4 # combine p3 (hes present) with p4 (total hes) in one grid plot_grid(p3, p4, labels = c("A", "B"), ncol = 2, nrow = 1) # combine all plot_grid(p1, p2, p3, p4, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2) # ----------------------------------------------- Do they learn ----------------------------------------------------------- # check for the presence at <10m # use the subset sodataY to see if the amount of "Y" decreases per test # I made a small data set to help with this smalldata1<-read.csv2("sound_small.csv",sep=";",header=T, fill = T) smalldata1$Test_nr <- as.factor(smalldata1$Test_nr) # check with glmer per area # ashia ashia1<-subset(sodata, Area=="Captive") glmL1=glmer(ashia1$Less_10m~ashia1$Test_nr+ashia1$Partner+(1|ashia1$Individual), family = binomial(link = "logit")) summary(glmL1) #kuzuko kuzuko1<-subset(sodata, Area=="Wild") glmL2=glmer(kuzuko1$Less_10m~kuzuko1$Test_nr+kuzuko1$Partner+(1|kuzuko1$Individual), family = binomial(link = "logit")) summary(glmL2) # nothing significant l1<-ggplot(data=smalldata1, aes(x=Area, y=Per_less, fill=Test_nr)) + ylim(0,90) + geom_bar(stat="identity", position=position_dodge())+ # geom_text(aes(label=Test_nr), vjust=1.5, color="black", # position = position_dodge(0.9), size=3.5)+ scale_fill_brewer(palette="Greys")+ theme_minimal() + labs(title="Presence at <10m", x="Test", y = "Proportion at <10m (%)")+ theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) l1 # time spent <10m # check again with lmer per area # ashia ashia2<-subset(sodataY, Area=="Captive") Time=lmer(Time_less_10m~Test_nr+(1|Individual),data=ashia2) res=residuals(Time) shapiro.test(res) # quite okay summary(Time) #kuzuko kuzuko2<-subset(sodataY, Area=="Wild") Time2=lmer(Time_less_10m~Test_nr+(1|Individual),data=kuzuko2) # too little observations soy<-read.csv2("SOdataY2.csv",sep=";",header=T, fill = T) soy$Test. <- as.factor(soy$Test.) boxtimec<-plyr::ddply(soy,c("Area","Test."), summarise, N=length(Time_.10),mean_div=mean(Time_.10,na.rm=T),sd_div=sd(Time_.10,na.rm = T),se_div=sd_div/sqrt(N)) boxtimec l2<-ggplot(boxtimec, aes(x=Area, y=mean_div, fill=Test.))+ ylim(0,900) + geom_bar(stat = "identity", position = "dodge")+geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ labs(x="Tests per area", y="Time spent <10m") + theme_classic(base_size=14) + labs(title = "Time spent <10m", x="Test", y="Mean time spent <10m (± SE)")+ scale_fill_brewer(palette="Greys") + theme_minimal() + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) l2 # at ashia there seems to be a small learning trend # at kuzuko not at all # this could be because at ashia they can easier copy and eavesdrop # nothing differs significantly # combine p5 (learning plot presence <10m) with p6 (time <10m) plot_grid(l1, l2, labels = c("A", "B"), ncol = 1, nrow = 2) # hesitation presence # check again with glmer per area # ashia glmL3=glmer(ashia1$Hes_present~ashia1$Test_nr+ashia1$Partner+(1|ashia1$Individual), family = binomial(link = "logit")) summary(glmL3) #kuzuko glmL4=glmer(kuzuko1$Hes_present~kuzuko1$Test_nr+kuzuko1$Partner+(1|kuzuko1$Individual), family = binomial(link = "logit")) summary(glmL4) # nothing significant l3<-ggplot(data=smalldata1, aes(x=Area, y=Per_hes, fill=Test_nr)) + ylim(0,90) + geom_bar(stat="identity", position=position_dodge())+ # geom_text(aes(label=Per_hes), vjust=1.6, color="black", # position = position_dodge(0.9), size=3.5)+ scale_fill_brewer(palette="Greys")+ theme_minimal() + labs(title="Hesitation present", x="Test", y = "Proportion of hesitation (%)")+ theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) l3 # total hesitation # check again with glmer per area # ashia glmL5=glmer(ashia1$Total_hes~ashia1$Test_nr+ashia1$Partner+(1|ashia1$Individual), family = poisson) summary(glmL5) # test 3 differs significantly #kuzuko glmL6=glmer(kuzuko1$Total_hes~kuzuko1$Test_nr+kuzuko1$Partner+(1|kuzuko1$Individual), family = poisson) summary(glmL6) # nothing significant boxhes<-plyr::ddply(sodata,c("Area","Test_nr"), summarise, N=length(Total_hes),mean_div=mean(Total_hes,na.rm=T),sd_div=sd(Total_hes,na.rm = T),se_div=sd_div/sqrt(N)) boxhes l4<-ggplot(boxhes, aes(x=Area, y=mean_div, fill=Test_nr))+ ylim(0,9) + geom_bar(stat = "identity", position = "dodge")+geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ labs(x="Tests per area", y="Total hesitation present") + theme_classic(base_size=14) + labs(title = "Total hesitation", x="Test", y="Mean amount of hesitation (± SE)")+ scale_fill_brewer(palette="Greys") + theme_minimal() + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) l4 # a de-learning curve at ashia ?? # Nothing differs significantly # combine p7 (hes present) and p8 (total hes) in one grid plot_grid(l3, l4, labels = c("A", "B"), ncol = 1, nrow = 2) plot_grid(l1, l2, l3, l4, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2) # -------------------------- Difference between control and lion sound ----------------------------- # presence at <10m and time <10m # only do this for ashia, because: # safety issues --> could not start tests at <10m at kuzuko # control does not give movement and lion chases them away ssdata<-read.csv2("control_ssmall.csv",sep=";",header=T, fill = T) # check again with glmer per area # ashia glm5=glmer(sodata$Less_10m~sodata$Test_sound+sodata$Area+(1|sodata$Individual), family = binomial(link = "logit")) summary(glm5) # no significant treatment effect on <10m # graph it # use ssdata because it has the percentages (proportion) p5<-ggplot(data=ssdata, aes(x=Area, y=Per_less, fill=Test_sound)) + ylim(0,70) + geom_bar(stat="identity", position=position_dodge())+ # geom_text(aes(label=Per_less), vjust=1.6, color="black", # position = position_dodge(0.9), size=3.5)+ scale_fill_manual(values = c("Gray21","Gray70")) + theme_minimal() + labs(title="Presence at <10m", x="Area", y = "Proportion at <10m (%)")+ theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p5 # time spent <10m sodataY=subset(sodata, Less_10m=="1") # ashia ashia2<-subset(sodataY, Area=="Captive") Time=lmer(Time_less_10m~Test_sound+Area+(1|Individual),data=sodataY) res=residuals(Time) shapiro.test(res) # quite okay summary(Time) # nothing significant kuzuko2<-subset(sodataY, Area=="Wild") Time2=lmer(Time_less_10m~Test_sound+Area+(1|Individual),data=sodataY) res2=residuals(Time2) shapiro.test(res2) # quite okay summary(Time2) # nothing significant # graph it boxcont<-plyr::ddply(sodataY,c("Area","Test_sound"), summarise, N=length(Time_less_10m),mean_div=mean(Time_less_10m,na.rm=T),sd_div=sd(Time_less_10m,na.rm = T),se_div=sd_div/sqrt(N)) boxcont p6<-ggplot(boxcont, aes(x=Area, y=mean_div, fill=Test_sound))+ geom_bar(stat = "identity", position = "dodge")+geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ labs(x="Area", y="Mean time spent <10m (± SE)") + theme_minimal() + labs(title = "Time spent <10m")+ scale_fill_manual(values = c("Gray21","Gray70")) + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p6 # at kuzuko no errorbar because its only 1 individual # this also explains the huge bar difference # no significant difference between control and lion sound # at ashia this is logical because they dont know the danger # at kuzuko this is because i could not start ther test at <10m (except for once) # so with lion sound they will walk away and dont come closer # and with control they dont care and stay where they are, which is not within 10m plot_grid(p5, p6, labels = c("A", "B"), ncol = 1, nrow = 2) # nothing is significantly different # hesitation glm7=glmer(sodata$Hes_present~sodata$Test_sound+sodata$Area+(1|sodata$Individual), family = binomial(link = "logit")) summary(glm7) # no significant treatment differences # graph it # again use ssdata to get percentages p7<-ggplot(data=ssdata, aes(x=Area, y=Per_hes, fill=Test_sound)) + ylim(0,70) + geom_bar(stat="identity", position=position_dodge())+ # geom_text(aes(label=Per_hes), vjust=1.6, color="black", # position = position_dodge(0.9), size=3.5)+ scale_fill_manual(values = c("Gray21","Gray70")) + theme_minimal() + labs(title="Presence of hesitation", x="Area", y = "Proportion hesitation (%)")+ theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p7 # total hesitation glm8=glmer(sodata$Total_hes~sodata$Test_sound+sodata$Area+(1|sodata$Individual), family = poisson) summary(glm8) # significant difference between the areas # significant difference between treatments # make model and graph to check which area(s) differ in treatment captive<-subset(sodata, Area=="Captive") glm9=glmer(captive$Total_hes~captive$Test_sound+(1|captive$Individual), family = poisson) summary(glm9) # P = 0.0021 # the treatment diference is within ashia # individuals hesitate more often when lion sound is played in captivity # maladaptive! # graph it boxconth<-plyr::ddply(sodata,c("Area","Test_sound"), summarise, N=length(Total_hes),mean_div=mean(Total_hes,na.rm=T),sd_div=sd(Total_hes,na.rm = T),se_div=sd_div/sqrt(N)) boxconth p8<-ggplot(boxconth, aes(x=Area, y=mean_div, fill=Test_sound))+ geom_bar(stat = "identity", position = "dodge")+geom_errorbar(aes(ymin=mean_div-se_div, ymax=mean_div+se_div), width = 0.2, position=position_dodge(.9))+ labs(x="Area", y="Mean amount of hesitation (± SE)") + theme_classic(base_size=14) + labs(title = "Total hesitation")+ scale_fill_manual(values = c("Gray21","Gray70")) + theme_minimal() + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p8 plot_grid(c3, c4, labels = c("A", "B"), ncol = 1, nrow = 2) # plot all 4 together plot_grid(p5, p6, p7, p8, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2) # ------------------------------------------------------------------------------ # in order to check if individuals fled or stayed at control or lion sound # check all the times an individual came >20m while also be within 20m # made a dataset with all the individuals that come both <20m and >20m in one test cdata<-read.csv2("Control_lion_T.csv", sep = ";", header = T, fill = T) # file sodata but with a column end-start # check for normality in the new column hist(cdata$End_Start) qqnorm(cdata$End_Start) qqline(cdata$End_Start) shapiro.test(cdata$End_Start) # not normally distributed # transform the data to all + so you can use poisson # since this can end in negative numbers, I added an extra column (E_S_T) # this column has the lowest number of end-start to 0, and added that number to all the other to make everything positive # check for significance (non parametric) ashiadata1.1<-subset(cdata, Area=="Ashia") ashiadata1.2<-subset(ashiadata1.1, Test_sound=="Control") ashiadata1.3<-subset(ashiadata1.1, Test_sound=="Lion") wilcox.test(ashiadata1.2$Start_distance, ashiadata1.2$Distance_end) # P = 0.025 # so just a significant difference between control start and end wilcox.test(ashiadata1.3$Start_distance, ashiadata1.3$Distance_end) # P = 0.058 # just not significant difference between start and end lion kuzukodata1.1<-subset(cdata, Area=="Kuzuko") kuzukodata1.2<-subset(kuzukodata1.1, Test_sound=="Control") kuzukodata1.3<-subset(kuzukodata1.1, Test_sound=="Lion") wilcox.test(kuzukodata1.2$Start_distance, kuzukodata1.2$Distance_end) # P = 0.43 # so no significant difference between control start and end wilcox.test(kuzukodata1.3$Start_distance, kuzukodata1.3$Distance_end) # P = 0.018 # A significant difference between start and end lion # check again but parametric (glmer etc) cdata$Test_sound <- as.factor(cdata$Test_sound) cdata$Individual <- as.factor(cdata$Individual) # remove the rows with NA in them cdata <- cdata[-c(67, 68, 69), ] # check for ashia ashiadata1.1<-subset(cdata, Area=="Ashia") glm10=glmer(ashiadata1.1$E_S_T~ashiadata1.1$Test_sound+(1|ashiadata1.1$Individual), family = poisson) summary(glm10) # no within individual effect of treatment within the captive area # Thus when comparing the means of all individuals seperately within the captive area, # no significant difference between control and lion sound is present # check for kuzuko kuzukodata1.1<-subset(cdata, Area=="Kuzuko") glm11=glmer(kuzukodata1.1$E_S_T~kuzukodata1.1$Test_sound+(1|kuzukodata1.1$Individual), family = poisson) summary(glm11) # a significant difference of treatment within the wild population is present # P = 0.0069 where wild individuals end significantly further away when lion sound is played in comparison to control # make graph of both areas seperately to later grid them together # for each area look at the start and end distance # see if there is a difference between control and lion treatment # ashia Start <- c(19.6, 18.93) End <- c(15.94, 16.43) Treatment <- c("Control", "Lion") dfa <- data.frame(Start, End, Treatment) dfa_2 <- melt(dfa, id.vars='Treatment') head(dfa_2) # now you see it made 3 columns groba <- grobTree(textGrob("P = 0.025", x=0.3, y=0.95, hjust=0, gp=gpar(col="black", fontsize=16, fontface="italic"))) # to add the p value as a text in the graph p10<-ggplot(dfa_2, aes(x=variable, y=value, fill=Treatment)) + geom_bar(position = "dodge", stat='identity') + # the y axis is a % of all the times <10 happens within an area # geom_text(aes(label=value), vjust=1.6, color="black", # position = position_dodge(0.9), size=6) + scale_fill_manual(values = c("Gray21", "Gray70")) + labs(title="Captive", subtitle = "Start and end distance", x="Start/end distance per treatment", y = "Distance (m)")+ expand_limits(y=c(0, 23)) + # annotation_custom(groba) + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(plot.subtitle = element_text(hjust = 0.5, size = 20)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p10 # kuzuko Start <- c(16.25, 16.25) End <- c(15, 18.75) Treatment <- c("Control", "Lion") dfk <- data.frame(Start, End, Treatment) dfk_2 <- melt(dfk, id.vars='Treatment') head(dfk_2) # now you see it made 3 columns grobk <- grobTree(textGrob("P = 0.018", x=0.4, y=0.95, hjust=0, gp=gpar(col="black", fontsize=16, fontface="italic"))) # to add the p value as a text in the graph p11<-ggplot(dfk_2, aes(x=variable, y=value, fill=Treatment)) + geom_bar(position = "dodge", stat='identity') + # the y axis is a % of all the times <10 happens within an area # geom_text(aes(label=value), vjust=1.6, color="black", # position = position_dodge(0.9), size=6) + scale_fill_manual(values = c("Gray21", "Gray70")) + labs(title="Wild", subtitle = "Start and end distance", x="Start/end distance per treatment", y = "Distance (m)")+ expand_limits(y=c(0, 23)) + # annotation_custom(grobk) + theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(plot.subtitle = element_text(hjust = 0.5, size = 20)) + theme(axis.text = element_text(size=15)) + theme(axis.title = element_text(size=20)) p11 plot_grid(pa, pk, labels = c("A", "B"), ncol = 2, nrow = 1) # ------------------------------------ make graph with end-start distance ---------------------------- # make a graph and immediately add the SE's attach(cdata) names(cdata) DatA2=droplevels(subset(cdata, Area=="Ashia")) # ashia # calculate the means for the captive population TempStart2=split(DatA2$End_Start,DatA2$Test_sound) MeanStart2=sapply(TempStart2, mean); MeanStart2 # op dezelfde manier kun SD en sampesizes uitrekenen, en daarmee kan je SE uitrekenen SDStart2=sapply(TempStart2, sd) nStart2 = sapply(TempStart2, length) #SE=SD/Sqrt(n) SEStart2=SDStart2/sqrt(nStart2) # kuzuko # calculate the means for the wild population DatK2=droplevels(subset(cdata, Area=="Kuzuko")) TempStartK2=split(DatK2$End_Start,DatK2$Test_sound) MeanStartK2=sapply(TempStartK2, mean); MeanStartK2 # op dezelfde manier kun SD en sampesizes uitrekenen, en daarmee kan je SE uitrekenen SDStartK2=sapply(TempStartK2, sd) nStartK2 = sapply(TempStartK2, length) #SE=SD/Sqrt(n) SEStartK2=SDStartK2/sqrt(nStartK2) Treatment2=c("Control","Lion", "Control", "Lion") Area = c("Captive", "Captive", "Wild", "Wild") # Put all numbers of captive and wild in a column Means2=c(MeanStart2,MeanStartK2) SDs2=c(SDStart2,SDStartK2) SEs2=c(SEStart2,SEStartK2) Ns2=c(nStart2,nStartK2) # Make a complete dataframe of both populations dfa_5=data.frame(Area,Treatment2,Means2,SDs2,SEs2,Ns2) dfa_5 # change the order of the y-axis Area1<-factor(dfa_5$Area, levels=c("Wild", "Captive")) # put the SE in this graph p12 <- ggplot(dfa_5, aes(x = Area1, y = Means2, fill = Treatment2)) + geom_bar(stat = "identity", position = "dodge") + geom_errorbar(aes(ymin=Means2-SEs2, ymax=Means2+SEs2), width = 0.2, position=position_dodge(.9))+ scale_fill_manual(values = c("Gray21", "Gray70")) + labs(title="Control vs lion sound", subtitle = "End - start distance", x="", y = "Difference in distance (m) (±SE)")+ theme(plot.title = element_text(hjust = 0.5, size = 25)) + theme(plot.subtitle = element_text(hjust = 0.5, size = 20)) + theme(axis.text = element_text(size=20)) + theme(axis.title = element_text(size=22)) p12 + coord_flip()