# R script analyses for Mesoudi, Magid & Hussain (submitted) How do people become WEIRD? Migration reveals the cultural transmission mechanisms underlying variation in psychological processes # The following contains (i) data input and preparation (ii) demographic analysis and (iii) a series of analyses for each of the measures (individualism, collectivism, etc.). Three lines separate each analysis. The analyses (ii) and (iii) can be run independently, but the data preparation (i) must be run before any of them. # LOAD LIBRARIES----------------------------- library(ggplot2) library(multcomp) library(reshape2) library(MuMIn) library(car) library(pastecs) library(QuantPsyc) library(ordinal) library(grid) library(stringr) library(MASS) # LOAD DATA FILE ---------------------------------------------------------- #ensure S2_File_bengaliData.csv is in the working directory, or add the path below bengaliData = read.csv("S2_File_bengaliData.csv", header=TRUE) # DATA PREPARATION -------------------------------------------------------- #run all this before any further analyses # Remove implausible outliers ----------------------------------------------------------- #remove implausible outliers with >40 family interaction and >30 family contact bengaliData$family_contact <- ifelse(bengaliData$family_contact >= 30, NA, bengaliData$family_contact) bengaliData$family_interaction <- ifelse(bengaliData$family_interaction >= 40, NA, bengaliData$family_interaction) #remove implausible outliers where hours spent on internet or watching TV per day exceeds 20 bengaliData$media_internet <- ifelse(bengaliData$media_internet >= 20, NA, bengaliData$media_internet) bengaliData$media_tv <- ifelse(bengaliData$media_tv >= 20, NA, bengaliData$media_tv) #remove implausible outlier where hours with family exceeds 600 bengaliData$tot_relhours <- ifelse(bengaliData$tot_relhours >= 400, NA, bengaliData$tot_relhours) #get rid of whitespace around religion bengaliData$religion <- str_trim(bengaliData$religion) # create cultural groups from country born -------------------------------- #recode variables into Bangladeshi, UK or NA, retaining the ORIGINALs bengaliData$country_bornORIGINAL <- bengaliData$country_born bengaliData$country_born <- car::recode(bengaliData$country_bornORIGINAL, "'Bangladesh'='Bangladesh';'England'='UK';'UK'='UK';'england'='UK';'Engand'='UK';'Ireland'='UK';'Scotland'='UK';'scotland'='UK';'bangladesh'='Bangladesh';'wales'='UK';'ireland'='UK';'bangaldesh'='Bangladesh';'uk'='UK';'britain'='UK';'uki'='UK';'bengladesh'='Bangladesh';'dhaka bangladesh'='Bangladesh';else=NA") bengaliData$country_born <- factor(bengaliData$country_born, levels=c("UK","Bangladesh")) bengaliData$father_countrybornORIGINAL <- bengaliData$father_countryborn bengaliData$father_countryborn <- car::recode(bengaliData$father_countrybornORIGINAL, "'Bangladesh'='Bangladesh';'England'='UK';'england'='UK';'Engand'='UK';'Ireland'='UK';'Scotland'='UK';'scotland'='UK';'bangladesh'='Bangladesh';'wales'='UK';'ireland'='UK';'bangaldesh'='Bangladesh';'pangsha bangladesh'='Bangladesh';'dhaka bangladesh'='Bangladesh';'uk'='UK';'britain'='UK';'uki'='UK';'bengladesh'='Bangladesh';else=NA") bengaliData$mother_countrybornORIGINAL <- bengaliData$mother_countryborn bengaliData$mother_countryborn <- car::recode(bengaliData$mother_countrybornORIGINAL, "'Bangladesh'='Bangladesh';'England'='UK';'england'='UK';'Engand'='UK';'Ireland'='UK';'Scotland'='UK';'scotland'='UK';'bangladesh'='Bangladesh';'wales'='UK';'ireland'='UK';'bangaldesh'='Bangladesh';'bangadesh'='Bangladesh';'rajbari bangladesh'='Bangladesh';'dhaka bangladesh'='Bangladesh';'uk'='UK';'britain'='UK';'uki'='UK';'bengladesh'='Bangladesh';else=NA") #some code to create cultural_group variable from country_born, mother_born and father_born #0=non-migrant (non-migrant), 1=2ndgen, 2=1stgen, NA=other bengaliData$cultural_group = 0 for (i in 1:330) { if (is.na(bengaliData$country_born[i]) == FALSE && is.na(bengaliData$mother_countryborn[i]) == FALSE && is.na(bengaliData$father_countryborn[i]) == FALSE) { if (bengaliData$country_born[i] == "UK" && bengaliData$mother_countryborn[i] == "UK" && bengaliData$father_countryborn[i] == "UK") { bengaliData$cultural_group[i] <- 0 } else { if (bengaliData$country_born[i] == "Bangladesh" && bengaliData$mother_countryborn[i] == "Bangladesh" && bengaliData$father_countryborn[i] == "Bangladesh") { bengaliData$cultural_group[i] <- 2 } else { if (bengaliData$country_born[i] == "UK" && bengaliData$mother_countryborn[i] == "Bangladesh" && bengaliData$father_countryborn[i] == "Bangladesh") { bengaliData$cultural_group[i] <- 1 } } } } else { bengaliData$cultural_group[i] <- NA } } # move 5 1stgen Ps who were born in Bangladesh but migrated before the age of 14, and set their country of birth to UK for (i in 1:330) { if (is.na(bengaliData$cultural_group[i]) == FALSE) { if (is.na(bengaliData$maxagemig[i]) == FALSE) { if (bengaliData$maxagemig[i] < 14 && bengaliData$cultural_group[i] == 2) { bengaliData$cultural_group[i] <- 1 bengaliData$country_born[i] <- "UK" } } } } rm(i) #turn into a factor bengaliData$cultural_group = factor(bengaliData$cultural_group, levels = c(0,1,2), labels = c("Non-migrant","2nd gen","1st gen")) #also create parents born variable (which will now be the same for mother and father, for participants who have a valid cultural group (both UK or both Bangladeshi)) bengaliData$parents_born <- factor(bengaliData$father_countryborn, levels = c("UK","Bangladesh")) #calculate part 1 ind-col measures------------- bengaliData$individualism <- ((bengaliData$s1_q3 + bengaliData$s1_q4 + bengaliData$s1_q9 + bengaliData$s1_q10 + bengaliData$s1_q11 + bengaliData$s1_q13 + bengaliData$s1_q15 + bengaliData$s1_q16)/8) bengaliData$individualism <- 8-bengaliData$individualism bengaliData$collectivism <- ((bengaliData$s1_q1 + bengaliData$s1_q2 + bengaliData$s1_q5 + bengaliData$s1_q6 + bengaliData$s1_q7 + bengaliData$s1_q8 + bengaliData$s1_q12 + bengaliData$s1_q14)/8) bengaliData$collectivism <- 8-bengaliData$collectivism # #horizontal and vertical, although not used in analysis # bengaliData$horizontal <- ((bengaliData$s1_q5 + bengaliData$s1_q6 + bengaliData$s1_q7 + bengaliData$s1_q10 + bengaliData$s1_q13 + bengaliData$s1_q14 + bengaliData$s1_q15 + bengaliData$s1_q16)/8) # bengaliData$horizontal <- 8-bengaliData$horizontal # # bengaliData$vertical <- ((bengaliData$s1_q1 + bengaliData$s1_q2 + bengaliData$s1_q3 + bengaliData$s1_q4 + bengaliData$s1_q8 + bengaliData$s1_q9 + bengaliData$s1_q11 + bengaliData$s1_q12)/8) # bengaliData$vertical <- 8-bengaliData$vertical #calculate part 2 measure of closeness--------------- bengaliData$closeness <- bengaliData$s2_q2_closeness #calculate part 3 measure of self-serving bias---------- #(NB values below 50 = more self-serving, i.e. fewer people better than me, higher than 50 = less self-serving, i.e. more people better than me; 50 = accurate) bengaliData$selfserving <- (bengaliData$s3_q1 + bengaliData$s3_q2 + bengaliData$s3_q3 + bengaliData$s3_q4 + bengaliData$s3_q5 + bengaliData$s3_q6 + bengaliData$s3_q7 + bengaliData$s3_q8 + bengaliData$s3_q9 + bengaliData$s3_q10)/10 #calculate part 4 measure of categorisation---------- #create function to count number of 'Holistic's and 'Analytic's countHolistic <- function(x) sum(x=="Holistic", na.rm=TRUE) countAnalytic <- function(x) sum(x=="Analytic", na.rm=TRUE) #create new variables with number of each in them bengaliData$holistic_categorisation <- apply(bengaliData[c("s4_q1","s4_q2","s4_q3","s4_q4","s4_q5","s4_q6","s4_q7","s4_q8","s4_q9","s4_q10")], 1, countHolistic) bengaliData$analytic_categorisation <- apply(bengaliData[c("s4_q1","s4_q2","s4_q3","s4_q4","s4_q5","s4_q6","s4_q7","s4_q8","s4_q9","s4_q10")], 1, countAnalytic) #only need a single variable as they are mirrors of each other. Create proportions to avoid messing up due to missing or 'neither' values (some may be less than 10) bengaliData$holistic_categorisation <- bengaliData$holistic_categorisation / (bengaliData$holistic_categorisation + bengaliData$analytic_categorisation) rm(countAnalytic) rm(countHolistic) #calculate part 5 measure of attribution----------- bengaliData$dispositional_attribution <- 8-(bengaliData$s5_q1a + bengaliData$s5_q1c + bengaliData$s5_q2b + bengaliData$s5_q2c)/4 bengaliData$situational_attribution <- 8-(bengaliData$s5_q1b + bengaliData$s5_q1d + bengaliData$s5_q2a + bengaliData$s5_q2d)/4 #calculate part 6 measure of drawing style-------------- bengaliData$horizon_ratio <- bengaliData$s6_horizon_height / bengaliData$s6_boxheight bengaliData$additional_objects <- bengaliData$s6_additional_items #---recording these demographic vars here, as reminders------- bengaliData$age <- bengaliData$age bengaliData$sex <- bengaliData$sex bengaliData$years_education <- bengaliData$years_education bengaliData$family_interaction <- bengaliData$family_interaction bengaliData$family_contact <- bengaliData$family_contact bengaliData$media_tv <- bengaliData$media_tv bengaliData$media_internet <- bengaliData$media_internet bengaliData$media_magazine <- 5-bengaliData$media_magazine #reverse newspaper and magazine so that 4=every day, 1= secondary, p=0.034). Cultural group now also sig (1st gen higher than non-migrant) summary(educultureModelInd <- lm(individualism_log ~ cultural_group + years_education + occupation, data = bengaliData, subset=(individualism_log != "NA"))) #remove years education, occupation is still sig (graduates > secondary, p<0.017). summary(educultureModelInd <- lm(individualism_log ~ cultural_group + occupation, data = bengaliData, subset=(individualism_log != "NA"))) #model comparison - sig better fit at p<0.012 summary(cultureModelIndEdu <- lm(individualism_log ~ cultural_group, data = bengaliData, subset=(individualism_log != "NA" & occupation != "NA"))) anova(cultureModelIndEdu, educultureModelInd) #graph shows higher IND for graduates ggplot(bengaliData[!is.na(bengaliData$occupation), ], aes(occupation, individualism)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(4, 6)) + labs(x = "", y = "Individualism") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 24), axis.text.y = element_text(color = "Black", size = 24), axis.title.y = element_text(size = 24, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #all media variables: print highly sig, also occupation still summary(mediacultureModelInd <- lm(individualism_log ~ cultural_group + occupation + media_internet + media_tv + media_print, data = bengaliData, subset=(individualism_log != "NA"))) #remove TV and internet summary(mediacultureModelInd <- lm(individualism_log ~ cultural_group + occupation + media_print, data = bengaliData, subset=(individualism_log != "NA"))) #model comparison - media increases model fit summary(cultureModelIndEduMed <- lm(individualism_log ~ cultural_group, data = bengaliData, subset=(individualism_log != "NA" & occupation != "NA" & media_print != "NA"))) summary(educultureModelIndEduMed <- lm(individualism_log ~ cultural_group + occupation, data = bengaliData, subset=(individualism_log != "NA" & occupation != "NA" & media_print != "NA"))) summary(mediacultureModelIndEduMed <- lm(individualism_log ~ cultural_group + occupation + media_print, data = bengaliData, subset=(individualism_log != "NA" & occupation != "NA" & media_print != "NA"))) anova(cultureModelIndEduMed, educultureModelIndEduMed, mediacultureModelIndEduMed) #graph of print use ggplot(bengaliData, aes(media_print, individualism_log)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.5, size=1.4, colour = "Black") + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(1,4,by=1)) + labs(x = "print media use", y = "ln(Individualism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.55), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #graph of print use by cultural group - no interaction obvious ggplot(bengaliDataIndEduMed, aes(media_print, individualism_log, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(1,4,by=1)) + labs(x = "print media use", y = "ln(Individualism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #add religiosity; no effect summary(religionModelInd <- lm(individualism_log ~ cultural_group + occupation + media_print + religiosity, data = bengaliData, subset=(individualism_log != "NA"))) #add family variables; no effects summary(familyModelInd <- lm(individualism_log ~ cultural_group + occupation + media_print + family_contact + family_interaction, data = bengaliData, subset=(individualism_log != "NA"))) #add languages spoken; no effect summary(languagesModelInd <- lm(individualism_log ~ cultural_group + occupation + media_print + languages + heritage_language, data = bengaliData, subset=(individualism_log != "NA"))) #conclusion------------------- # culture + media + education model is best fit. Culture makes an independent contribution, with 1st gen more ind than non-migrants, and no other sig diffs. However, 2nd gen is closer to non-migrant than 1st gen. Also higher IND amongst higher SES occupations and amongst more frequent print media users. summary(finalModelInd <- lm(individualism_log ~ cultural_group + occupation + media_print, data = bengaliData, na.action = na.exclude)) #posthocs confirm sig diff between non-migrant and 1st gen, and that 2nd gen are closer to non-migrant than 1st gen summary(posthocs <- glht(finalModelInd, linfct = mcp(cultural_group = "Tukey"))) #diagnostic plots - seems fine plot(finalModelInd) #confidence intervals confint(finalModelInd) confint(posthocs) #within Bengali groups only-------------- #previous model: culture no longer sig as expected, given that only sig diff was 1st gen vs non-migrant. Print still sig, not occupation though summary(finalModelIndImm <- lm(individualism_log ~ cultural_group + occupation + media_print, data = bengaliData, subset=(individualism_log != "NA" & cultural_group != "Non-migrant"))) #acculturation: both sig. More identifiction with UK -> less IND, more identification with Bangladesh -> more IND summary(acculturationModelIndImm <- lm(individualism_log ~ cultural_group + occupation + media_print + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(individualism_log != "NA" & cultural_group != "Non-migrant"))) #model comparison - sig increases model fit summary(finalModelIndImmAcc <- lm(individualism_log ~ cultural_group + occupation + media_print, data = bengaliData, subset=(individualism_log != "NA" & cultural_group != "Non-migrant" & acculturation_UK != "NA" & acculturation_heritage != "NA"))) anova(finalModelIndImmAcc,acculturationModelIndImm) #diagnostic plots plot(acculturationModelIndImm) #95% CIs confint(acculturationModelIndImm) #conclusion: acculturation plays a sig role in individualism in ways expected based on cultural group differences. That is, acculturation to the UK decreases individualism, while acculturation to Bengali culture increases individualism. #within 1st gen group only---------------------- #nothing now sig, although print is marginal (p<0.05) summary(finalModelInd1stgen <- lm(individualism_log ~ occupation + media_print + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(individualism_log != "NA" & cultural_group == "1st gen"))) #add age of migration, not sig summary(migrationModelInd1stgen <- lm(individualism_log ~ occupation + media_print + acculturation_UK + acculturation_heritage + age_migration, data = bengaliData, subset=(individualism_log != "NA" & cultural_group == "1st gen"))) #conclusion: age of migration has no effect # COLLECTIVISM ------------------------------------------------------------ #raw means and sds across groups---------------------------------------- by(bengaliData$collectivism, bengaliData$cultural_group, summary) by(bengaliData$collectivism, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations-------------- #draw histogram, looks right skewed ggplot(bengaliData, aes(collectivism)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "collectivism", y="Density") #not normal (p<0.05) shapiro.test(bengaliData$collectivism) #levene's test - significant (p<0.02) so assumption of equal variances NOT met leveneTest(bengaliData$collectivism, bengaliData$cultural_group, center=median) #reflect then natural log transformation, then reflect again bengaliData$collectivism_log <- log(7) - (log(8 - bengaliData$collectivism)) summary(bengaliData$collectivism_log) #slightly better than before transformation ggplot(bengaliData, aes(collectivism_log)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "collectivism_log", y="Density") #still not normal (p<0.05) but better shapiro.test(bengaliData$collectivism_log) #levene's test - now equal variances met leveneTest(bengaliData$collectivism_log, bengaliData$cultural_group, center=median) #culture comparison------------------------------- #graphically ggplot(bengaliData, aes(cultural_group, collectivism)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(4, 7)) + labs(x = "", y = "Collectivism") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culture-only regression model bengaliDataCol <- subset(bengaliData, is.na(cultural_group)==F) bengaliDataCol <- subset(bengaliDataCol, is.na(collectivism_log)==F) #null model summary(nullModelCol <- lm(collectivism_log ~ 1, data = bengaliData, subset=(collectivism_log != "NA"))) #culturalgroup: both highly sig, 1st gen > non-migrant and 2nd gen > non-migrant summary(cultureModelCol <- lm(collectivism_log ~ cultural_group, data = bengaliData, subset=(collectivism_log != "NA"))) #model comparison with null: highly sig anova(nullModelCol, cultureModelCol) #posthoc tests show sig diff between all 3 groups, 1st gen > 2nd gen > non-migrant summary(posthocs <- glht(cultureModelCol, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelCol) confint(posthocs) #model comparison-------------------------- #ensure all models based on same data bengaliDataSub <- subset(bengaliData, is.na(collectivism_log)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #254 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- lm(collectivism_log ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- lm(collectivism_log ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- lm(collectivism_log ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- lm(collectivism_log ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- lm(collectivism_log ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- lm(collectivism_log ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- lm(collectivism_log ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #diagnostics of the global model- looks fine plot(globalModel) hist(resid(globalModel)) #model comparison using MUMIn. global, vertical and culture models are best supported (modelComparison<-model.sel(demographyModel, parentsModel, birthModel, cultureModel, horizontalModel, verticalModel, globalModel)) #models with delta less than 4 subset(modelComparison, delta <4) #tidy up output table<-as.data.frame(modelComparison)[14:18] table[,2:3]<- round(table[,2:3],2) table[,4:5]<- round(table[,4:5],3) table #multimodel averaging across all models - suggests sex, & country born as important, secondarily religiosity, family contact summary(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) confint(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) #exploratory regression models--------------------- #null model summary(nullModelCol <- lm(collectivism_log ~ 1, data = bengaliData, subset=(collectivism_log != "NA"))) #culturalgroup: both highly sig, 1st gen > non-migrant and 2nd gen > non-migrant summary(cultureModelCol <- lm(collectivism_log ~ cultural_group, data = bengaliData, subset=(collectivism_log != "NA"))) #model comparison with null: highly sig anova(nullModelCol, cultureModelCol) #add age and sex, age is marginally sig (p<0.1), sex highly sig (p<0.003) summary(agesexcultureModelCol <- lm(collectivism_log ~ cultural_group + age + sex, data = bengaliData, subset=(collectivism_log != "NA"))) #model comparison - significantly improved fit for sex but not age summary(sexcultureModelCol <- lm(collectivism_log ~ cultural_group + sex, data = bengaliData, subset=(collectivism_log != "NA"))) anova(cultureModelCol, sexcultureModelCol, agesexcultureModelCol) #posthoc tests on sexageculture model still show sig diff between all 3 groups, 1st gen > 2nd gen > non-migrant summary(posthocs <- glht(sexcultureModelCol, linfct = mcp(cultural_group = "Tukey"))) #plot sex diff, given it's a sig predictor ggplot(bengaliData[!is.na(bengaliData$sex), ], aes(sex, collectivism_log)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "Black", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(1, 1.5)) + labs(x = "Sex", y = "ln(Collectivism)") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 16), axis.text.y = element_text(color = "Black", size = 16), axis.title.y = element_text(size = 20, vjust = 1.3), axis.title.x = element_text(size = 20, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culturalgroup + sex. Looks like the biggest sex diff is in 2ndgen, suggesting an interaction ggplot(bengaliData[!is.na(bengaliData$cultural_group), ], aes(cultural_group, collectivism_log, linetype=sex)) + stat_summary(fun.y = mean, geom = "line", aes(group=sex),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0.5, 2)) + labs(x = "", y = "ln(Collectivism)") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 24), axis.text.y = element_text(color = "Black", size = 24), axis.title.y = element_text(size = 24, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black"), legend.title = element_text(size=16), legend.text = element_text(size=15), axis.text = element_text(size=14), legend.background = element_rect(color = "Black")) #add culture*sex interaction: approaching sig (<0.1) for 2ndgen * sex summary(sexBYcultureModelCol <- lm(collectivism_log ~ cultural_group * sex, data = bengaliData, subset=(collectivism_log != "NA"))) #however, sex*culture does not sig better fit the data than sex+culture, so omit interaction anova(cultureModelCol, sexcultureModelCol, sexBYcultureModelCol) #plot effect of age - not a clear effect ggplot(bengaliDataCol, aes(age, collectivism_log)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.5, size=1.4, colour = "Black") + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,100,by=10)) + labs(x = "age", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.55), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #graph of age by cultural group - no effect for non-migrant, mild for 1st gen, biggest for 2nd gen ggplot(bengaliDataCol, aes(age, collectivism_log, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,100,by=10)) + labs(x = "print media use", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #add culture*age interaction: not sig for 2nd gen * age summary(ageBYcultureModelCol <- lm(collectivism_log ~ cultural_group + sex + age + cultural_group * age, data = bengaliData, subset=(collectivism_log != "NA"))) #age*culture does not sig better fit the data than age+culture, so omit interaction anova(cultureModelCol, agesexcultureModelCol, ageBYcultureModelCol) #years education and occupation: not significant summary(educationModelCol <- lm(collectivism_log ~ cultural_group + sex + years_education + occupation, data = bengaliData, subset=(collectivism_log != "NA"))) #all media variables: none significant summary(mediaModelCol <- lm(collectivism_log ~ cultural_group + sex + media_internet + media_tv + media_print, data = bengaliData, subset=(collectivism_log != "NA"))) #add religiosity; sig effect summary(religionModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity, data = bengaliData, subset=(collectivism_log != "NA"))) #model comparison - sig improved fit summary(sexcultureModelColRel <- lm(collectivism_log ~ cultural_group + sex, data = bengaliData, subset=(collectivism_log != "NA" & religiosity != "NA"))) anova(sexcultureModelColRel, religionModelCol) #graph of religiosity - strong effect ggplot(bengaliDataCol, aes(religiosity, collectivism_log)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.5, size=1.4, colour = "Black") + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,7,by=1)) + labs(x = "Religiosity", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.55), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #religiosity by culture - strongest effect in 1st gen, no effect in 2nd gen ggplot(bengaliDataCol, aes(religiosity, collectivism_log, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,7,by=1)) + labs(x = "Religiosity", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #but interaction is not sig summary(religionBYcultureModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity + religiosity * cultural_group, data = bengaliData, subset=(collectivism_log != "NA"))) #family variables; family_contact is sig, sex and religiosity just about summary(familyModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact + family_interaction, data = bengaliData, subset=(collectivism_log != "NA"))) #remove family interaction; family_contact, sex and religiosity all sig summary(familyModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact, data = bengaliData, subset=(collectivism_log != "NA"))) #model comparison - family contact sig improves model fit (p<0.033) summary(religionModelColFam <- lm(collectivism_log ~ cultural_group + sex + religiosity, data = bengaliData, subset=(family_contact != "NA"))) anova(religionModelColFam, familyModelCol) #graph of family contact ggplot(bengaliData[!is.na(bengaliData$cultural_group), ], aes(family_contact, collectivism_log)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.5, size=1.4, colour="Black") + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "Family contact", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.05), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #graph of family contact by culture - no interaction ggplot(bengaliData[!is.na(bengaliData$cultural_group), ], aes(family_contact, collectivism_log, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,2), breaks = seq(0,2,by=0.5)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "Family contact", y = "ln(Collectivism)") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.05), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #add languages; no effect summary(languagesModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact + languages + heritage_language, data = bengaliData, subset=(collectivism_log != "NA"))) #final model-------------------------------------------------------- #final model: culture, sex, religiosity and family contact are all sig predictors of collectivism. 1st gen more collectivist than 2nd gen/non-migrant (who do not differ), women more COL than men, COL increases with religiosity, and COL increases with family contact summary(finalModelCol <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact, data = bengaliData)) #posthoc tests show no diff between 2nd gen and non-migrant, controlling for sex, age, religion and family summary(posthocs <- glht(finalModelCol, linfct = mcp(cultural_group = "Tukey"))) #diagnostics seem OK for final model plot(finalModelCol) #conf intervals confint(finalModelCol) #within 1st gen and 2nd gen groups only----------------- #apply best-fitting model to subset: culture is still sig, family just sig, sex is marginal (p<0.08) summary(finalModelColImm <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #add acculturation vars: heritage is highly sig, non-migrant not. Family contact now sig summary(finalModelColImm <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #remove UK acculturation. Again, family contact only other sig var sig summary(finalModelColImm <- lm(collectivism_log ~ cultural_group + sex + religiosity + family_contact + acculturation_heritage, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #conclusion - greater heritage acculturation is associated with higher collectivism, reflecting overall group differences #within 1st gen group only------------------------- #age of migration is significant, and all others are now ns summary(finalModelColImm <- lm(collectivism_log ~ sex + religiosity + family_contact + acculturation_heritage + age_migration, data = bengaliData, subset=(cultural_group == "1st gen"))) #model comparison - sig at p=0.031 summary(finalModelColImm_noage <- lm(collectivism_log ~ sex + religiosity + family_contact + acculturation_heritage, data = bengaliData, subset=(cultural_group == "1st gen" & age_migration != "NA"))) anova(finalModelColImm_noage, finalModelColImm) #conclusion: migrants who migrated when they were older have higher COL #CLOSENESS--------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$closeness, bengaliData$cultural_group, summary) by(bengaliData$closeness, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations----------------- #draw histogram - not normal and not continuous data, as it comes from a Likert scale ggplot(bengaliData, aes(closeness)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "closeness", y="Density") #not normal (p<0.05) shapiro.test(bengaliData$closeness) #levene's test - significant (p<0.02) so assumption of equal variances NOT met leveneTest(bengaliData$closeness, bengaliData$cultural_group, center=median) #given that data not normal or continuous, run multinomial logistic regressions, converted to ordered factors, using clm function of ordinal package. NB Can change to linear regressions by running lm instead of clm; there are no diffs between the methods apart from minor diffs in betas #culture comparison------------------------------ #graphically - 1stgen and 2ndgen look identical, both higher than UK ggplot(bengaliData, aes(cultural_group, closeness)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(1, 7)) + labs(x = "", y = "closeness") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #for paper: 12-pt font, no legend (add labels in later) ggplot(bengaliData, aes(cultural_group, closeness)) + stat_summary(fun.y = mean, geom = "line", aes(group=1)) + stat_summary(fun.y = mean, geom = "point") + coord_cartesian(ylim = c(3, 6)) + labs(x = "", y = "Mean Score") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) + theme(axis.text.x = element_text(color = "Black"), axis.line = element_line(), axis.text.y = element_text(color = "Black"), panel.background = element_rect(fill = "white"), panel.grid.minor = element_blank(), axis.ticks = element_line(color = "black"), legend.position = "none", plot.margin=unit(c(5,0,0,0),"mm")) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggsave(file="closeness.tiff", width = 6.5, height = 6, units = "cm") #culture-only regression model #given that data not normal or continuous, run multinomial logistic regressions, converted to ordered factors summary(nullModelCLO <- clm(factor(closeness) ~ 1, data = bengaliData)) #culturalgroup: both sig, 1st gen > non-migrant and 2nd gen > non-migrant summary(cultureModelCLO <- clm(factor(closeness) ~ cultural_group, data = bengaliData)) anova(nullModelCLO, cultureModelCLO) #conf intervals confint(cultureModelCLO) #alternative method with polr from Quantpsyc- allows posthocs (nullModelCLO <- polr(factor(closeness) ~ 1, data = bengaliData, Hess = TRUE)) (cultureModelCLO <- polr(factor(closeness) ~ cultural_group, data = bengaliData, Hess = TRUE)) ## calculate p values ctable <- coef(summary(cultureModelCLO)) p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2 (ctable <- cbind(ctable, "p value" = round(p, digits=3))) #compare null vs culture anova(nullModelCLO, cultureModelCLO) #posthocs - 1st gen > non-migrant, 2nd gen > non-migrant, and 1st gen=2nd gen summary(posthocs <- glht(cultureModelCLO, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelCLO) confint(posthocs) #model comparison-------------------------- bengaliDataSub <- subset(bengaliData, is.na(closeness)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #257 in total after all the exclusions nrow(bengaliDataSub) bengaliDataSub$closeness <- factor(bengaliDataSub$closeness) #to get clm to work #demography (age + sex) null model summary(demographyModel <- clm(closeness ~ sex + age, data = bengaliDataSub)) #model adding parents country of birth summary(parentsModel <- clm(closeness ~ sex + age + parents_born, data = bengaliDataSub)) #model adding P's country of birth summary(birthModel <- clm(closeness ~ sex + age + country_born, data = bengaliDataSub)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- clm(closeness ~ sex + age + country_born + parents_born, data = bengaliDataSub)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- clm(closeness ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- clm(closeness ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub)) #global model including all variables summary(globalModel <- clm(closeness ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub)) #assess global model fit vs null. Not significant but borderline, so continue summary(nullModel <- clm(closeness ~ 1, data = bengaliDataSub)) anova(nullModel, globalModel) #model comparison using MUMIn. PAR, CUL & VCT are best supported (modelComparison <- model.sel(demographyModel, parentsModel, birthModel, cultureModel, horizontalModel, verticalModel, globalModel)) #models with delta less than 4 subset(modelComparison, delta <4) #tidy up output table<-as.data.frame(modelComparison)[15:19] table[,2:3]<- round(table[,2:3],2) table[,4:5]<- round(table[,4:5],3) table #multimodel averaging across all models - suggests parents born & fam interaction as important summary(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) confint(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) #exploratory regression models--------------------------------- #given that data not normal or continuous, run multinomial logistic regressions, converted to ordered factors summary(nullModelCLO <- clm(factor(closeness) ~ 1, data = bengaliData)) #culturalgroup: both sig, 1st gen > non-migrant and 2nd gen > non-migrant summary(cultureModelCLO <- clm(factor(closeness) ~ cultural_group, data = bengaliData)) #model comparison is sig anova(nullModelCLO, cultureModelCLO) #add age & sex. Age shows a sig interaction with culture. After controlling for age, 1st gen is now less close than non-migrant. Sex not sig. summary(agecultureModelCLO <- clm(factor(closeness) ~ cultural_group * age + sex, data = bengaliData)) #drop sex, model comparison shows sig better fit for interaction model summary(agePLUScultureModelCLO <- clm(factor(closeness) ~ cultural_group + age, data = bengaliData)) summary(ageBYcultureModelCLO <- clm(factor(closeness) ~ cultural_group * age, data = bengaliData)) anova(cultureModelCLO,agePLUScultureModelCLO,ageBYcultureModelCLO) #graph of age x culture interaction. For UK, closeness decreases with age. For Bengali groups, closeness increases ggplot(bengaliData, aes(age, closeness, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(1,7), breaks = seq(1,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "closeness") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 14), axis.text.y = element_text(family="Times",color = "Black", size = 14), axis.title.y = element_text(family="Times",size = 18, vjust = 1.5), axis.title.x = element_text(family="Times",size = 18, vjust = 0.0), strip.text = element_text(family="Times",size = 14), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 12), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #version without points ggplot(bengaliData, aes(age, closeness, linetype = cultural_group, color = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group), alpha = 0.2) + geom_smooth(method=glm, alpha = 0.05, size=1.6) + scale_y_continuous(limits = c(1,7), breaks = seq(1,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "closeness") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 14), axis.text.y = element_text(family="Times",color = "Black", size = 14), axis.title.y = element_text(family="Times",size = 18, vjust = 1.5), axis.title.x = element_text(family="Times",size = 18, vjust = 0.0), strip.text = element_text(family="Times",size = 14), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) #12-pt font, no legend (add labels in later) ggplot(bengaliData, aes(age, closeness, color = cultural_group)) + geom_smooth(method=glm, alpha = 0.1) + scale_y_continuous(limits = c(1,7), breaks = seq(1,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=10)) + labs(x = "Age", y = "Closeness") + theme(axis.text.x = element_text(color = "Black"), axis.line = element_line(), axis.text.y = element_text(color = "Black"), panel.background = element_rect(fill = "white"), panel.grid.minor = element_blank(), axis.ticks = element_line(color = "black"), legend.position = "none", plot.margin=unit(c(5,0,0,0),"mm")) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggsave(file="closeness_ageculture.tiff", width = 5.93, height = 6, units = "cm") #years education and occupation: not significant summary(eduagecultureModelCLO <- clm(factor(closeness) ~ cultural_group * age + years_education + occupation, data = bengaliData)) #all media variables: none sig summary(mediaagecultureModelCLO <- clm(factor(closeness) ~ cultural_group * age + media_internet + media_tv + media_print, data = bengaliData)) #add religiosity; not sig summary(religionagecultureModelCLO <- clm(factor(closeness) ~ cultural_group * age + religiosity, data = bengaliData)) #add family variables; age interaction stil sig, family_interaction almost sig at p<.08 summary(familyagecultureModelCLO <- clm(factor(closeness) ~ cultural_group * age + family_contact + family_interaction, data = bengaliData)) #final model: culture by age interaction (closeness decreases with age for non-migrant, increases with age in Benglalis, esp 1stgen) summary(finalModelCLO <- clm(factor(closeness) ~ cultural_group * age, data = bengaliData)) #model comparison against null model summary(nullModelCLO <- clm(factor(closeness) ~ 1, data = bengaliData)) anova(nullModelCLO, finalModelCLO) #get McFadden's pseudo R^2 1 - finalModelCLO$logLik / nullModelCLO$logLik #within 1st gen and 2nd gen groups only---------------------- #apply best-fitting model to subset: nothing is now significant summary(agefamilyModelCLOImm <- clm(factor(closeness) ~ cultural_group * age, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #remove interactions (given that interactions were all driven by non-migrant group). Now only age is sig: older Ps are more close to others summary(agefamilyModelCLOImm <- clm(factor(closeness) ~ cultural_group + age, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #add acculturation vars: neither sig summary(accagefamilyModelCLOImm <- clm(factor(closeness) ~ cultural_group + age + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #within 1st gen group only------------------------- #age is still sig summary(agefamilyModelCLO1st <- clm(factor(closeness) ~ age, data = bengaliData, subset=(cultural_group == "1st gen"))) #add age of migration, not sig summary(agefamilyModelCLO1st <- clm(factor(closeness) ~ age + age_migration, data = bengaliData, subset=(cultural_group == "1st gen"))) #conclusion: no effect of age of migration, only absolute age #SELF-ENHANCEMENT-------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$selfserving, bengaliData$cultural_group, summary) by(bengaliData$selfserving, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations---------------------- #draw histogram - looks normal ggplot(bengaliData, aes(selfserving)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "selfserving", y="Density") #almost normally distributed (p=0.033) shapiro.test(bengaliData$selfserving) #levene's test - not significant so assumption of equal variances is met. No need to transform leveneTest(bengaliData$selfserving, bengaliData$cultural_group, center=median) #culture comparison------------------------------- #graphically - looks like higher SSB in 2nd gen, but big overlap ggplot(bengaliData, aes(cultural_group, selfserving)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0,50)) + labs(x = "", y = "Self-enhancement") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culture-only regression model #null model summary(nullModelSSB <- lm(selfserving ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: no sig diffs summary(cultureModelSSB <- lm(selfserving ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelSSB,cultureModelSSB) #posthoc tests summary(posthocs <- glht(cultureModelSSB, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelSSB) confint(posthocs) #model comparison------------------------- #ensure all models based on same data bengaliDataSub <- subset(bengaliData, is.na(selfserving)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #256 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- lm(selfserving ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- lm(selfserving ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- lm(selfserving ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- lm(selfserving ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- lm(selfserving ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- lm(selfserving ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- lm(selfserving ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #GLOBAL MODEL DOES NOT FIT THE DATA, SO NO MODEL COMPARISON IS POSSIBLE - DO NOT CONTINUE #exploratory regression models-------------------------- #null model summary(nullModelSSB <- lm(selfserving ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: no sig diffs summary(cultureModelSSB <- lm(selfserving ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelSSB,cultureModelSSB) #add age & sex summary(agesexModelSSB <- lm(selfserving ~ cultural_group + age + sex, data = bengaliData, na.action = na.exclude)) #remove age, sex is marginal (p<0.065) summary(sexModelSSB <- lm(selfserving ~ cultural_group + sex, data = bengaliData, na.action = na.exclude)) anova(nullModelSSB,cultureModelSSB,sexModelSSB) #plot sex diff, given it's almost a sig predictor ggplot(bengaliData, aes(sex, selfserving)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "Black", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0, 50)) + labs(x = "Sex", y = "Self-enhancement") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 16), axis.text.y = element_text(color = "Black", size = 16), axis.title.y = element_text(size = 20, vjust = 1.3), axis.title.x = element_text(size = 20, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culturalgroup + sex. Looks like the biggest sex diff is in 2ndgen, suggesting an interaction ggplot(bengaliData, aes(cultural_group, selfserving, linetype=sex)) + stat_summary(fun.y = mean, geom = "line", aes(group=sex),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0, 70)) + labs(x = "", y = "selfserving") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 24), axis.text.y = element_text(color = "Black", size = 24), axis.title.y = element_text(size = 24, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black"), legend.title = element_text(size=16), legend.text = element_text(size=15), axis.text = element_text(size=14), legend.background = element_rect(color = "Black")) #or with lines indicating culture and sex on the x-axis ggplot(bengaliData, aes(sex, selfserving, linetype=cultural_group)) + stat_summary(fun.y = mean, geom = "line", aes(group=cultural_group),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(20, 60)) + labs(x = "", y = "selfserving") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 24), axis.text.y = element_text(color = "Black", size = 24), axis.title.y = element_text(size = 24, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black"), legend.title = element_text(size=16), legend.text = element_text(size=15), axis.text = element_text(size=14), legend.background = element_rect(color = "Black")) #adding a sex * culture interaction isn't sig as a predictor, but does improve model fit summary(sexBYcultureModelSSB <- lm(selfserving ~ cultural_group * sex, data = bengaliData, na.action = na.exclude)) anova(nullModelSSB,cultureModelSSB,sexBYcultureModelSSB) #years education and occupation: years education is significant: more education = more self-enhancement summary(edusexModelSSB <- lm(selfserving ~ cultural_group * sex + years_education + occupation, data = bengaliData, na.action = na.exclude)) #remove occupation, years_education still sig, and in interaction summary(edusexModelSSB <- lm(selfserving ~ cultural_group * sex * years_education, data = bengaliData, na.action = na.exclude)) #model comparison, years education does improve model fit, and interaction improves additive summary(nullModelSSB_edu <- lm(selfserving ~ 1, data = bengaliData, subset=(years_education != "NA"))) summary(cultureModelSSB_edu <- lm(selfserving ~ cultural_group, data = bengaliData, subset=(years_education != "NA"))) summary(sexBYcultureModelSSB_edu <- lm(selfserving ~ cultural_group * sex, data = bengaliData, subset=(years_education != "NA"))) summary(eduBYsexBYcultureModelSSB_edu <- lm(selfserving ~ cultural_group * sex * years_education, data = bengaliData)) anova(nullModelSSB_edu, cultureModelSSB_edu, sexBYcultureModelSSB_edu, eduBYsexBYcultureModelSSB_edu) #graph of years_education by culture ggplot(bengaliData, aes(years_education, selfserving, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "years_education", y = "selfserving") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #graph of years_education by sex ggplot(bengaliData, aes(years_education, selfserving, colour = sex)) + geom_point(size = 2.5, aes(shape = sex)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "years_education", y = "selfserving") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #all media variables: none sig, although 3-way culture*sex*education is now even bigger summary(mediasexModelSSB <- lm(selfserving ~ cultural_group * sex * years_education + media_internet + media_tv + media_print, data = bengaliData, na.action = na.exclude)) #add religiosity; not sig summary(religionsexModelSSB <- lm(selfserving ~ cultural_group * sex * years_education + religiosity, data = bengaliData, na.action = na.exclude)) #add family variables; not sig summary(familysexModelSSB <- lm(selfserving ~ cultural_group * sex * years_education + family_contact + family_interaction, data = bengaliData, na.action = na.exclude)) #add language variables; not sig summary(familysexModelSSB <- lm(selfserving ~ cultural_group * sex * years_education + languages + heritage_language, data = bengaliData, na.action = na.exclude)) #final model---------------------- # culture*sex*years_education interaction where men are self-enhancing all the time irrespective of culture and education; 1st gen women also are self-enhancing irrespecitve of education; but nonmigrants and 2nd gen women both show more self-enhancing in response to years of education. summary(finalModelSSB <- lm(selfserving ~ cultural_group * sex * years_education, data = bengaliData, na.action = na.exclude)) #attempt at visualising 3-way interaction #graph of years_education by sex ggplot(bengaliData, aes(years_education, selfserving, colour = sex)) + facet_wrap(~ cultural_group) + geom_point(size = 2.5, aes(shape = sex)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "years_education", y = "selfserving") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.05), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #or...(clearer I think) ggplot(bengaliData, aes(years_education, selfserving, colour = cultural_group)) + facet_wrap(~ sex) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "Years of education", y = "Self-enhancement") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #and with faded points ggplot(bengaliData, aes(years_education, selfserving, color = cultural_group, linetype = cultural_group)) + facet_wrap(~ sex) + geom_point(aes(shape=cultural_group), size = 2.5, alpha = 0.2) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "Years of education", y = "Self-enhancement") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #no points, 12-pt font, no legend (add labels in later) ggplot(bengaliData, aes(years_education, selfserving, colour = cultural_group)) + facet_wrap(~ sex) + geom_smooth(method=glm, alpha = 0.1) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=10)) + labs(x = "Years education", y = "Self-enhancement") + theme(axis.text.x = element_text(color = "Black"), axis.line = element_line(), axis.text.y = element_text(color = "Black"), panel.background = element_rect(fill = "white"), panel.grid.minor = element_blank(), axis.ticks = element_line(color = "black"), legend.position = "none", plot.margin=unit(c(5,1,0,0),"mm")) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggsave(file="ssb_sexeducation.tiff", width = 7, height = 6, units = "cm") #within 1st gen and 2nd gen groups only------------------- #apply previously best-fitting model to subset: years education no longer doing much summary(sexcultureeduModelSSBImm <- lm(selfserving ~ cultural_group * sex * years_education, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #add acculturation vars: neither acculturation variable is sig summary(accsexBYcultureModelSSBImm <- lm(selfserving ~ cultural_group * sex * years_education + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #within 1st gen group only--------------------------- #now can't do culture, but sex*education interaction no longer significant summary(sexModelSSB1st <- lm(selfserving ~ sex * years_education, data = bengaliData, subset=(cultural_group == "1st gen"))) #add age of migration, not sig summary(agemigModelSSB1st <- lm(selfserving ~ age_migration, data = bengaliData, subset=(cultural_group == "1st gen"))) #conclusion: no effect of age of migration # CATEGORISATION------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$holistic_categorisation, bengaliData$cultural_group, summary) by(bengaliData$holistic_categorisation, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations-------------------------- #draw histogram - looks very un-normal ggplot(bengaliData, aes(holistic_categorisation)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "holistic_categorisation", y="Density") #not normally distributed at all shapiro.test(bengaliData$holistic_categorisation) #levene's test - not significant so assumption of equal variances is met leveneTest(bengaliData$holistic_categorisation, bengaliData$cultural_group, center=median) #culture comparison----------------------------------- #graphically - non-migrant slightly lower than Bengali groups, albeit with large error bars ggplot(bengaliData, aes(cultural_group, holistic_categorisation)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0.5,1)) + labs(x = "", y = "Holistic categorisation") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) # use quasibinomial due to non-normality and under-dispersion #assumptions checks - linear model is clearly not right summary(linearModel <- lm(holistic_categorisation ~ cultural_group, data = bengaliData, na.action = na.exclude)) plot(linearModel) hist(residuals(linearModel)) #null model summary(nullModelHOLqb <- glm(holistic_categorisation ~ 1, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #culturalgroup: no sig diffs summary(cultureModelHOLqb <- glm(holistic_categorisation ~ cultural_group, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) anova(nullModelHOLqb,cultureModelHOLqb, test = "F") summary(posthocs <- glht(cultureModelHOLqb, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelHOLqb) confint(posthocs) #model comparison---------------------- #ensure all models based on same data bengaliDataSub <- subset(bengaliData, is.na(holistic_categorisation)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #257 in total after all the exclusions nrow(bengaliDataSub) #hack from MUMiN for calculating log likelihoods from quasibinomial models, allowing comparison table to be produced. See ?model.sel and example(QAIC) x.quasibinomial <- function(...) { res <- quasibinomial(...) res$aic <- binomial(...)$aic res } #demography (age + sex) null model summary(demographyModel <- glm(holistic_categorisation ~ sex + age, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- glm(holistic_categorisation ~ sex + age + parents_born, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- glm(holistic_categorisation ~ sex + age + country_born, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- glm(holistic_categorisation ~ sex + age + country_born + parents_born, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- glm(holistic_categorisation ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- glm(holistic_categorisation ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) #global model including all variables summary(globalModel <- glm(holistic_categorisation ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) summary(nullModel <- glm(holistic_categorisation ~ 1, data = bengaliDataSub, family = "x.quasibinomial", na.action = na.exclude)) anova(nullModel, globalModel, test = "F") #NO SIG MODEL FIT FOR THE GLOBAL MODEL SO CAN'T DO MODEL COMPARISON, DO NOT CONTINUE #exploratory regression models----------------------------- #null model summary(nullModelHOLqb <- glm(holistic_categorisation ~ 1, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #culturalgroup: no sig diffs summary(cultureModelHOLqb <- glm(holistic_categorisation ~ cultural_group, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) anova(nullModelHOLqb,cultureModelHOLqb, test = "F") #try age and sex, neither sig summary(ageModelHOLqb <- glm(holistic_categorisation ~ cultural_group + sex + age, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #years education and occupation: occupation is almost significant (graduates are less holistic) summary(eduoccModelHOLqb <- glm(holistic_categorisation ~ cultural_group + years_education + occupation, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #just occupation: again not significant. Interaction also not significant summary(eduoccModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #model comparison, occupation does not improve model fit, either linear or in interaction summary(cultureModelHOL_occ <- glm(holistic_categorisation ~ cultural_group, data = bengaliData, family = "quasibinomial", subset=(occupation != "NA"))) summary(occcultureModelHOL_occ <- glm(holistic_categorisation ~ cultural_group + occupation, data = bengaliData, family = "quasibinomial", subset=(occupation != "NA"))) summary(occBYcultureModelHOL_occ <- glm(holistic_categorisation ~ cultural_group * occupation, data = bengaliData, family = "quasibinomial", subset=(occupation != "NA"))) anova(cultureModelHOL_occ, occcultureModelHOL_occ, occBYcultureModelHOL_occ, test = "F") #graph of occupation: lower in graduates but big error bar ggplot(bengaliData[!is.na(bengaliData$occupation), ], aes(occupation, holistic_categorisation)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "Black", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0, 1)) + labs(x = "occupation", y = "holistic_categorisation") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 16), axis.text.y = element_text(color = "Black", size = 16), axis.title.y = element_text(size = 20, vjust = 1.3), axis.title.x = element_text(size = 20, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #occupation * culture: no effect in 1st gen, big effect in non-migrant ggplot(bengaliData[!is.na(bengaliData$occupation), ], aes(occupation, holistic_categorisation, color=cultural_group)) + stat_summary(fun.y = mean, geom = "line", aes(group=cultural_group),fill = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0, 1)) + labs(x = "occupation", y = "holistic_categorisation") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 16), axis.text.y = element_text(color = "Black", size = 16), axis.title.y = element_text(size = 20, vjust = 1.3), axis.title.x = element_text(size = 20, vjust = 0.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #all media variables: TV marginally sig (more TV watching = less holistic) summary(mediaModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation + media_internet + media_tv + media_print, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #remove print and internet: TV sig (more TV watching = less holistic) summary(mediaModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation + media_tv, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #plot TV ggplot(bengaliData, aes(media_tv, holistic_categorisation)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,by=0.1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "media_tv", y = "holistic_categorisation") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(1,0.62), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #plot TV * culture: no interaction apparent ggplot(bengaliData, aes(media_tv, holistic_categorisation, colour=cultural_group)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,1), breaks = seq(0,1,by=0.1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "media_tv", y = "holistic_categorisation") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(1,0.62), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #model comparison, TV improves model fit (but not in interaction) summary(cultureModel <- glm(holistic_categorisation ~ cultural_group, data = bengaliData, family = "quasibinomial", subset=(media_tv != "NA"))) summary(culturemediaModel <- glm(holistic_categorisation ~ cultural_group + media_tv, data = bengaliData, family = "quasibinomial", subset=(media_tv != "NA"))) summary(mediaBYcultureModel <- glm(holistic_categorisation ~ cultural_group * media_tv, data = bengaliData, family = "quasibinomial", subset=(media_tv != "NA"))) anova(cultureModel, culturemediaModel, mediaBYcultureModel, test = "F") #combine TV and occupation, given findings for individualism: both sig at p<0.05, both reduce holistic categorisation summary(mediaocccultureModelHOL <- glm(holistic_categorisation ~ cultural_group + occupation + media_tv, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #add religiosity; not sig summary(relModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation + media_tv + religiosity, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #add family variables; not sig summary(familyModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation + media_tv + family_interaction + family_contact, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #add languages variables; not sig summary(mediaModelHOLqb <- glm(holistic_categorisation ~ cultural_group + occupation + media_tv + languages + heritage_language, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #final model------------------------------- # media_tv and occupation are sig predictors of categorisation summary(finalModelHOLqb <- glm(holistic_categorisation ~ cultural_group + media_tv + occupation, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) #posthocs confirm no diff between 1st gen and 2nd gen, and smaller diff than non-migrant vs others summary(posthocs <- glht(finalModelHOLqb, linfct = mcp(cultural_group = "Tukey"))) #model comparison with null summary(nullModel <- glm(holistic_categorisation ~ 1, data = bengaliData, family = "quasibinomial", subset=(media_tv != "NA" & occupation != "NA"))) summary(finalModel <- glm(holistic_categorisation ~ cultural_group + media_tv + occupation, data = bengaliData, family = "quasibinomial", na.action = na.exclude)) anova(nullModel, finalModel, test="F") #get pseudo-Rsquared 1 - finalModel$deviance / finalModel$null.deviance #within 1st gen and 2nd gen groups only------------------------- #apply previously best-fitting model to subset: TV and occupation no longer sig summary(accModelHOLqb <- glm(holistic_categorisation ~ cultural_group + media_tv + occupation, data = bengaliData, family = "quasibinomial", subset=(cultural_group != "Non-migrant"))) #add acculturation vars: neither sig summary(accModelHOLqb <- glm(holistic_categorisation ~ cultural_group + acculturation_UK + acculturation_heritage, data = bengaliData, family = "quasibinomial", subset=(cultural_group != "Non-migrant"))) #conclusion: no effect of acculturation #within 1st gen group only----------------------------- #add age of migration, not sig summary(accModelHOLqb <- glm(holistic_categorisation ~ age_migration, data = bengaliData, family = "quasibinomial", subset=(cultural_group == "1st gen"))) #conclusion: no effect of age of migration #DISPOSITIONAL ATTRIBUTION--------------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$dispositional_attribution, bengaliData$cultural_group, summary) by(bengaliData$dispositional_attribution, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations----------------------------- #draw histogram - looks normal ggplot(bengaliData, aes(dispositional_attribution)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "dispositional_attribution", y="Density") #not normally distributed but think it's OK shapiro.test(bengaliData$dispositional_attribution) #levene's test - not significant so assumption of equal variances is met leveneTest(bengaliData$dispositional_attribution, bengaliData$cultural_group, center=median) #culture comparison------------------------------------------ #graphically - looks like lower DIS in 1stgen, but error bars overlap quite a bit ggplot(bengaliData, aes(cultural_group, dispositional_attribution)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(3,7)) + labs(x = "", y = "Dispositional attribution") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #dis & sit combined bengaliDataDISSIT <- bengaliData[, c("cultural_group","dispositional_attribution","situational_attribution")] bengaliDataDISSIT <- melt(bengaliDataDISSIT, id="cultural_group") bengaliDataDISSIT$variable <- car::recode(bengaliDataDISSIT$variable, "'dispositional_attribution'='Dispositional';'situational_attribution'='Situational'") ggplot(bengaliDataDISSIT[!is.na(bengaliData$cultural_group), ], aes(cultural_group, value, colour=variable)) + stat_summary(fun.y = mean, geom = "line", aes(group=variable), size=3) + stat_summary(fun.y = mean, geom = "point", size = 6) + coord_cartesian(ylim = c(3, 6)) + labs(x = "", y = "Mean Likert Score") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 2) + theme(axis.text.x = element_text(color = "Black", size = 36), axis.text.y = element_text(color = "Black", size = 40), axis.title.y = element_text(size = 36, vjust = 1.8), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 2), panel.grid.minor = element_blank(), axis.ticks.margin = unit(0.5, 'cm'), axis.ticks = element_line(size = 2, color = "black"), legend.title = element_blank(), legend.text = element_text(size = 36), legend.key.width = unit(2, 'cm'), legend.key.height = unit(1.5, 'cm'), legend.background = element_rect(color = "black", size = 1), legend.justification = c(1,0), legend.key = element_blank() ) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggplot(bengaliDataDISSIT[!is.na(bengaliData$cultural_group), ], aes(cultural_group, value, linetype=variable)) + stat_summary(fun.y = mean, geom = "line", aes(group=variable), size=1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(3, 7)) + labs(x = "", y = "Mean Value") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black"), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.8), legend.justification = c(1,0), legend.key = element_blank(), legend.position = c(0.65,0.67)) #12-pt font, no legend (add labels in later) ggplot(bengaliDataDISSIT[!is.na(bengaliData$cultural_group), ], aes(cultural_group, value, colour=variable)) + stat_summary(fun.y = mean, geom = "line", aes(group=variable)) + stat_summary(fun.y = mean, geom = "point") + coord_cartesian(ylim = c(3, 6)) + labs(x = "", y = "Mean Score") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) + theme(axis.text.x = element_text(color = "Black"), axis.line = element_line(), axis.text.y = element_text(color = "Black"), panel.background = element_rect(fill = "white"), panel.grid.minor = element_blank(), axis.ticks = element_line(color = "black"), legend.position = "none", plot.margin=unit(c(5,0,0,0),"mm")) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggsave(file="sitdisp.tiff", width = 6.5, height = 6, units = "cm") #null model summary(nullModelDIS <- lm(dispositional_attribution ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: 1st gen sig less DIS than non-migrants. No diff between 2nd gen and non-migrant summary(cultureModelDIS <- lm(dispositional_attribution ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelDIS,cultureModelDIS) #posthoc tests show only sig diff is between non-migrant and 1st gen, no sig diff between 1st & 2nd gen summary(posthocs <- glht(cultureModelDIS, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelDIS) confint(posthocs) #model comparison-------------------------------------- bengaliDataSub <- subset(bengaliData, is.na(dispositional_attribution)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #257 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- lm(dispositional_attribution ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- lm(dispositional_attribution ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- lm(dispositional_attribution ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- lm(dispositional_attribution ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- lm(dispositional_attribution ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- lm(dispositional_attribution ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- lm(dispositional_attribution ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #diagnostics of the global model- looks fine plot(globalModel) hist(resid(globalModel)) #model comparison using MUMIn. birth model is best supported, closely followed by culture (modelComparison <- model.sel(demographyModel, parentsModel, birthModel, cultureModel, horizontalModel, verticalModel, globalModel)) #models with delta less than 4: birth and culture. NB culture only has one more predictor, so actually should reject culture (Burnham & Anderson) subset(modelComparison, delta <4) #tidy up output table<-as.data.frame(modelComparison)[14:18] table[,2:3]<- round(table[,2:3],2) table[,4:5]<- round(table[,4:5],3) table #multimodel averaging across all models - suggests age & country born as important summary(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) confint(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) #exploratory regression models------------------------------------- #null model summary(nullModelDIS <- lm(dispositional_attribution ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: 1st gen sig less DIS than non-migrant. No diff between 2nd gen and non-migrant summary(cultureModelDIS <- lm(dispositional_attribution ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelDIS,cultureModelDIS) #add age and sex, age is almost sig (p=0.055) summary(agesexcultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + sex, data = bengaliData, na.action = na.exclude)) #remove sex, age still marginal summary(agecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age, data = bengaliData, na.action = na.exclude)) #graph of age by cultural group: strong effect in non-migrant, no effect in BB groups ggplot(bengaliData, aes(age, dispositional_attribution, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(4,7), breaks = seq(4,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "dispositional") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.05), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #culture*age interaction, not significant summary(ageBYcultureModelDIS <- lm(dispositional_attribution ~ cultural_group * age, data = bengaliData, na.action = na.exclude)) #model comparison confirms age doesn't improve model fit in interaction, but leave age in just in case anova(nullModelDIS,cultureModelDIS,agecultureModelDIS,ageBYcultureModelDIS) #graph of just age - doesn't look like a big effect ggplot(bengaliData, aes(age, dispositional_attribution)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(4,7), breaks = seq(4,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "dispositional") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.05), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #years education and occupation: occupation is significant: more education = more dispositional summary(eduagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + years_education + occupation, data = bengaliData, na.action = na.exclude)) #remove years_education, occupation still sig summary(occagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation, data = bengaliData, na.action = na.exclude)) #model comparison, occupation does improve model fit summary(occagecultureModelDIS_occ <- lm(dispositional_attribution ~ cultural_group + age + occupation, data = bengaliData, subset=(occupation != "NA"))) summary(agecultureModelDIS_occ <- lm(dispositional_attribution ~ cultural_group + age, data = bengaliData, subset=(occupation != "NA"))) anova(agecultureModelDIS_occ,occagecultureModelDIS_occ) #all media variables: none sig summary(occagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation + media_internet + media_tv + media_print, data = bengaliData, na.action = na.exclude)) #add religiosity; not sig summary(occagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation + religiosity, data = bengaliData, na.action = na.exclude)) #add family variables; not sig but marginal, but none are bigger effect than culture/age/occ so keep out summary(occagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation + family_contact + family_interaction, data = bengaliData, na.action = na.exclude)) #add language; not sig summary(occagecultureModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation + languages + heritage_language, data = bengaliData, na.action = na.exclude)) #final model---------------------------- # culture (1st gen less disp than non-migrant, but ns), age (older=more disp) and occupation (higher SES=more disp) summary(finalModelDIS <- lm(dispositional_attribution ~ cultural_group + age + occupation, data = bengaliData, na.action = na.exclude)) #posthoc tests show no sig diffs summary(posthocs <- glht(finalModelDIS, linfct = mcp(cultural_group = "Tukey"))) #within 1st gen and 2nd gen groups only------------------------------ #apply previously best-fitting model to subset: age & occupation no longer sig summary(occagecultureModelDISImm <- lm(dispositional_attribution ~ cultural_group + age + occupation, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #add acculturation vars: not sig summary(accoccagecultureModelDISImm <- lm(dispositional_attribution ~ cultural_group + age + occupation + acculturation_UK + acculturation_heritage, data = bengaliData, subset=(cultural_group != "Non-migrant"))) #conclusion: no effect of acculturation #within 1st gen group only-------------------------------------------- #now can't do culture, age & occ not sig now summary(accoccagecultureModelDIS1st <- lm(dispositional_attribution ~ age + occupation, data = bengaliData, subset=(cultural_group == "1st gen"))) #add age of migration, not sig summary(migoccagecultureModelDIS1st <- lm(dispositional_attribution ~ age + occupation + age_migration, data = bengaliData, subset=(cultural_group == "1st gen"))) #conclusion: no effect of age of migration #SITUATIONAL ATTRIBUTION----------------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$situational_attribution, bengaliData$cultural_group, summary) by(bengaliData$situational_attribution, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations----------------------------- #draw histogram - looks normal ggplot(bengaliData, aes(situational_attribution)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "situational_attribution", y="Density") #not normally distributed but think it's OK shapiro.test(bengaliData$situational_attribution) #levene's test - not significant so assumption of equal variances is met leveneTest(bengaliData$situational_attribution, bengaliData$cultural_group, center=median) #culture comparison------------------------------------- #graphically - looks like lower SIT in non-migrant, then 2nd gen, then 1st gen ggplot(bengaliData, aes(cultural_group, situational_attribution)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(3,6)) + labs(x = "", y = "Situational attribution") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culture-only regressions #null model summary(nullModelSIT <- lm(situational_attribution ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: both 1st gen and 2nd gen sig more SIT than non-migrant summary(cultureModelSIT <- lm(situational_attribution ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelSIT,cultureModelSIT) #posthoc tests show no sig diff between 1st gen and 2nd gen summary(posthocs <- glht(cultureModelSIT, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelSIT) confint(posthocs) #model comparison-------------------------------- bengaliDataSub <- subset(bengaliData, is.na(situational_attribution)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #257 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- lm(situational_attribution ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- lm(situational_attribution ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- lm(situational_attribution ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- lm(situational_attribution ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- lm(situational_attribution ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- lm(situational_attribution ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- lm(situational_attribution ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #diagnostics of the global model- looks fine plot(globalModel) hist(resid(globalModel)) #model comparison using MUMIn. culture model is best supported, closely followed by parents (modelComparison <- model.sel(demographyModel, parentsModel, birthModel, cultureModel, horizontalModel, verticalModel, globalModel)) #models with delta less than 4: parents and culture subset(modelComparison, delta <4) #tidy up output table<-as.data.frame(modelComparison)[14:18] table[,2:3]<- round(table[,2:3],2) table[,4:5]<- round(table[,4:5],3) table #multimodel averaging across all models - suggests parents born as important summary(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) confint(model.avg(modelComparison, subset = delta < 4, revised.var = TRUE)) #exploratory regression models------------------------------------ #null model summary(nullModelSIT <- lm(situational_attribution ~ 1, data = bengaliData, na.action = na.exclude)) #culturalgroup: both 1st gen and 2nd gen sig more SIT than non-migrant summary(cultureModelSIT <- lm(situational_attribution ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelSIT,cultureModelSIT) #add age and sex, not sig summary(agesexcultureModelSIT <- lm(situational_attribution ~ cultural_group + age + sex, data = bengaliData, na.action = na.exclude)) #years education and occupation: neither significant summary(eduagecultureModelSIT <- lm(situational_attribution ~ cultural_group + years_education + occupation, data = bengaliData, na.action = na.exclude)) #all media variables: none sig summary(mediacultureModelSIT <- lm(situational_attribution ~ cultural_group + media_internet + media_tv + media_print, data = bengaliData, na.action = na.exclude)) #add religiosity; not sig summary(relcultureModelSIT <- lm(situational_attribution ~ cultural_group + religiosity, data = bengaliData, na.action = na.exclude)) #add family variables; not sig summary(familycultureModelSIT <- lm(situational_attribution ~ cultural_group + family_contact + family_interaction, data = bengaliData, na.action = na.exclude)) #add languages; sig interaction between languages & culture summary(lancultureModelSIT <- lm(situational_attribution ~ cultural_group + languages + heritage_language, data = bengaliData, na.action = na.exclude)) summary(lanBYcultureModelSIT <- lm(situational_attribution ~ cultural_group * languages + heritage_language, data = bengaliData, na.action = na.exclude)) #remove heritage language summary(lancultureModelSIT <- lm(situational_attribution ~ cultural_group + languages, data = bengaliData, na.action = na.exclude)) summary(lanBYcultureModelSIT <- lm(situational_attribution ~ cultural_group * languages, data = bengaliData, na.action = na.exclude)) #model comparison: interaction only marginally better than additive (p=0.06), but given interaction plot, keep it in summary(cultureModelSITlang <- lm(situational_attribution ~ cultural_group, data = bengaliData, subset=(languages != "NA"))) anova(cultureModelSITlang, lancultureModelSIT, lanBYcultureModelSIT) #plot languages * culture interaction: bengali groups decrease in SIT with languages; UK increase (but max UK is 2...) ggplot(bengaliData, aes(languages, situational_attribution, colour = cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group)) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(1,7), breaks = seq(0,100,by=1)) + scale_x_continuous(breaks = seq(0,100,by=1)) + labs(x = "Languages spoken", y = "Situational attribution") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #12-pt font, no legend (add labels in later) ggplot(bengaliData, aes(languages, situational_attribution, colour = cultural_group)) + geom_smooth(method=glm, alpha = 0.1) + scale_y_continuous(limits = c(1,7), breaks = seq(1,7,by=1)) + scale_x_continuous(breaks = seq(0,100,by=1)) + labs(x = "Languages spoken", y = "Situational attribution") + theme(axis.text.x = element_text(color = "Black"), axis.line = element_line(), axis.text.y = element_text(color = "Black"), panel.background = element_rect(fill = "white"), panel.grid.minor = element_blank(), axis.ticks = element_line(color = "black"), legend.position = "none", plot.margin=unit(c(5,1,0,0),"mm")) + scale_colour_manual(values=c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")) ggsave(file="sitatt_languages.tiff", width = 6.5, height = 6, units = "cm") #final model---------------------------------------------------- #final model: culture * language interaction. Bengali groups are more SIT than non-migrant. Languages spoken increases SIT for non-migrant group, decreases SIT for Bengali groups summary(finalModelSIT <- lm(situational_attribution ~ cultural_group * languages, data = bengaliData, na.action = na.exclude)) #check whether SIT decreases in non-migrant group - yes it does (p=0.0097) summary(finalModelSIT <- lm(situational_attribution ~ languages, data = bengaliData, subset = (cultural_group == "Non-migrant"))) #within 1st gen and 2nd gen groups only---------------------------- #apply previously best-fitting model to subset: now nothing is sig summary(langBYcultureModelSITImm <- lm(situational_attribution ~ cultural_group * languages, data = bengaliData, subset = (cultural_group != "Non-migrant"))) #languages alone also not sig; culture is marginal summary(langcultureModelSITImm <- lm(situational_attribution ~ cultural_group + languages, data = bengaliData, subset = (cultural_group != "Non-migrant"))) #add acculturation vars: not sig summary(accoccagecultureModelSITImm <- lm(situational_attribution ~ cultural_group + languages + acculturation_UK + acculturation_heritage, data = bengaliData, subset = (cultural_group != "Non-migrant"))) #conclusion: no effect of acculturation #within 1st gen group only--------------------- #now can't do culture, languages not sig summary(langcultureModelSIT1st <- lm(situational_attribution ~ languages, data = bengaliData, subset = (cultural_group == "1st gen"))) #add age of migration, approaching significance p=0.07 summary(miglangModelSIT1st <- lm(situational_attribution ~ languages + age_migration, data = bengaliData, subset = (cultural_group == "1st gen"))) #conclusion: no effect of age of migration #HORIZON RATIO--------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$horizon_ratio, bengaliData$cultural_group, summary) by(bengaliData$horizon_ratio, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations------------------------ #draw histogram - big spike at zero ggplot(bengaliData, aes(horizon_ratio)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "horizon_ratio", y="Density") #not normally distributed shapiro.test(bengaliData$horizon_ratio) #levene's test - significant so assumption of equal variances is not quite met leveneTest(bengaliData$horizon_ratio, bengaliData$cultural_group, center=median) #on assumption that horizon of zero and 1 suggests not following the instructions (i.e. not including a horizon at all), remove these Ps bengaliData$horizon_ratioSUB <- bengaliData$horizon_ratio bengaliData$horizon_ratioSUB <- ifelse(bengaliData$horizon_ratioSUB == 1, NA, bengaliData$horizon_ratioSUB) bengaliData$horizon_ratioSUB <- ifelse(bengaliData$horizon_ratioSUB == 0, NA, bengaliData$horizon_ratioSUB) #draw histogram - much better ggplot(bengaliData, aes(horizon_ratioSUB)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "horizon_ratio", y="Density") #not normally distributed, but much closer shapiro.test(bengaliData$horizon_ratioSUB) #levene's test - marginal (p<0.045) so probably fine leveneTest(bengaliData$horizon_ratioSUB, bengaliData$cultural_group, center=median) #redo raw means and sds across groups---------------------------------------- by(bengaliData$horizon_ratioSUB, bengaliData$cultural_group, summary) by(bengaliData$horizon_ratioSUB, bengaliData$cultural_group, sd, na.rm = TRUE) #culture comparison----------------------------------- #graphically - looks like no diffs ggplot(bengaliData, aes(cultural_group, horizon_ratioSUB)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0,1)) + labs(x = "", y = "Horizon ratio") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culture-only regressions with subset data excluding 0 and 1 #null model summary(nullModelHOR <- lm(horizon_ratioSUB ~ 1, data = bengaliData, na.action = na.exclude)) #culture model, not sig summary(cultureModelHOR <- lm(horizon_ratioSUB ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelHOR,cultureModelHOR) #posthoc tests show no sig diff between 1st gen and 2nd gen summary(posthocs <- glht(cultureModelHOR, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelHOR) confint(posthocs) #model comparison------------------------------ #from above, in case I forget - need to exclude zeroes to run linear model bengaliData$horizon_ratioSUB <- bengaliData$horizon_ratio bengaliData$horizon_ratioSUB <- ifelse(bengaliData$horizon_ratioSUB == 1, NA, bengaliData$horizon_ratioSUB) bengaliData$horizon_ratioSUB <- ifelse(bengaliData$horizon_ratioSUB == 0, NA, bengaliData$horizon_ratioSUB) bengaliDataSub <- subset(bengaliData, is.na(horizon_ratioSUB)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #223 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- lm(horizon_ratioSUB ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- lm(horizon_ratioSUB ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- lm(horizon_ratioSUB ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- lm(horizon_ratioSUB ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- lm(horizon_ratioSUB ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal varialbes (relating to peers/local culture) summary(horizontalModel <- lm(horizon_ratioSUB ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- lm(horizon_ratioSUB ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #POOR GLOBAL MODEL FIT, DO NOT CONTINUE #exploratory regression models---------------------------- #null model summary(nullModelHOR <- lm(horizon_ratioSUB ~ 1, data = bengaliData, na.action = na.exclude)) #culture model, not sig summary(cultureModelHOR <- lm(horizon_ratioSUB ~ cultural_group, data = bengaliData, na.action = na.exclude)) anova(nullModelHOR,cultureModelHOR) #try age and sex, not sig summary(culturesexageModelHOR <- lm(horizon_ratioSUB ~ cultural_group + sex + age, data = bengaliData, na.action = na.exclude)) #years education and occupation: not sig summary(eduoccModelHOR <- lm(horizon_ratioSUB ~ years_education + occupation, data = bengaliData, na.action = na.exclude)) #all media variables: none sig summary(mediaModelHOR <- lm(horizon_ratioSUB ~ media_internet + media_tv + media_print, data = bengaliData, na.action = na.exclude)) #add religiosity; not sig summary(religionModelHOR <- lm(horizon_ratioSUB ~ religiosity, data = bengaliData, na.action = na.exclude)) #add family variables; not sig summary(familyModelHOR <- lm(horizon_ratioSUB ~ family_contact + family_interaction, data = bengaliData, na.action = na.exclude)) #add language; not sig summary(languageModelHOR <- lm(horizon_ratioSUB ~ languages + heritage_language, data = bengaliData, na.action = na.exclude)) #final model------------------------- # nothing significant. Nothing predicts horizon height #within 1st gen and 2nd gen groups only---------------------------- #culture, age & sex still not sig summary(cultureModelHORImm <- lm(horizon_ratioSUB ~ cultural_group + age + sex, data = bengaliData, subset = (cultural_group != "Non-migrant"))) #add acculturation vars: neither are sig summary(accModelHORImm <- lm(horizon_ratioSUB ~ acculturation_UK + acculturation_heritage, data = bengaliData, subset = (cultural_group != "Non-migrant"))) #within 1st gen group only-------------------------- #still no sex or age summary(sexageModelHOR1st <- lm(horizon_ratioSUB ~ sex + age, data = bengaliData, subset = (cultural_group == "1st gen"))) #add age of migration, sig but given lack of other effects, don't buy it summary(agemigModelHOR1st <- lm(horizon_ratioSUB ~ age_migration, data = bengaliData, subset = (cultural_group == "1st gen"))) # ADDITIONAL OBJECTS----------------------------------------------- #raw means and sds across groups---------------------------------------- by(bengaliData$additional_objects, bengaliData$cultural_group, summary) by(bengaliData$additional_objects, bengaliData$cultural_group, sd, na.rm = TRUE) #check assumptions / transformations------------------------ #draw histogram - not normal, bunched up at low values ggplot(bengaliData, aes(additional_objects)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "additional_objects", y="Density") #not normally distributed shapiro.test(bengaliData$additional_objects) #levene's test - not significant so assumption of equal variances is met leveneTest(bengaliData$additional_objects, bengaliData$cultural_group, center=median) #culture comparison-------------------------------- #graphically - big error bars but look lower in 2nd gen than others ggplot(bengaliData, aes(cultural_group, additional_objects)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0,25)) + labs(x = "", y = "additional_objects") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 24), axis.text.y = element_text(color = "Black", size = 24), axis.title.y = element_text(size = 24, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #culture-only regressions bengaliDataOBJ <- subset(bengaliData, is.na(additional_objects)==F) #check poisson assumption that mean = variance - no, mean is 14, var is 395 mean(bengaliDataOBJ$additional_objects) var(bengaliDataOBJ$additional_objects) describe(bengaliDataOBJ$additional_objects) #from histogram, there are a few outliers above 90 objects that don't fit the others - remove these bengaliDataOBJ$additional_objectsSUB <- bengaliDataOBJ$additional_objects bengaliDataOBJ$additional_objectsSUB <- ifelse(bengaliDataOBJ$additional_objectsSUB >90, NA, bengaliDataOBJ$additional_objectsSUB) #draw histogram again- same distribution but no extreme values ggplot(bengaliDataOBJ, aes(additional_objectsSUB)) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x= "additional_objects", y="Density") #also smaller error bars - and even less diff between cultural groups ggplot(bengaliDataOBJ, aes(cultural_group, additional_objectsSUB)) + stat_summary(fun.y = mean, geom = "line", aes(group=1),fill = "White", color = "Black", size = 1) + stat_summary(fun.y = mean, geom = "point", size = 4) + coord_cartesian(ylim = c(0,25)) + labs(x = "", y = "Additional objects") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 1) + theme(axis.text.x = element_text(color = "Black", size = 14), axis.text.y = element_text(color = "Black", size = 18), axis.title.y = element_text(size = 18, vjust = 1.3), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), axis.ticks = element_line(size = 1, color = "black")) #BUT, mean still != variance - now, mean is 12, var is 201 - still no good for poisson mean(bengaliDataOBJ$additional_objectsSUB, na.rm = TRUE) var(bengaliDataOBJ$additional_objectsSUB, na.rm = TRUE) ##number of removed outliers for methods 5 describe(bengaliDataOBJ$additional_objects) describe(bengaliDataOBJ$additional_objectsSUB) #also, null model with poisson distribution shows poor fit, v. high res dev summary(nullModelOBJpoi <- glm(additional_objectsSUB ~ 1, data = bengaliDataOBJ, family = "poisson", na.action = na.exclude)) with(nullModelOBJpoi, cbind(res.deviance = deviance, df = df.residual, p = pchisq(deviance, df.residual, lower.tail=FALSE))) #quasipoisson is also poor fit summary(nullModelOBJqpoi <- glm(additional_objectsSUB ~ 1, data = bengaliDataOBJ, family = "quasipoisson", na.action = na.exclude)) with(nullModelOBJqpoi, cbind(res.deviance = deviance, df = df.residual, p = pchisq(deviance, df.residual, lower.tail=FALSE))) #try negative binomial, designed for count data when var is bigger than mean. Much better summary(nullModelOBJnb <- glm.nb(additional_objectsSUB ~ 1, data = bengaliDataOBJ, na.action = na.exclude)) with(nullModelOBJnb, cbind(res.deviance = deviance, df = df.residual, p = pchisq(deviance, df.residual, lower.tail=FALSE))) #add culture, not sig summary(cultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group, data = bengaliDataOBJ, na.action = na.exclude)) anova(nullModelOBJnb, cultureModelOBJnb) #posthoc tests show no sig diffs between any groups, although WB and 1st gen is largest summary(posthocs <- glht(cultureModelOBJnb, linfct = mcp(cultural_group = "Tukey"))) #conf intervals confint(cultureModelOBJnb) confint(posthocs) #model comparison------------------------------------------- #run commands above to get bengaliDataOBJ (exlcude outliers and NA values) bengaliDataSub <- subset(bengaliDataOBJ, is.na(sex)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(age)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_contact)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(family_interaction)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(heritage_language)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_tv)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_internet)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(media_print)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(religiosity)==F) bengaliDataSub <- subset(bengaliDataSub, is.na(years_education)==F) #256 in total after all the exclusions nrow(bengaliDataSub) #demography (age + sex) null model summary(demographyModel <- glm.nb(additional_objectsSUB ~ sex + age, data = bengaliDataSub, na.action = na.exclude)) #model adding parents country of birth summary(parentsModel <- glm.nb(additional_objectsSUB ~ sex + age + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model adding P's country of birth summary(birthModel <- glm.nb(additional_objectsSUB ~ sex + age + country_born, data = bengaliDataSub, na.action = na.exclude)) #'culture' model adding both parents and P's country of birth summary(cultureModel <- glm.nb(additional_objectsSUB ~ sex + age + country_born + parents_born, data = bengaliDataSub, na.action = na.exclude)) #model including all vertical variables (relating to family/parents) summary(verticalModel <- glm.nb(additional_objectsSUB ~ sex + age + parents_born + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #model including all horizontal variables (relating to peers/local culture) summary(horizontalModel <- glm.nb(additional_objectsSUB ~ sex + age + country_born + media_print + media_tv + media_internet + years_education, data = bengaliDataSub, na.action = na.exclude)) #global model including all variables summary(globalModel <- glm.nb(additional_objectsSUB ~ sex + age + country_born + parents_born + media_print + media_tv + media_internet + years_education + religiosity + family_contact + family_interaction + heritage_language, data = bengaliDataSub, na.action = na.exclude)) #check that global fits better than null: no it doesn't, so no further model comparison is possible summary(nullModel <- glm.nb(additional_objectsSUB ~ 1, data = bengaliDataSub, na.action = na.exclude)) anova(nullModel, globalModel) #exploratory regression models------------------------------------- #null model summary(nullModelOBJnb <- glm.nb(additional_objectsSUB ~ 1, data = bengaliDataOBJ, na.action = na.exclude)) #add culture, not sig summary(cultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group, data = bengaliDataOBJ, na.action = na.exclude)) anova(nullModelOBJnb, cultureModelOBJnb) #add age and sex - age is sig, sex not summary(agesexcultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + sex + age, data = bengaliDataOBJ, na.action = na.exclude)) # age alone is sig summary(agecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age, data = bengaliDataOBJ, na.action = na.exclude)) #but model comparison shows no increase in fit anova(nullModelOBJnb,agecultureModelOBJnb) #seems to be a culture * age interaction summary(ageBYcultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group * age, data = bengaliDataOBJ, na.action = na.exclude)) #plot culture*age. Objects declines with age in non-migrant and 1st gen groups, increases in 2nd gen group. Although may be an artifact of fewer old people in 2nd gen group. ggplot(bengaliDataOBJ, aes(age, additional_objectsSUB, colour = cultural_group, linetype=cultural_group)) + geom_point(size = 2.5, aes(shape = cultural_group),alpha=0.2) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "Additional objects") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #model comparison - adding interaction doesn't improve model fit significantly (p<0.08). Given possible artifact, don't keep interaction. bengaliDataOBJage <- subset(bengaliDataOBJ, is.na(age)==F) bengaliDataOBJage <- subset(bengaliDataOBJage, is.na(cultural_group)==F) bengaliDataOBJage <- subset(bengaliDataOBJage, is.na(additional_objectsSUB)==F) summary(agecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age, data = bengaliDataOBJage, na.action = na.exclude)) summary(ageBYcultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group * age, data = bengaliDataOBJage, na.action = na.exclude)) anova(agecultureModelOBJnb,ageBYcultureModelOBJnb) #plot age - very weak effect ggplot(bengaliDataOBJ, aes(age, additional_objectsSUB)) + geom_point(size = 2.5) + geom_smooth(method=glm, alpha = 0.0, size=1.4) + scale_y_continuous(limits = c(0,100), breaks = seq(0,100,by=10)) + scale_x_continuous(breaks = seq(0,100,by=5)) + labs(x = "age", y = "Additional objects") + theme(axis.text.x = element_text(family="Times",color = "Black", size = 18), axis.text.y = element_text(family="Times",color = "Black", size = 18), axis.title.y = element_text(family="Times",size = 26, vjust = 1.5), axis.title.x = element_text(family="Times",size = 26, vjust = 0.0), strip.text = element_text(family="Times",size = 18), panel.background = element_rect(fill = "white"), axis.line = element_line(size = 1), panel.grid.minor = element_blank(), legend.title = element_blank(), legend.text = element_text(family="Times",size = 18), legend.key.width = unit(1, 'cm'), legend.background = element_rect(color = "black", size = 0.7), legend.justification = c(1,0), legend.position = c(0.95,0.55), legend.key = element_blank(), axis.ticks = element_line(size = 1, color = "black")) + guides(colour = guide_legend(override.aes = list(size=1.5))) + scale_colour_manual(values=c("indianred3", "darkolivegreen4", "Orange", "royalblue")) + scale_shape_manual(values=c(15,16,17,18)) #years education and occupation: occupation is marginally sig summary(occeducultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + years_education + occupation, data = bengaliDataOBJ, na.action = na.exclude)) #model comparison - not significant bengaliDataOBJocc <- subset(bengaliDataOBJage, is.na(occupation)==F) summary(agecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age, data = bengaliDataOBJocc, na.action = na.exclude)) summary(occagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + occupation, data = bengaliDataOBJocc, na.action = na.exclude)) anova(agecultureModelOBJnb,occagecultureModelOBJnb) #all media variables: none sig summary(mediaagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + media_print + media_internet + media_tv, data = bengaliDataOBJ, na.action = na.exclude)) #add religiosity; not sig summary(relagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + religiosity, data = bengaliDataOBJ, na.action = na.exclude)) #add family variables; not sig summary(relagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + family_interaction + family_contact, data = bengaliDataOBJ, na.action = na.exclude)) #add language; not sig summary(langagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + heritage_language + languages, data = bengaliDataOBJ, na.action = na.exclude)) #final model---------------------------------------- # no significant predictors #within 1st gen and 2nd gen groups only--------------------- bengaliDataOBJImm <- subset(bengaliDataOBJ, cultural_group!="Non migrant") #apply previously best-fitting model to subset: as expected, age is sig alone summary(langagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age, data = bengaliDataOBJImm, na.action = na.exclude)) #add acculturation vars: no effect of either summary(acclangagecultureModelOBJnb <- glm.nb(additional_objectsSUB ~ cultural_group + age + acculturation_UK + acculturation_heritage, data = bengaliDataOBJImm, na.action = na.exclude)) #within 1st gen group only------------------------------- bengaliDataOBJ1stgen <- subset(bengaliDataOBJ, cultural_group=="1st gen") #effect of language as before summary(langageModelOBJnb <- glm.nb(additional_objectsSUB ~ age, data = bengaliDataOBJImm, na.action = na.exclude)) #add age of migration, not sig summary(miglangageModelOBJnb <- glm.nb(additional_objectsSUB ~ age + age_migration, data = bengaliDataOBJImm, na.action = na.exclude)) #conclusion: no effect of age of migration #END OF FILE--------------------------------------------------------------