This Rmarkdown file shows the data processing, statistical analysis, and output. Anybody wishing to replicate and/or discuss the findings of the article can do so with the following data and code.
Loads required packages (installing them if needed).
# Load packages (installing them if needed).
# The workspace is intentionally not cleared with rm(list = ls()): that call
# wipes the caller's objects when this file is run interactively, and it does
# not reset attached packages or options anyway.
list.of.packages <- c("rmarkdown", "ggplot2", "plyr", "dplyr", "reshape2",
                      "lme4", "ggthemes", "Hmisc", "Rmisc", "pastecs", "gtools")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Attach every package and record whether it loaded. The require() call itself
# is wrapped in suppressPackageStartupMessages(); passing
# suppressPackageStartupMessages(require) as the function only hands the bare
# require() to the apply call, so no startup message is silenced.
packages.loaded <- vapply(
  list.of.packages,
  function(pkg) {
    suppressPackageStartupMessages(
      require(pkg, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE)
    )
  },
  logical(1)
)
# require() returns FALSE instead of failing, so stop here rather than letting
# a missing package surface later as an obscure "could not find function".
if (!all(packages.loaded)) {
  stop("Could not load package(s): ",
       paste(names(packages.loaded)[!packages.loaded], collapse = ", "),
       call. = FALSE)
}
packages.loaded
## Warning: package 'ggthemes' was built under R version 3.2.4
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## The following object is masked from 'package:lattice':
##
## melanoma
## rmarkdown ggplot2 plyr dplyr reshape2 lme4 ggthemes
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## Hmisc Rmisc pastecs gtools
## TRUE TRUE TRUE TRUE
Version information for R base and packages used:
# R version used to produce this document
R.version.string
## [1] "R version 3.2.3 (2015-12-10)"
# Installed version of every package in list.of.packages, keyed by package name
versions <- setNames(lapply(list.of.packages, packageVersion), list.of.packages)
as.data.frame(versions)
## rmarkdown ggplot2 plyr dplyr reshape2 lme4 ggthemes Hmisc Rmisc
## 1 0.2.68 2.0.0 1.8.3 0.4.3 1.4.1 1.1.10 3.0.2 3.17.1 1.5
## pastecs gtools
## 1 1.3.18 3.5.0
Load the full data file from clickbaitdatatoshare.txt.
# Read the data file; the first row supplies the column names
data <- read.table(file = "clickbaitdatatoshare.txt", header = TRUE)
# Mean of every code per rater (columns 5 to 19)
colMeans(data[5:19])
## PositiveFramingGL ArousalSocialCurrencyGL ArousalPhrasingGL
## 0.18398876 0.58941948 0.37453184
## WordplayGL QuestionGL PositiveFramingAF
## 0.05571161 0.08239700 0.20973783
## ArousalSocialCurrencyAF ArousalPhrasingAF WordplayAF
## 0.20224719 0.12453184 0.01029963
## QuestionAF PositiveFramingES ArousalSocialCurrencyES
## 0.08239700 0.16619850 0.18679775
## ArousalPhrasingES WordplayES QuestionES
## 0.11189139 0.05758427 0.08239700
# Mean of every "best of three" code (columns 20 to 24)
colMeans(data[20:24])
## PositiveFramingBest ArousalSocialCurrencyBest
## 0.18305243 0.26451311
## ArousalPhrasingBest WordplayBest
## 0.15215356 0.03558052
## QuestionBest
## 0.08239700
# Overall mean score
mean(data[["Score"]])
## [1] 9.916199
# Mean score for titles without (0) and with (1) each best-of-three code
aggregate(Score ~ PositiveFramingBest, data = data, FUN = mean)
## PositiveFramingBest Score
## 1 0 9.064183
## 2 1 13.718670
aggregate(Score ~ ArousalSocialCurrencyBest, data = data, FUN = mean)
## ArousalSocialCurrencyBest Score
## 1 0 7.403565
## 2 1 16.902655
aggregate(Score ~ ArousalPhrasingBest, data = data, FUN = mean)
## ArousalPhrasingBest Score
## 1 0 8.618443
## 2 1 17.147692
aggregate(Score ~ WordplayBest, data = data, FUN = mean)
## WordplayBest Score
## 1 0 9.966505
## 2 1 8.552632
aggregate(Score ~ QuestionBest, data = data, FUN = mean)
## QuestionBest Score
## 1 0 9.70102
## 2 1 12.31250
# Split the rows by Year, then compare mean scores between the two years.
# which() drops rows where the comparison is NA, matching subset().
scores13 <- data[which(data$Year == "2013"), ]
scores14 <- data[which(data$Year == "2014"), ]
# var.equal is left at its default, so this is the Welch two-sample test
t.test(scores13$Score, scores14$Score)
##
## Welch Two Sample t-test
##
## data: scores13$Score and scores14$Score
## t = 1.0566, df = 1530.1, p-value = 0.2909
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.183791 3.948229
## sample estimates:
## mean of x mean of y
## 10.758084 9.375865
## define custom mean function
# Summary function for stat_summary(): returns the mean as y, ymin and ymax,
# so the "errorbar" geom is drawn as a single horizontal bar at the mean.
plot.mean <- function(x) {
  centre <- mean(x)
  c(y = centre, ymin = centre, ymax = centre)
}

# Reshape to long format: one row per title and clickbait measure
temp <- select(data, Title, Score,
               PositiveFramingBest, ArousalPhrasingBest, WordplayBest, QuestionBest)
melteddf <- melt(temp, id.vars = c("Score", "Title"))
melteddf$value <- as.factor(melteddf$value)

# Jittered scores per measure, absent vs present, with the group mean overlaid.
# interaction(value, variable) gives two x positions per measure, hence the
# axis breaks at 1.5, 3.5, 5.5 and 7.5.
ggplot(melteddf,
       aes(x = as.numeric(interaction(value, variable)),
           y = Score,
           colour = value)) +
  geom_jitter(aes(colour = value),
              position = position_jitter(width = 0.3, height = 0.3),
              alpha = 0.2,
              size = 4.5) +
  stat_summary(fun.data = "plot.mean",
               geom = "errorbar",
               colour = "black",
               width = 0.5,
               size = 1.5) +
  coord_cartesian(ylim = c(0, 40)) +
  scale_x_continuous(breaks = c(1.5, 3.5, 5.5, 7.5),
                     labels = c("Positive Framing", "Phrasing Arousal", "Wordplay", "Question")) +
  scale_colour_manual(values = c("#D55E00", "#009E73"), labels = c("no", "yes")) +
  guides(colour = guide_legend(title = "Present?", override.aes = list(alpha = 1))) +
  labs(title = "Measures of clickbait and Altmetric score",
       x = "Clickbait measure",
       y = "Altmetric score") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold"),
        axis.text = element_text(size = 12),
        text = element_text(size = 14))
# Correlation between title length and score (rcorr's default type)
rcorr(data$Length, data$Score)
## x y
## x 1.00 -0.04
## y -0.04 1.00
##
## n= 2136
##
##
## P
## x y
## x 0.0827
## y 0.0827
# Simple linear regression of score on title length. The coefficients are kept
# in a variable so the plot draws the fitted line itself rather than numbers
# copied by hand from the console output.
length.coefs <- coef(lm(Score ~ Length, data = data))
length.coefs
## (Intercept) Length
## 12.7788637 -0.2331345
# Scatter plot with the fitted regression line.
# NOTE: limits = c(0, 30) makes ggplot drop points whose Length falls outside
# that range; the regression line is still based on all rows.
ggplot(data, aes(x = Length, y = Score)) +
  geom_point() +
  geom_abline(intercept = length.coefs[["(Intercept)"]],
              slope = length.coefs[["Length"]],
              size = 1,
              colour = "red") +
  ggtitle("Title length and Altmetric Score") +
  xlab("Article Title Length") +
  ylab("Altmetric Score") +
  theme_classic() +
  scale_x_continuous(limits = c(0, 30), breaks = seq(0, 30, by = 5)) +
  theme(plot.title = element_text(face = "bold"),
        axis.text = element_text(size = 12),
        text = element_text(size = 14))
# Treat the title and the five best-of-three codes as categorical variables
data$Title <- as.factor(data$Title)
best.codes <- c("ArousalPhrasingBest", "PositiveFramingBest", "QuestionBest",
                "WordplayBest", "ArousalSocialCurrencyBest")
data[best.codes] <- lapply(
  data[best.codes],
  function(code) as.factor(as.character(code))
)
# log transform score (natural logarithm)
data$ScoreTransformed <- log(data$Score)
# Main-effects model on the raw score (QuestionBest not included)
model.full <- lm(
  formula = Score ~ ArousalPhrasingBest + PositiveFramingBest + WordplayBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

# Same predictors, log-transformed score as the response
model.fullT <- lm(
  formula = ScoreTransformed ~ ArousalPhrasingBest + PositiveFramingBest + WordplayBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

# Nested sequence on the log-transformed score: start from social currency and
# length, then add one clickbait code per step.
model.first <- lm(
  formula = ScoreTransformed ~ ArousalSocialCurrencyBest + Length,
  data = data
)

model.framing <- lm(
  formula = ScoreTransformed ~ PositiveFramingBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

model.framingphrasing <- lm(
  formula = ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

model.framingphrasingwordplay <- lm(
  formula = ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest + WordplayBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

model.framingphrasingwordplayquestion <- lm(
  formula = ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest + WordplayBest +
    QuestionBest +
    ArousalSocialCurrencyBest + Length,
  data = data
)

# Sequential F-tests: each model is compared with the one before it
anova(
  model.first,
  model.framing,
  model.framingphrasing,
  model.framingphrasingwordplay,
  model.framingphrasingwordplayquestion
)
## Analysis of Variance Table
##
## Model 1: ScoreTransformed ~ ArousalSocialCurrencyBest + Length
## Model 2: ScoreTransformed ~ PositiveFramingBest + ArousalSocialCurrencyBest +
## Length
## Model 3: ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest +
## ArousalSocialCurrencyBest + Length
## Model 4: ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest +
## WordplayBest + ArousalSocialCurrencyBest + Length
## Model 5: ScoreTransformed ~ PositiveFramingBest + ArousalPhrasingBest +
## WordplayBest + QuestionBest + ArousalSocialCurrencyBest +
## Length
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 2133 2453.7
## 2 2132 2447.7 1 5.968 5.3465 0.02086 *
## 3 2131 2382.1 1 65.609 58.7724 2.672e-14 ***
## 4 2130 2376.7 1 5.445 4.8772 0.02732 *
## 5 2129 2376.6 1 0.045 0.0400 0.84143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model fit keeps improving as each clickbait factor is added, except for
# Question (model 5 vs model 4 in the ANOVA table: p = 0.84),
# so the full model without Question is retained.
summary(model.fullT)
##
## Call:
## lm(formula = ScoreTransformed ~ ArousalPhrasingBest + PositiveFramingBest +
## WordplayBest + ArousalSocialCurrencyBest + Length, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4763 -0.6761 -0.1538 0.5745 4.9411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.515340 0.066677 22.726 < 2e-16 ***
## ArousalPhrasingBest1 0.565216 0.072109 7.838 7.15e-15 ***
## PositiveFramingBest1 0.152892 0.059842 2.555 0.0107 *
## WordplayBest1 -0.306143 0.138593 -2.209 0.0273 *
## ArousalSocialCurrencyBest1 0.512581 0.052459 9.771 < 2e-16 ***
## Length -0.029217 0.005088 -5.743 1.06e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.056 on 2130 degrees of freedom
## Multiple R-squared: 0.09029, Adjusted R-squared: 0.08816
## F-statistic: 42.28 on 5 and 2130 DF, p-value: < 2.2e-16
# The model was fitted on log(Score), so the estimates are transformed back to
# the original scale with exp().
fullT.coefs <- coef(model.fullT)

# Baseline in Altmetric points: the prediction when every clickbait code is at
# its reference level (0). Length enters the model uncentred as far as this
# file shows, so this is the prediction at Length = 0, not at mean title length.
baseline.score <- exp(fullT.coefs[1])
baseline.score
## (Intercept)
## 4.550968
# Predicted Altmetric points when one effect at a time is added to the baseline
# (for Length: one additional unit of length). Negative indexing is used rather
# than 2:length(...), which would index backwards for an intercept-only model.
score.with.effect <- exp(fullT.coefs[-1] + fullT.coefs[1])
score.with.effect
## ArousalPhrasingBest1 PositiveFramingBest1
## 8.008921 5.302783
## WordplayBest1 ArousalSocialCurrencyBest1
## 3.350792 7.598273
## Length
## 4.419926
# Change in Altmetric points for each effect relative to the baseline
score.with.effect - baseline.score
## ArousalPhrasingBest1 PositiveFramingBest1
## 3.4579526 0.7518143
## WordplayBest1 ArousalSocialCurrencyBest1
## -1.2001767 3.0473048
## Length
## -0.1310420
# interaction model: all main effects and every interaction among the four
# clickbait codes and title length, fitted on the log-transformed score
model.interactions <- lm(
  formula = ScoreTransformed ~ PositiveFramingBest * ArousalPhrasingBest *
    WordplayBest * ArousalSocialCurrencyBest * Length,
  data = data
)
summary(model.interactions)
##
## Call:
## lm(formula = ScoreTransformed ~ PositiveFramingBest * ArousalPhrasingBest *
## WordplayBest * ArousalSocialCurrencyBest * Length, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5087 -0.6546 -0.1529 0.5743 4.9160
##
## Coefficients: (8 not defined because of singularities)
## Estimate
## (Intercept) 1.467918
## PositiveFramingBest1 0.338878
## ArousalPhrasingBest1 0.808871
## WordplayBest1 -0.636365
## ArousalSocialCurrencyBest1 0.326466
## Length -0.024041
## PositiveFramingBest1:ArousalPhrasingBest1 -0.557040
## PositiveFramingBest1:WordplayBest1 1.246547
## ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1 0.844180
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1 -0.029296
## WordplayBest1:ArousalSocialCurrencyBest1 0.688464
## PositiveFramingBest1:Length -0.017452
## ArousalPhrasingBest1:Length -0.029035
## WordplayBest1:Length 0.042444
## ArousalSocialCurrencyBest1:Length 0.010448
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1 -0.668655
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1 -3.785558
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:Length 0.052234
## PositiveFramingBest1:WordplayBest1:Length -0.095325
## ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1:Length -0.052848
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.026324
## WordplayBest1:ArousalSocialCurrencyBest1:Length -0.094664
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.028011
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length 0.248399
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## Std. Error
## (Intercept) 0.090667
## PositiveFramingBest1 0.232297
## ArousalPhrasingBest1 0.275726
## WordplayBest1 0.555001
## ArousalSocialCurrencyBest1 0.186553
## Length 0.007267
## PositiveFramingBest1:ArousalPhrasingBest1 0.914143
## PositiveFramingBest1:WordplayBest1 1.536790
## ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1 0.442640
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1 0.447460
## WordplayBest1:ArousalSocialCurrencyBest1 0.881172
## PositiveFramingBest1:Length 0.016894
## ArousalPhrasingBest1:Length 0.020508
## WordplayBest1:Length 0.040511
## ArousalSocialCurrencyBest1:Length 0.014217
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1 1.262130
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1 3.021820
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:Length 0.062578
## PositiveFramingBest1:WordplayBest1:Length 0.101059
## ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1:Length 0.031325
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.034398
## WordplayBest1:ArousalSocialCurrencyBest1:Length 0.066114
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.084712
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length 0.187150
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## t value
## (Intercept) 16.190
## PositiveFramingBest1 1.459
## ArousalPhrasingBest1 2.934
## WordplayBest1 -1.147
## ArousalSocialCurrencyBest1 1.750
## Length -3.309
## PositiveFramingBest1:ArousalPhrasingBest1 -0.609
## PositiveFramingBest1:WordplayBest1 0.811
## ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1 1.907
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1 -0.065
## WordplayBest1:ArousalSocialCurrencyBest1 0.781
## PositiveFramingBest1:Length -1.033
## ArousalPhrasingBest1:Length -1.416
## WordplayBest1:Length 1.048
## ArousalSocialCurrencyBest1:Length 0.735
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1 -0.530
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1 -1.253
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:Length 0.835
## PositiveFramingBest1:WordplayBest1:Length -0.943
## ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1:Length -1.687
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.765
## WordplayBest1:ArousalSocialCurrencyBest1:Length -1.432
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.331
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length 1.327
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## Pr(>|t|)
## (Intercept) < 2e-16
## PositiveFramingBest1 0.144765
## ArousalPhrasingBest1 0.003387
## WordplayBest1 0.251676
## ArousalSocialCurrencyBest1 0.080265
## Length 0.000954
## PositiveFramingBest1:ArousalPhrasingBest1 0.542353
## PositiveFramingBest1:WordplayBest1 0.417378
## ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1 0.056637
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1 0.947805
## WordplayBest1:ArousalSocialCurrencyBest1 0.434711
## PositiveFramingBest1:Length 0.301710
## ArousalPhrasingBest1:Length 0.156990
## WordplayBest1:Length 0.294886
## ArousalSocialCurrencyBest1:Length 0.462460
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1 0.596318
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1 0.210439
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:Length 0.403973
## PositiveFramingBest1:WordplayBest1:Length 0.345653
## ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalSocialCurrencyBest1:Length 0.091738
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.444199
## WordplayBest1:ArousalSocialCurrencyBest1:Length 0.152339
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1 NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length 0.740934
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length 0.184563
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length NA
##
## (Intercept) ***
## PositiveFramingBest1
## ArousalPhrasingBest1 **
## WordplayBest1
## ArousalSocialCurrencyBest1 .
## Length ***
## PositiveFramingBest1:ArousalPhrasingBest1
## PositiveFramingBest1:WordplayBest1
## ArousalPhrasingBest1:WordplayBest1
## PositiveFramingBest1:ArousalSocialCurrencyBest1 .
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1
## WordplayBest1:ArousalSocialCurrencyBest1
## PositiveFramingBest1:Length
## ArousalPhrasingBest1:Length
## WordplayBest1:Length
## ArousalSocialCurrencyBest1:Length
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1
## PositiveFramingBest1:ArousalPhrasingBest1:Length
## PositiveFramingBest1:WordplayBest1:Length
## ArousalPhrasingBest1:WordplayBest1:Length
## PositiveFramingBest1:ArousalSocialCurrencyBest1:Length .
## ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length
## WordplayBest1:ArousalSocialCurrencyBest1:Length
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:Length
## PositiveFramingBest1:ArousalPhrasingBest1:ArousalSocialCurrencyBest1:Length
## PositiveFramingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length
## ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length
## PositiveFramingBest1:ArousalPhrasingBest1:WordplayBest1:ArousalSocialCurrencyBest1:Length
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.056 on 2112 degrees of freedom
## Multiple R-squared: 0.09794, Adjusted R-squared: 0.08812
## F-statistic: 9.97 on 23 and 2112 DF, p-value: < 2.2e-16