# Read in the data
song_list <- read.csv(here("data","songs_top_30.csv"))
song_list <- song_list[1:484,1:8]
# get stop words
data(stop_words)
stop_words <- stop_words %>%
filter(!(word == "little"))
artist_list <- song_list %>% group_by(Artist) %>%
tally() %>% arrange(desc(n)) %>% rename(!!"Songs" := n)
datatable(artist_list,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
There are 121 different artists with a top 30 song since 2013. Of these artists, only 41 have had 5 or more top 30 songs.
# years
years <- year_list$Entered.Top.30.In.
years <- years[-1]
# words to look for
veh <- c("truck", "pickup", "chevy", "ford", "tacoma",
"chevrolet", "tailgate", "car")
cl <- c("jeans", "cutoffs", "jean", "shirt", "hat",
"shoe", "dress", "boots")
dr <- c("beer", "drink", "wine", "whiskey", "shots",
"tequila", "vodka", "margarita", "cocktail", "sippin",
"alcohol")
bod <- c("eyes", "lips", "hair", "mouth", "hand",
"hands", "feet", "chest", "finger", "ears",
"hips", "legs", "body", "shoulder")
gen <- c("yeah", "night", "tonight", "little", "town",
"eyes", "eye", "drink", "drinkin", "girl",
"boy", "baby", "wanna", "gonna", "road",
"hand", "kiss", "love", "lovin", "time",
"heart", "feel", "feeling")
# Prep df by year
prep_df <- function(df, year){
songs <- as.character(df$Lyrics)[which(df$Entered.Top.30.In. == year)]
text_df <- tibble(
song = as.character(df$Song[which(df$Entered.Top.30.In. == year)]),
text = songs)
return(text_df)
}
# function to find a word
search_word <- function(df, term, year, plural=NULL){
df.y <- prep_df(df, year = year)
counts.temp <- df.y %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE)
counts <- counts.temp %>%
filter(word %in% term)
if(!is.null(plural)){
for(i in 1:nrow(plural)){
counts$word <- stringr::str_replace_all(counts$word, plural[i,1], plural[i,2])
}
}
counts<- counts %>%
tally()
return(counts)
}
named <- "vehicle"
veh.l <- lapply(years, function(x){
search_word(df = song_list, term = veh, year = x)
})
names(veh.l) <- years
vehicle <- do.call(rbind, lapply(veh.l, nrow)) %>%
data.frame(row.names = years) %>%
`colnames<-`(named)
veh.c <- veh.l %>% map_df(I, .id = "year") %>%
as.matrix() %>% data.frame()
datatable(veh.c,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
veh.d <- lapply(years, function(x){
lapply(veh, function(i){
search_word(df = song_list, term = i, year = x)
}) %>% `names<-`(veh)
})
names(veh.d) <- years
veh.df <- lapply(1:length(years), function(x){
bind_rows(veh.d[[x]], .id = "term") %>%
group_by(term) %>% tally()
})
names(veh.df) <- years
veh.df <- bind_rows(veh.df, .id = "year")
veh.df$percentage <- 0
veh.df$percentage[veh.df$year == 2014] <-
(veh.df$n[veh.df$year == 2014]/vehicle$vehicle[1])*100
veh.df$percentage[veh.df$year == 2015] <-
(veh.df$n[veh.df$year == 2015]/vehicle$vehicle[2])*100
veh.df$percentage[veh.df$year == 2016] <-
(veh.df$n[veh.df$year == 2016]/vehicle$vehicle[3])*100
veh.df$percentage[veh.df$year == 2017] <-
(veh.df$n[veh.df$year == 2017]/vehicle$vehicle[4])*100
veh.df$percentage[veh.df$year == 2018] <-
(veh.df$n[veh.df$year == 2018]/vehicle$vehicle[5])*100
veh.df$percentage[veh.df$year == 2019] <-
(veh.df$n[veh.df$year == 2019]/vehicle$vehicle[6])*100
veh.df$percentage <- round(veh.df$percentage, 2)
datatable(veh.df,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
named <- "clothes"
plural <- rbind(c("jeans", "jean"))
cl.l <- lapply(years, function(x){
search_word(df = song_list, term = cl, year = x, plural = plural)
})
names(cl.l) <- years
clothes <- do.call(rbind, lapply(cl.l, nrow)) %>%
data.frame(row.names = years) %>%
`colnames<-`(named)
cl.c <- cl.l %>% map_df(I, .id = "year") %>%
as.matrix() %>% data.frame()
datatable(cl.c,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
#cl <- cl[-c(1,3)]
cl.d <- lapply(years, function(x){
lapply(cl[-c(1,3)], function(i){
search_word(df = song_list, term = i, year = x)
}) %>% `names<-`(cl[-c(1,3)])
})
names(cl.d) <- years
# deal w/ plurals
cl.d.2 <- lapply(years, function(x){
search_word(df = song_list, term = cl[c(1,3)], year = x)
})
names(cl.d.2) <- years
cl.df <- lapply(1:length(years), function(x){
bind_rows(cl.d[[x]], .id = "term") %>%
group_by(term) %>% tally()
})
names(cl.df) <- years
temp <- sapply(cl.d.2, nrow)
cl.df.t <- data.frame(year = names(temp),
term = "jean(s)",
n = temp)
cl.df <- bind_rows(cl.df, .id = "year")
cl.df <- cl.df %>% add_row(cl.df.t)
cl.df$percentage <- 0
cl.df$percentage[cl.df$year == 2014] <-
(cl.df$n[cl.df$year == 2014]/clothes$clothes[1])*100
cl.df$percentage[cl.df$year == 2015] <-
(cl.df$n[cl.df$year == 2015]/clothes$clothes[2])*100
cl.df$percentage[cl.df$year == 2016] <-
(cl.df$n[cl.df$year == 2016]/clothes$clothes[3])*100
cl.df$percentage[cl.df$year == 2017] <-
(cl.df$n[cl.df$year == 2017]/clothes$clothes[4])*100
cl.df$percentage[cl.df$year == 2018] <-
(cl.df$n[cl.df$year == 2018]/clothes$clothes[5])*100
cl.df$percentage[cl.df$year == 2019] <-
(cl.df$n[cl.df$year == 2019]/clothes$clothes[6])*100
cl.df$percentage <- round(cl.df$percentage, 2)
datatable(cl.df,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
named <- "drinks"
dr.l <- lapply(years, function(x){
search_word(df = song_list, term = dr, year = x)
})
names(dr.l) <- years
drinks <- do.call(rbind, lapply(dr.l, nrow)) %>%
data.frame(row.names = years) %>%
`colnames<-`(named)
dr.c <- dr.l %>% map_df(I, .id = "year") %>%
as.matrix() %>% data.frame()
datatable(dr.c,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
dr.d <- lapply(years, function(x){
lapply(dr, function(i){
search_word(df = song_list, term = i, year = x)
}) %>% `names<-`(dr)
})
names(dr.d) <- years
dr.df <- lapply(1:length(years), function(x){
bind_rows(dr.d[[x]], .id = "term") %>%
group_by(term) %>% tally()
})
names(dr.df) <- years
dr.df <- bind_rows(dr.df, .id = "year")
dr.df$percentage <- 0
dr.df$percentage[dr.df$year == 2014] <-
(dr.df$n[dr.df$year == 2014]/drinks$drinks[1])*100
dr.df$percentage[dr.df$year == 2015] <-
(dr.df$n[dr.df$year == 2015]/drinks$drinks[2])*100
dr.df$percentage[dr.df$year == 2016] <-
(dr.df$n[dr.df$year == 2016]/drinks$drinks[3])*100
dr.df$percentage[dr.df$year == 2017] <-
(dr.df$n[dr.df$year == 2017]/drinks$drinks[4])*100
dr.df$percentage[dr.df$year == 2018] <-
(dr.df$n[dr.df$year == 2018]/drinks$drinks[5])*100
dr.df$percentage[dr.df$year == 2019] <-
(dr.df$n[dr.df$year == 2019]/drinks$drinks[6])*100
dr.df$percentage <- round(dr.df$percentage, 2)
datatable(dr.df,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
named <- "body"
plural <- rbind(c("hands", "hand"))
bod.l <- lapply(years, function(x){
search_word(df = song_list, term = bod, year = x, plural = plural)
})
names(bod.l) <- years
body <- do.call(rbind, lapply(bod.l, nrow)) %>%
data.frame(row.names = years) %>%
`colnames<-`(named)
bod.c <- bod.l %>% map_df(I, .id = "year") %>%
as.matrix() %>% data.frame()
datatable(bod.c,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
bod.d <- lapply(years, function(x){
lapply(bod[-c(5,6)], function(i){
search_word(df = song_list, term = i, year = x)
}) %>% `names<-`(bod[-c(5,6)])
})
names(bod.d) <- years
bod.df <- lapply(1:length(years), function(x){
bind_rows(bod.d[[x]], .id = "term") %>%
group_by(term) %>% tally()
})
names(bod.df) <- years
bod.df <- bind_rows(bod.df, .id = "year")
bod.d.2 <- lapply(years, function(x){
search_word(df = song_list, term = bod[c(5,6)], year = x)
})
names(bod.d.2) <- years
temp <- sapply(bod.d.2, nrow)
bod.df.t <- data.frame(year = names(temp),
term = "hand(s)",
n = temp)
bod.df <- bod.df %>% add_row(bod.df.t)
bod.df$percentage <- 0
bod.df$percentage[bod.df$year == 2014] <-
(bod.df$n[bod.df$year == 2014]/body$body[1])*100
bod.df$percentage[bod.df$year == 2015] <-
(bod.df$n[bod.df$year == 2015]/body$body[2])*100
bod.df$percentage[bod.df$year == 2016] <-
(bod.df$n[bod.df$year == 2016]/body$body[3])*100
bod.df$percentage[bod.df$year == 2017] <-
(bod.df$n[bod.df$year == 2017]/body$body[4])*100
bod.df$percentage[bod.df$year == 2018] <-
(bod.df$n[bod.df$year == 2018]/body$body[5])*100
bod.df$percentage[bod.df$year == 2019] <-
(bod.df$n[bod.df$year == 2019]/body$body[6])*100
bod.df$percentage <- round(bod.df$percentage, 2)
datatable(bod.df,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
named <- "general"
plural <- rbind(c("eyes", "eye"))
gen.l <- lapply(years, function(x){
search_word(df = song_list, term = gen, year = x)
})
names(gen.l) <- years
general <- do.call(rbind, lapply(gen.l, nrow)) %>%
data.frame(row.names = years) %>%
`colnames<-`(named)
gen.c <- gen.l %>% map_df(I, .id = "year") %>%
as.matrix() %>% data.frame()
datatable(gen.c,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
gen.d <- lapply(years, function(x){
lapply(gen[-c(6,7)], function(i){
search_word(df = song_list, term = i, year = x)
}) %>% `names<-`(gen[-c(6,7)])
})
names(gen.d) <- years
gen.df <- lapply(1:length(years), function(x){
bind_rows(gen.d[[x]], .id = "term") %>%
group_by(term) %>% tally()
})
names(gen.df) <- years
gen.df <- bind_rows(gen.df, .id = "year")
# deal w/ plurals
gen.d.2 <- lapply(years, function(x){
search_word(df = song_list, term = gen[c(6,7)], year = x)
})
names(gen.d.2) <- years
temp <- sapply(gen.d.2, nrow)
gen.df.t <- data.frame(year = names(temp),
term = "eye(s)",
n = temp)
gen.df <- gen.df %>% add_row(gen.df.t)
gen.df$percentage <- 0
gen.df$percentage[gen.df$year == 2014] <-
(gen.df$n[gen.df$year == 2014]/general$general[1])*100
gen.df$percentage[gen.df$year == 2015] <-
(gen.df$n[gen.df$year == 2015]/general$general[2])*100
gen.df$percentage[gen.df$year == 2016] <-
(gen.df$n[gen.df$year == 2016]/general$general[3])*100
gen.df$percentage[gen.df$year == 2017] <-
(gen.df$n[gen.df$year == 2017]/general$general[4])*100
gen.df$percentage[gen.df$year == 2018] <-
(gen.df$n[gen.df$year == 2018]/general$general[5])*100
gen.df$percentage[gen.df$year == 2019] <-
(gen.df$n[gen.df$year == 2019]/general$general[6])*100
gen.df$percentage <- round(gen.df$percentage, 2)
datatable(gen.df,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2014
tm_2014 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2014,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2015
tm_2015 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2015,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2016
tm_2016 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2016,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2017
tm_2017 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2017,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2018
tm_2018 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2018,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
year <- 2019
tm_2019 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
group_by(song) %>% count(word, sort = TRUE) %>%
anti_join(stop_words) %>% group_by(word) %>%
tally() %>% arrange_at("n", desc)
datatable(tm_2019,
filter = 'top',
extensions = 'Buttons',
options = list(pageLength = 10,
dom = 'Bfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))
percent$year <- rownames(percent)
percent.l <- pivot_longer(percent, 1:(ncol(percent)-1))
percent.l$year <- as.numeric(as.character(percent.l$year))
g <- ggplot(percent.l, aes(x = year,
y = value, color = name)) +
geom_point() + geom_line() +
xlab("Year") + ylab("Percentage of Songs") +
theme_bw() + ggtitle("Comparison of Categories")
g
## [1] "R version 4.0.2 (2020-06-22)"
## [2] "Platform: x86_64-w64-mingw32/x64 (64-bit)"
## [3] "Running under: Windows 10 x64 (build 18363)"
## [4] ""
## [5] "Matrix products: default"
## [6] ""
## [7] "locale:"
## [8] "[1] LC_COLLATE=English_United States.1252 "
## [9] "[2] LC_CTYPE=English_United States.1252 "
## [10] "[3] LC_MONETARY=English_United States.1252"
## [11] "[4] LC_NUMERIC=C "
## [12] "[5] LC_TIME=English_United States.1252 "
## [13] ""
## [14] "attached base packages:"
## [15] "[1] stats graphics grDevices utils datasets methods base "
## [16] ""
## [17] "other attached packages:"
## [18] " [1] wordcloud_2.6 RColorBrewer_1.1-2 purrr_0.3.4 cowplot_1.0.0 "
## [19] " [5] plotly_4.9.2.1 ggplot2_3.3.2 dplyr_0.8.5 tidytext_0.2.4 "
## [20] " [9] here_0.1 tidyr_1.0.3 DT_0.13 pander_0.6.3 "
## [21] "[13] knitr_1.28 "
## [22] ""
## [23] "loaded via a namespace (and not attached):"
## [24] " [1] Rcpp_1.0.4.6 pillar_1.4.4 compiler_4.0.2 tokenizers_0.2.1 "
## [25] " [5] tools_4.0.2 digest_0.6.25 viridisLite_0.3.0 jsonlite_1.6.1 "
## [26] " [9] gtable_0.3.0 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.1 "
## [27] "[13] lattice_0.20-41 pkgconfig_2.0.3 rlang_0.4.6 Matrix_1.2-18 "
## [28] "[17] crosstalk_1.1.0.1 yaml_2.2.1 xfun_0.13 httr_1.4.1 "
## [29] "[21] withr_2.2.0 janeaustenr_0.1.5 stringr_1.4.0 generics_0.0.2 "
## [30] "[25] vctrs_0.3.0 htmlwidgets_1.5.1 rprojroot_1.3-2 grid_4.0.2 "
## [31] "[29] tidyselect_1.1.0 data.table_1.12.8 glue_1.4.0 R6_2.4.1 "
## [32] "[33] rmarkdown_2.1 farver_2.0.3 magrittr_1.5 scales_1.1.1 "
## [33] "[37] backports_1.1.6 SnowballC_0.7.0 ellipsis_0.3.0 htmltools_0.4.0 "
## [34] "[41] assertthat_0.2.1 colorspace_1.4-1 labeling_0.3 stringi_1.4.6 "
## [35] "[45] lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4 "