# Read in the data
song_list <- read.csv(here("data","songs_top_30.csv"))
song_list <- song_list[1:484,1:8]
# get stop words
data(stop_words)
stop_words <- stop_words %>%
  filter(!(word == "little"))

Summary Tables

The Number of Top 30 Songs per Artist

artist_list <- song_list %>% group_by(Artist) %>% 
  tally() %>% arrange(desc(n)) %>% rename(!!"Songs" := n)
datatable(artist_list,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

There are 121 different artists with a top 30 song since 2013. Of these artists, only 41 have had 5 or more top 30 songs.

The Number of Top 30 Songs per year

year_list <- song_list %>% group_by(Entered.Top.30.In.) %>% 
  tally() %>% rename(!!"Songs" := n)
datatable(year_list[-1,],
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Specific Word Search

# years
years <- year_list$Entered.Top.30.In.
years <- years[-1]
# words to look for
veh <- c("truck", "pickup", "chevy", "ford", "tacoma", 
         "chevrolet", "tailgate", "car")
cl <- c("jeans", "cutoffs", "jean", "shirt", "hat", 
        "shoe", "dress", "boots")
dr <- c("beer", "drink", "wine", "whiskey", "shots",
        "tequila", "vodka", "margarita", "cocktail", "sippin",
        "alcohol")
bod <- c("eyes", "lips", "hair", "mouth", "hand", 
         "hands", "feet", "chest", "finger", "ears",
         "hips", "legs", "body", "shoulder")
gen <- c("yeah", "night", "tonight", "little", "town",
           "eyes", "eye", "drink", "drinkin", "girl", 
           "boy", "baby", "wanna", "gonna", "road",
           "hand", "kiss", "love", "lovin", "time",
           "heart", "feel", "feeling")

# Prep df by year
prep_df <- function(df, year){
  songs <- as.character(df$Lyrics)[which(df$Entered.Top.30.In. == year)]
  text_df <- tibble(
    song = as.character(df$Song[which(df$Entered.Top.30.In. == year)]),
    text = songs)
  return(text_df)
}
# function to find a word
search_word <- function(df, term, year, plural=NULL){
  df.y <- prep_df(df, year = year)
  counts.temp <- df.y %>% unnest_tokens(word, text) %>%
    group_by(song) %>% count(word, sort = TRUE)
  counts <- counts.temp %>% 
    filter(word %in% term)
  if(!is.null(plural)){
    for(i in 1:nrow(plural)){
      counts$word <- stringr::str_replace_all(counts$word, plural[i,1], plural[i,2]) 
    }
  }
  counts<- counts %>% 
    tally()
  return(counts)
}

Vehicle

Looking for:
- truck, pickup, chevy, ford, tacoma, chevrolet, tailgate, car

Total

named <- "vehicle"
veh.l <- lapply(years, function(x){
  search_word(df = song_list, term = veh, year = x)
})
names(veh.l) <- years
vehicle <- do.call(rbind, lapply(veh.l, nrow)) %>% 
  data.frame(row.names = years) %>% 
  `colnames<-`(named)
veh.c <- veh.l %>% map_df(I, .id = "year") %>% 
  as.matrix() %>% data.frame()
datatable(veh.c,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Breakdown

percentage = % of category, not total songs of the specified year

veh.d <- lapply(years, function(x){
  lapply(veh, function(i){
    search_word(df = song_list, term = i, year = x)
  }) %>% `names<-`(veh)
})
names(veh.d) <- years
veh.df <- lapply(1:length(years), function(x){
  bind_rows(veh.d[[x]], .id = "term") %>% 
    group_by(term) %>% tally()
})
names(veh.df) <- years
veh.df <- bind_rows(veh.df, .id = "year")
veh.df$percentage <- 0
veh.df$percentage[veh.df$year == 2014] <- 
  (veh.df$n[veh.df$year == 2014]/vehicle$vehicle[1])*100
veh.df$percentage[veh.df$year == 2015] <- 
  (veh.df$n[veh.df$year == 2015]/vehicle$vehicle[2])*100
veh.df$percentage[veh.df$year == 2016] <- 
  (veh.df$n[veh.df$year == 2016]/vehicle$vehicle[3])*100
veh.df$percentage[veh.df$year == 2017] <- 
  (veh.df$n[veh.df$year == 2017]/vehicle$vehicle[4])*100
veh.df$percentage[veh.df$year == 2018] <- 
  (veh.df$n[veh.df$year == 2018]/vehicle$vehicle[5])*100
veh.df$percentage[veh.df$year == 2019] <- 
  (veh.df$n[veh.df$year == 2019]/vehicle$vehicle[6])*100
veh.df$percentage <- round(veh.df$percentage, 2)
datatable(veh.df,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

ggplot(veh.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", position = "dodge", alpha=1) + 
  theme_bw() + ggtitle("Vehicle Breakdown by Year")

ggplot(veh.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", alpha=1) + 
  theme_bw() + ggtitle("Vehicle Breakdown by Year")

Clothes

Looking for:
- jeans, cutoffs, jean, shirt, hat, shoe, dress, boots
- merged jeans/jean

Total

named <- "clothes"
plural <- rbind(c("jeans", "jean"))
cl.l <- lapply(years, function(x){
  search_word(df = song_list, term = cl, year = x, plural = plural)
})
names(cl.l) <- years
clothes <- do.call(rbind, lapply(cl.l, nrow)) %>% 
  data.frame(row.names = years) %>% 
  `colnames<-`(named)
cl.c <- cl.l %>% map_df(I, .id = "year") %>% 
  as.matrix() %>% data.frame()
datatable(cl.c,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Breakdown

percentage = % of category, not total songs of the specified year

#cl <- cl[-c(1,3)]
cl.d <- lapply(years, function(x){
  lapply(cl[-c(1,3)], function(i){
    search_word(df = song_list, term = i, year = x)
  }) %>% `names<-`(cl[-c(1,3)])
})
names(cl.d) <- years
# deal w/ plurals
cl.d.2 <- lapply(years, function(x){
  search_word(df = song_list, term = cl[c(1,3)], year = x)
})
names(cl.d.2) <- years

cl.df <- lapply(1:length(years), function(x){
  bind_rows(cl.d[[x]], .id = "term") %>% 
    group_by(term) %>% tally()
})
names(cl.df) <- years
temp <- sapply(cl.d.2, nrow)
cl.df.t <- data.frame(year = names(temp),
                      term = "jean(s)",
                      n = temp)

cl.df <- bind_rows(cl.df, .id = "year")

cl.df <- cl.df %>% add_row(cl.df.t)

cl.df$percentage <- 0
cl.df$percentage[cl.df$year == 2014] <- 
  (cl.df$n[cl.df$year == 2014]/clothes$clothes[1])*100
cl.df$percentage[cl.df$year == 2015] <- 
  (cl.df$n[cl.df$year == 2015]/clothes$clothes[2])*100
cl.df$percentage[cl.df$year == 2016] <- 
  (cl.df$n[cl.df$year == 2016]/clothes$clothes[3])*100
cl.df$percentage[cl.df$year == 2017] <- 
  (cl.df$n[cl.df$year == 2017]/clothes$clothes[4])*100
cl.df$percentage[cl.df$year == 2018] <- 
  (cl.df$n[cl.df$year == 2018]/clothes$clothes[5])*100
cl.df$percentage[cl.df$year == 2019] <- 
  (cl.df$n[cl.df$year == 2019]/clothes$clothes[6])*100
cl.df$percentage <- round(cl.df$percentage, 2)
datatable(cl.df,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

ggplot(cl.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", position = "dodge", alpha=1) + 
  theme_bw() + ggtitle("Clothes Breakdown by Year")

ggplot(cl.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", alpha=1) + 
  theme_bw() + ggtitle("Clothes Breakdown by Year")

Drinks

Looking for:
- beer, drink, wine, whiskey, shots, tequila, vodka, margarita, cocktail, sippin, alcohol

Total

named <- "drinks"
dr.l <- lapply(years, function(x){
  search_word(df = song_list, term = dr, year = x)
})
names(dr.l) <- years
drinks <- do.call(rbind, lapply(dr.l, nrow)) %>% 
  data.frame(row.names = years) %>% 
  `colnames<-`(named)
dr.c <- dr.l %>% map_df(I, .id = "year") %>% 
  as.matrix() %>% data.frame()
datatable(dr.c,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Breakdown

percentage = % of category, not total songs of the specified year

dr.d <- lapply(years, function(x){
  lapply(dr, function(i){
    search_word(df = song_list, term = i, year = x)
  }) %>% `names<-`(dr)
})
names(dr.d) <- years
dr.df <- lapply(1:length(years), function(x){
  bind_rows(dr.d[[x]], .id = "term") %>% 
    group_by(term) %>% tally()
})
names(dr.df) <- years
dr.df <- bind_rows(dr.df, .id = "year")
dr.df$percentage <- 0
dr.df$percentage[dr.df$year == 2014] <- 
  (dr.df$n[dr.df$year == 2014]/drinks$drinks[1])*100
dr.df$percentage[dr.df$year == 2015] <- 
  (dr.df$n[dr.df$year == 2015]/drinks$drinks[2])*100
dr.df$percentage[dr.df$year == 2016] <- 
  (dr.df$n[dr.df$year == 2016]/drinks$drinks[3])*100
dr.df$percentage[dr.df$year == 2017] <- 
  (dr.df$n[dr.df$year == 2017]/drinks$drinks[4])*100
dr.df$percentage[dr.df$year == 2018] <- 
  (dr.df$n[dr.df$year == 2018]/drinks$drinks[5])*100
dr.df$percentage[dr.df$year == 2019] <- 
  (dr.df$n[dr.df$year == 2019]/drinks$drinks[6])*100
dr.df$percentage <- round(dr.df$percentage, 2)
datatable(dr.df,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

ggplot(dr.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", position = "dodge", alpha=1) + 
  theme_bw() + ggtitle("Drinks Breakdown by Year")

ggplot(dr.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", alpha=1) + 
  theme_bw() + ggtitle("Drinks Breakdown by Year")

Body

Looking for:
- eyes, lips, hair, mouth, hand, hands, feet, chest, finger, ears, hips, legs, body, shoulder
- merged hand/hands

Total

named <- "body"
plural <- rbind(c("hands", "hand"))
bod.l <- lapply(years, function(x){
  search_word(df = song_list, term = bod, year = x, plural = plural)
})
names(bod.l) <- years
body <- do.call(rbind, lapply(bod.l, nrow)) %>% 
  data.frame(row.names = years) %>% 
  `colnames<-`(named)
bod.c <- bod.l %>% map_df(I, .id = "year") %>% 
  as.matrix() %>% data.frame()
datatable(bod.c,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Breakdown

percentage = % of category, not total songs of the specified year

bod.d <- lapply(years, function(x){
  lapply(bod[-c(5,6)], function(i){
    search_word(df = song_list, term = i, year = x)
  }) %>% `names<-`(bod[-c(5,6)])
})
names(bod.d) <- years
bod.df <- lapply(1:length(years), function(x){
  bind_rows(bod.d[[x]], .id = "term") %>% 
    group_by(term) %>% tally()
})
names(bod.df) <- years
bod.df <- bind_rows(bod.df, .id = "year")

bod.d.2 <- lapply(years, function(x){
  search_word(df = song_list, term = bod[c(5,6)], year = x)
})
names(bod.d.2) <- years
temp <- sapply(bod.d.2, nrow)
bod.df.t <- data.frame(year = names(temp),
                      term = "hand(s)",
                      n = temp)
bod.df <- bod.df %>% add_row(bod.df.t)

bod.df$percentage <- 0
bod.df$percentage[bod.df$year == 2014] <- 
  (bod.df$n[bod.df$year == 2014]/body$body[1])*100
bod.df$percentage[bod.df$year == 2015] <- 
  (bod.df$n[bod.df$year == 2015]/body$body[2])*100
bod.df$percentage[bod.df$year == 2016] <- 
  (bod.df$n[bod.df$year == 2016]/body$body[3])*100
bod.df$percentage[bod.df$year == 2017] <- 
  (bod.df$n[bod.df$year == 2017]/body$body[4])*100
bod.df$percentage[bod.df$year == 2018] <- 
  (bod.df$n[bod.df$year == 2018]/body$body[5])*100
bod.df$percentage[bod.df$year == 2019] <- 
  (bod.df$n[bod.df$year == 2019]/body$body[6])*100
bod.df$percentage <- round(bod.df$percentage, 2)
datatable(bod.df,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

ggplot(bod.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", position = "dodge", alpha=1) + 
  theme_bw() + ggtitle("Body Breakdown by Year")

ggplot(bod.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", alpha=1) + 
  theme_bw() + ggtitle("Body Breakdown by Year")

General

Looking for:
- yeah, night, tonight, little, town, eyes, eye, drink, drinkin, girl, boy, baby, wanna, gonna, road, hand, kiss, love, lovin, time, heart, feel, feeling

Total

named <- "general"
plural <- rbind(c("eyes", "eye"))
gen.l <- lapply(years, function(x){
  search_word(df = song_list, term = gen, year = x)
})
names(gen.l) <- years
general <- do.call(rbind, lapply(gen.l, nrow)) %>% 
  data.frame(row.names = years) %>% 
  `colnames<-`(named)
gen.c <- gen.l %>% map_df(I, .id = "year") %>% 
  as.matrix() %>% data.frame()
datatable(gen.c,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Breakdown

percentage = % of category, not total songs of the specified year

gen.d <- lapply(years, function(x){
  lapply(gen[-c(6,7)], function(i){
    search_word(df = song_list, term = i, year = x)
  }) %>% `names<-`(gen[-c(6,7)])
})
names(gen.d) <- years
gen.df <- lapply(1:length(years), function(x){
  bind_rows(gen.d[[x]], .id = "term") %>% 
    group_by(term) %>% tally()
})
names(gen.df) <- years
gen.df <- bind_rows(gen.df, .id = "year")

# deal w/ plurals
gen.d.2 <- lapply(years, function(x){
  search_word(df = song_list, term = gen[c(6,7)], year = x)
})
names(gen.d.2) <- years
temp <- sapply(gen.d.2, nrow)
gen.df.t <- data.frame(year = names(temp),
                      term = "eye(s)",
                      n = temp)
gen.df <- gen.df %>% add_row(gen.df.t)

gen.df$percentage <- 0
gen.df$percentage[gen.df$year == 2014] <- 
  (gen.df$n[gen.df$year == 2014]/general$general[1])*100
gen.df$percentage[gen.df$year == 2015] <- 
  (gen.df$n[gen.df$year == 2015]/general$general[2])*100
gen.df$percentage[gen.df$year == 2016] <- 
  (gen.df$n[gen.df$year == 2016]/general$general[3])*100
gen.df$percentage[gen.df$year == 2017] <- 
  (gen.df$n[gen.df$year == 2017]/general$general[4])*100
gen.df$percentage[gen.df$year == 2018] <- 
  (gen.df$n[gen.df$year == 2018]/general$general[5])*100
gen.df$percentage[gen.df$year == 2019] <- 
  (gen.df$n[gen.df$year == 2019]/general$general[6])*100
gen.df$percentage <- round(gen.df$percentage, 2)
datatable(gen.df,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

ggplot(gen.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", position = "dodge", alpha=1) + 
  theme_bw() + ggtitle("General Terms Breakdown by Year")

ggplot(gen.df, aes(x = percentage, y = year, fill=term)) + 
  geom_bar(stat="identity", alpha=1) + 
  theme_bw() + ggtitle("General Terms Breakdown by Year")

General Word Search

2014

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2014
tm_2014 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2014,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2014 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2014") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2014$word, tm_2014$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

2015

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2015
tm_2015 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2015,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2015 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2015") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2015$word, tm_2015$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

2016

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2016
tm_2016 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2016,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2016 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2016") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2016$word, tm_2016$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

2017

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2017
tm_2017 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2017,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2017 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2017") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2017$word, tm_2017$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

2018

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2018
tm_2018 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2018,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2018 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2018") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2018$word, tm_2018$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

2019

Removed “stop words”
Counted the number of songs a specific word occurred in

Table

year <- 2019
tm_2019 <- prep_df(song_list, year = year) %>% unnest_tokens(word, text) %>%
  group_by(song) %>% count(word, sort = TRUE) %>%
  anti_join(stop_words) %>% group_by(word) %>% 
  tally() %>% arrange_at("n", desc)

datatable(tm_2019,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Barplot

tm_2019 %>% top_n(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n)) +
  geom_col() +
  xlab(NULL) +
  ylab("Number of Song that Contain the Word") +
  ggtitle("Top 20 Words: 2019") +
  coord_flip() + 
  theme_bw()

Word Cloud

Top 200 words
Must have at least 2 occurrences

wordcloud(tm_2019$word, tm_2019$n, scale=c(4,.3), 
          min.freq=2, max.words=200, 
          random.order=T, rot.per=.15,
          colors = brewer.pal(8,"Dark2"),
          vfont=c("sans serif","plain"))

Summary

Tables

Raw Numbers

sums <- data.frame(vehicle, clothes, drinks, body)
datatable(sums,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Percentage

By percentage of top 30 songs per year

percent <- round((sums/year_list$Songs[-1])*100, digits = 2)
datatable(percent,
          filter = 'top',
          extensions = 'Buttons',
          options = list(pageLength = 10,
                         dom = 'Bfrtip', 
                         buttons = c('copy', 'csv', 'excel', 'pdf', 'print')))

Figures

percent$year <- rownames(percent)
percent.l <- pivot_longer(percent, 1:(ncol(percent)-1))
percent.l$year <- as.numeric(as.character(percent.l$year))
g <- ggplot(percent.l, aes(x = year, 
                           y = value, color = name)) + 
  geom_point() + geom_line() +
  xlab("Year") + ylab("Percentage of Songs") +
  theme_bw() + ggtitle("Comparison of Categories")
g

ggplotly(g)

gen.df$year <- as.numeric(as.character(gen.df$year))
p <- ggplot(gen.df, aes(x = year, 
                           y = percentage, color = term)) + 
  geom_point() + geom_line() +
  xlab("Year") + ylab("Percentage of Songs") +
  ggtitle("Comparison of Popular Terms") +
  theme_bw()
p

ggplotly(p)

R-session Information

##  [1] "R version 4.0.2 (2020-06-22)"                                                    
##  [2] "Platform: x86_64-w64-mingw32/x64 (64-bit)"                                       
##  [3] "Running under: Windows 10 x64 (build 18363)"                                     
##  [4] ""                                                                                
##  [5] "Matrix products: default"                                                        
##  [6] ""                                                                                
##  [7] "locale:"                                                                         
##  [8] "[1] LC_COLLATE=English_United States.1252 "                                      
##  [9] "[2] LC_CTYPE=English_United States.1252   "                                      
## [10] "[3] LC_MONETARY=English_United States.1252"                                      
## [11] "[4] LC_NUMERIC=C                          "                                      
## [12] "[5] LC_TIME=English_United States.1252    "                                      
## [13] ""                                                                                
## [14] "attached base packages:"                                                         
## [15] "[1] stats     graphics  grDevices utils     datasets  methods   base     "       
## [16] ""                                                                                
## [17] "other attached packages:"                                                        
## [18] " [1] wordcloud_2.6      RColorBrewer_1.1-2 purrr_0.3.4        cowplot_1.0.0     "
## [19] " [5] plotly_4.9.2.1     ggplot2_3.3.2      dplyr_0.8.5        tidytext_0.2.4    "
## [20] " [9] here_0.1           tidyr_1.0.3        DT_0.13            pander_0.6.3      "
## [21] "[13] knitr_1.28        "                                                         
## [22] ""                                                                                
## [23] "loaded via a namespace (and not attached):"                                      
## [24] " [1] Rcpp_1.0.4.6      pillar_1.4.4      compiler_4.0.2    tokenizers_0.2.1 "    
## [25] " [5] tools_4.0.2       digest_0.6.25     viridisLite_0.3.0 jsonlite_1.6.1   "    
## [26] " [9] gtable_0.3.0      evaluate_0.14     lifecycle_0.2.0   tibble_3.0.1     "    
## [27] "[13] lattice_0.20-41   pkgconfig_2.0.3   rlang_0.4.6       Matrix_1.2-18    "    
## [28] "[17] crosstalk_1.1.0.1 yaml_2.2.1        xfun_0.13         httr_1.4.1       "    
## [29] "[21] withr_2.2.0       janeaustenr_0.1.5 stringr_1.4.0     generics_0.0.2   "    
## [30] "[25] vctrs_0.3.0       htmlwidgets_1.5.1 rprojroot_1.3-2   grid_4.0.2       "    
## [31] "[29] tidyselect_1.1.0  data.table_1.12.8 glue_1.4.0        R6_2.4.1         "    
## [32] "[33] rmarkdown_2.1     farver_2.0.3      magrittr_1.5      scales_1.1.1     "    
## [33] "[37] backports_1.1.6   SnowballC_0.7.0   ellipsis_0.3.0    htmltools_0.4.0  "    
## [34] "[41] assertthat_0.2.1  colorspace_1.4-1  labeling_0.3      stringi_1.4.6    "    
## [35] "[45] lazyeval_0.2.2    munsell_0.5.0     crayon_1.3.4     "

Top 30 Country Songs Lyric Analysis

Dana Gibbon

2020-09-14

Summary Tables

The Number of Top 30 Songs per Artist

The Number of Top 30 Songs per year

Specific Word Search

Vehicle

Total

Breakdown

Clothes

Total

Breakdown

Drinks

Total

Breakdown

Body

Total

Breakdown

General

Total

Breakdown

General Word Search

2014

Table

Barplot

Word Cloud

2015

Table

Barplot

Word Cloud

2016

Table

Barplot

Word Cloud

2017

Table

Barplot

Word Cloud

2018

Table

Barplot

Word Cloud

2019

Table

Barplot

Word Cloud

Summary

Tables

Raw Numbers

Percentage

Figures

R-session Information