
Commit

add histograms/wordclouds
rantaluca committed Dec 24, 2023
1 parent b58c771 commit 675e35c
Showing 23 changed files with 53 additions and 27 deletions.
Binary file added .DS_Store
Binary file not shown.
32 changes: 21 additions & 11 deletions comments_count.R
@@ -16,8 +16,8 @@ remDr$getStatus()

#----------Navigation-------------
account_link<-NA
account_link<-"https://www.tiktok.com/@raphael_metivet/video/7213843034193153286"
account_link<-"https://www.tiktok.com/@sophia.panda06/video/7281969560457186562"

remDr$navigate(account_link)

Sys.sleep(3)
@@ -35,16 +35,17 @@ tryCatch(

#----------Scrolling-------------

#for (x in 1:20) {
#  remDr$executeScript("window.scrollBy(0,5000);")
#  Sys.sleep(2)
#}
for (x in 1:300) {
  remDr$executeScript("window.scrollBy(0,5000);")
  Sys.sleep(0.7)
}
scrollToEnd <- function() {
  scroll_increment <- 500 # Define the scroll distance
  last_height <- remDr$executeScript("return document.body.scrollHeight")
  while (TRUE) {
    remDr$executeScript("window.scrollBy(0, document.body.scrollHeight);")
    Sys.sleep(1)
    remDr$executeScript(paste0("window.scrollBy(0, ", scroll_increment, ");"))
    Sys.sleep(2) # Adjust this sleep duration if needed
    new_height <- remDr$executeScript("return document.body.scrollHeight")
    if (identical(new_height, last_height)) {
      break
@@ -105,13 +106,22 @@ time_list <-as.Date(time_list)
print(time_list)

#----------Occurrences-------------#
# Count the number of comments per date
occurences <- as.data.frame(table(unlist(time_list)))
colnames(occurences) <- c('Date','Nbcomments')
print(occurences)
occurences$Date <- as.Date(occurences$Date)

ggplot(occurences, aes(x = Date, y = Nbcomments)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Evolution des commentaires panda fyp", x = "Date", y = "Nb de commentaires") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m-%d")

# Bar chart of comments per day
ggplot(occurences, aes(x = Date, y = Nbcomments)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Evolution des Commentaires fyp dance", x = "Date", y = "Nb de commentaires") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
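
Not part of the commit, just a hedged sketch: assuming occurences is the Date/Nbcomments data frame built above and ggplot2 is attached, the two near-identical plotting blocks could be folded into one small helper that also writes the figure under plots/histograms/ with ggsave(). The function name plot_comment_histogram and the output path are illustrative, not taken from the repository.

library(ggplot2)

# Hypothetical helper (not in the commit): daily bar chart of comment counts,
# optionally saved to disk. Assumes `occurences` has columns Date and Nbcomments.
plot_comment_histogram <- function(occurences, title, file = NULL) {
  p <- ggplot(occurences, aes(x = Date, y = Nbcomments)) +
    geom_bar(stat = "identity", fill = "skyblue", color = "black") +
    labs(title = title, x = "Date", y = "Nb de commentaires") +
    theme_minimal() +
    scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  if (!is.null(file)) ggsave(file, plot = p, width = 10, height = 5)
  p
}

# Example call (title and path are assumptions):
# plot_comment_histogram(occurences, "Evolution des commentaires panda fyp",
#                        "plots/histograms/Panda_historgam.png")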
1 change: 0 additions & 1 deletion comments_mots.R
@@ -100,7 +100,6 @@ toremove<-c(stopwords ("french"), stopwords("english"),",","a","le","la","de", "
"+", "toutes", "|", "via", "mettre", "in", "of", "👉", "👇","","#fyp","#pourtoi","de","#viral","#foryou","#fypシ","le", "the",
"!","a","mdr","lol",".",",",";","?","et", "#fypシ゚viral","#foryoupage", "un", "même", "Même", "je", "tu", "il", "on", "Bardella", "bardella", "Jordan")

# Remove specified words
tokenized_text <- tokenized_text[!tokenized_text %in% toremove]


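An aside on the filter above, not part of the commit: the %in% match is case-sensitive, which is why toremove has to list variants such as "même" and "Même" separately. A minimal sketch, assuming tokenized_text and toremove are the objects built earlier in comments_mots.R, of lower-casing once so the stop-list can stay lowercase:

# Sketch only: normalize case before filtering, so one lowercase entry
# per word is enough in the stop-list.
tokenized_text <- tolower(tokenized_text)
toremove_lc    <- unique(tolower(toremove))
tokenized_text <- tokenized_text[!tokenized_text %in% toremove_lc]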
Binary file added data/.DS_Store
Binary file not shown.
Binary file added plots/.DS_Store
Binary file not shown.
Binary file added plots/histograms/Panda_historgam.png
Binary file added plots/histograms/Plot_Dance.png
Binary file added plots/histograms/plot_emeutes.png
Binary file added plots/histograms/ronaldo_weeks.png
Binary file added plots/wordclouds/.DS_Store
Binary file not shown.
Binary file added plots/wordclouds/Emeutes/emeutes.png
Binary file added plots/wordclouds/Emeutes/like_emeutes1.png
Binary file added plots/wordclouds/Emeutes/like_emeutes2.png
Binary file added plots/wordclouds/Emeutes/like_emeutes3.png
Binary file added plots/wordclouds/Panda/panda.png
Binary file added plots/wordclouds/Panda/panda2.png
Binary file added plots/wordclouds/Panda/panda3.png
Binary file added plots/wordclouds/Panda/panda4.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo 2.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo3.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo4.png
47 changes: 32 additions & 15 deletions tiktok_likes.R
@@ -14,11 +14,11 @@ remDr$getStatus()

#----------Navigation-------------

account_link<-"https://www.tiktok.com/@" #<---- ici metre l'url
account_link<-"https://www.tiktok.com/@wolfandpig29" #<---- ici metre l'url


remDr$navigate(account_link)

Sys.sleep(3)

#----------Close the sign-up popup-------------
@@ -48,11 +48,11 @@ tryCatch(

#----------Scrolling-------------

#for (x in 1:20) {
#  remDr$executeScript("window.scrollBy(0,5000);")
#  Sys.sleep(2)
#}
for (x in 1:300) {
  remDr$executeScript("window.scrollBy(0,5000);")
  Sys.sleep(1)
}
scrollToEnd <- function() {
  last_height <- remDr$executeScript("return document.body.scrollHeight")
  while (TRUE) {
@@ -81,9 +81,9 @@ titles_likes <- remDr$findElements("xpath", "//div[@data-e2e='user-liked-item-de
views_vids <- remDr$findElements("xpath", "//div[@data-e2e='user-liked-item']//strong[@data-e2e='video-views']")


links_likes <- list()
title_list <- list()
views_list <- list()
links_likes <-NA
title_list <- NA
views_list <- NA

#----------Data retrieval-------------

@@ -116,18 +116,18 @@ usernames <- list()
usernames <- lapply(links_likes, get_username)


# Maximum length across the lists (links, titles, views, usernames)
max_length <- max(length(links_likes), length(title_list), length(views_list), length(usernames))

# Helper: pad a list with NA so every list has the same length
padList <- function(lst, len) {
  if (length(lst) < len) {
    lst <- c(lst, rep(NA, len - length(lst)))
  }
  return(lst)
}

# Pad each list to max_length
links_likes <- padList(links_likes, max_length)
title_list <- padList(title_list, max_length)
views_list <- padList(views_list, max_length)
@@ -154,7 +154,6 @@ top_redundant_words <- names(sort(word_freq, decreasing = TRUE))[1:top_n]
print(top_redundant_words)

#----------Text analysis of the most-liked captions-------------

titles <- as.character(bdd_likes[, 4])
tokenized_titles <- unlist(strsplit(titles, "\\s+"))

@@ -183,7 +182,12 @@ toremove<-c("’","a","le","la","de", "des", "les", "en", "sur", "à", "il", "el
"vais", "vraiment", "y'a", "vas", "bla", "e", "d'être", "veux", "mois", "sen",
"bah", "regarde", "tiens", "complètement", "completement", "sait", "ten", "vers",
"+", "toutes", "|", "via", "mettre", "in", "of", "👉", "👇","","#fyp","#pourtoi","de","#viral","#foryou","#fypシ","le", "the",
"!","a","mdr","lol",".",",",";","?","et", "#fypシ゚viral","#foryoupage" )
"!","a","mdr","lol","ce","qui",".",",",";","?","et", "#fypシ゚viral","#foryoupage","avec", "I", "love", "you","o", "u","y","v","#tiktok",
"#الشعب_الصيني_ماله_حل😂😂", "#اكسبلور" ,":" , "-" ,"#parati" , "lo" , "lo" , "el" , "es" , "l" , "#trending", "que","un","pour","je",
"_","est","fy", "che" , "per" , "è" ,
"*" , "una" , "#neiperte" ,"#perte" , "di" ,"une", "#fy","my","#CapCut" ,"#edit","é" ,"😂" , "não" ,
"eu" , "do" , "to" , "and" , "is" ,"so" ,"this" ,"for" ,"i","that" )


# Remove specified words
tokenized_titles <- tokenized_titles[!tokenized_titles %in% toremove]
@@ -195,4 +199,17 @@ top_n <- 10
top_redundant_words <- names(sort(word_freq, decreasing = TRUE))[1:top_n]
print(top_redundant_words)



# Run once if the packages are not installed yet:
# install.packages(c("wordcloud","tidyverse"))

library(wordcloud)
library(tidyverse)

# Build a data frame for the word cloud
word_freq_df <- data.frame(word = names(word_freq), freq = as.numeric(word_freq))

# Generate the word cloud
set.seed(15621) # for reproducible word placement
wordcloud(words = word_freq_df$word, freq = word_freq_df$freq, min.freq = 6,
          scale = c(3, 0.2), colors = brewer.pal(8, "Dark2"))
title(main = "Centre d'intérêts d'un utilisateur ayant liké Ronaldo", col.main = "black", font.main = 1)
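
The commit also adds the rendered clouds as PNGs under plots/wordclouds/. A hedged sketch, assuming the word_freq_df built above, of writing one of them to disk with the base png() device; the output path and image size are assumptions, not taken from the script:

# Render the word cloud straight into a PNG file instead of the plot window
png("plots/wordclouds/Ronaldo/Ronaldo.png", width = 800, height = 600)
set.seed(15621)
wordcloud(words = word_freq_df$word, freq = word_freq_df$freq, min.freq = 6,
          scale = c(3, 0.2), colors = brewer.pal(8, "Dark2"))
title(main = "Centre d'intérêts d'un utilisateur ayant liké Ronaldo",
      col.main = "black", font.main = 1)
dev.off()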

0 comments on commit 675e35c
