
Commit

add histograms/wordclouds
rantaluca committed Dec 24, 2023
1 parent b58c771 commit 675e35c
Showing 23 changed files with 53 additions and 27 deletions.
Binary file added .DS_Store
Binary file not shown.
32 changes: 21 additions & 11 deletions comments_count.R
@@ -16,8 +16,8 @@ remDr$getStatus()

#----------Navigation-------------
account_link<-NA
account_link<-"https://www.tiktok.com/@raphael_metivet/video/7213843034193153286"
account_link<-"https://www.tiktok.com/@sophia.panda06/video/7281969560457186562"

remDr$navigate(account_link)

Sys.sleep(3)
@@ -35,16 +35,17 @@ tryCatch(

#----------Scrolling-------------

#for (x in 1:20) {
#  remDr$executeScript("window.scrollBy(0,5000);")
#  Sys.sleep(2)
#}
for (x in 1:300) {
  remDr$executeScript("window.scrollBy(0,5000);")
  Sys.sleep(0.7)
}
scrollToEnd <- function() {
  scroll_increment <- 500 # Define the scroll distance
  last_height <- remDr$executeScript("return document.body.scrollHeight")
  while (TRUE) {
    remDr$executeScript("window.scrollBy(0, document.body.scrollHeight);")
    Sys.sleep(1)
    remDr$executeScript(paste0("window.scrollBy(0, ", scroll_increment, ");"))
    Sys.sleep(2) # Adjust this sleep duration if needed
    new_height <- remDr$executeScript("return document.body.scrollHeight")
    if (identical(new_height, last_height)) {
      break
@@ -105,13 +106,22 @@ time_list <-as.Date(time_list)
print(time_list)

#----------Occurrences-------------#
# Count the number of comments per date
occurences <- as.data.frame(table(unlist(time_list)))
colnames(occurences) <- c('Date','Nbcomments')
print(occurences)
occurences$Date <- as.Date(occurences$Date)

ggplot(occurences, aes(x = Date, y = Nbcomments)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Evolution des commentaires panda fyp", x = "Date", y = "Nb de commentaires") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 month", date_labels = "%Y-%m-%d")

# Bar chart of comments per day
ggplot(occurences, aes(x = Date, y = Nbcomments)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Evolution des Commentaires fyp dance", x = "Date", y = "Nb de commentaires") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
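
Not part of the commit, just a hedged sketch: assuming occurences is the Date/Nbcomments data frame built above and ggplot2 is attached, the two near-identical plotting blocks could be folded into one small helper that also writes the figure under plots/histograms/ with ggsave(). The function name plot_comment_histogram and the output path are illustrative, not taken from the repository.

library(ggplot2)

# Hypothetical helper (not in the commit): daily bar chart of comment counts,
# optionally saved to disk. Assumes `occurences` has columns Date and Nbcomments.
plot_comment_histogram <- function(occurences, title, file = NULL) {
  p <- ggplot(occurences, aes(x = Date, y = Nbcomments)) +
    geom_bar(stat = "identity", fill = "skyblue", color = "black") +
    labs(title = title, x = "Date", y = "Nb de commentaires") +
    theme_minimal() +
    scale_x_date(date_breaks = "1 day", date_labels = "%Y-%m-%d") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  if (!is.null(file)) ggsave(file, plot = p, width = 10, height = 5)
  p
}

# Example call (title and path are assumptions):
# plot_comment_histogram(occurences, "Evolution des commentaires panda fyp",
#                        "plots/histograms/Panda_historgam.png")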
1 change: 0 additions & 1 deletion comments_mots.R
@@ -100,7 +100,6 @@ toremove<-c(stopwords ("french"), stopwords("english"),",","a","le","la","de", "
"+", "toutes", "|", "via", "mettre", "in", "of", "👉", "👇","","#fyp","#pourtoi","de","#viral","#foryou","#fypシ","le", "the",
"!","a","mdr","lol",".",",",";","?","et", "#fypシ゚viral","#foryoupage", "un", "même", "Même", "je", "tu", "il", "on", "Bardella", "bardella", "Jordan")

# Remove specified words
tokenized_text <- tokenized_text[!tokenized_text %in% toremove]


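An aside on the filter above, not part of the commit: the %in% match is case-sensitive, which is why toremove has to list variants such as "même" and "Même" separately. A minimal sketch, assuming tokenized_text and toremove are the objects built earlier in comments_mots.R, of lower-casing once so the stop-list can stay lowercase:

# Sketch only: normalize case before filtering, so one lowercase entry
# per word is enough in the stop-list.
tokenized_text <- tolower(tokenized_text)
toremove_lc    <- unique(tolower(toremove))
tokenized_text <- tokenized_text[!tokenized_text %in% toremove_lc]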
Binary file added data/.DS_Store
Binary file not shown.
Binary file added plots/.DS_Store
Binary file not shown.
Binary file added plots/histograms/Panda_historgam.png
Binary file added plots/histograms/Plot_Dance.png
Binary file added plots/histograms/plot_emeutes.png
Binary file added plots/histograms/ronaldo_weeks.png
Binary file added plots/wordclouds/.DS_Store
Binary file not shown.
Binary file added plots/wordclouds/Emeutes/emeutes.png
Binary file added plots/wordclouds/Emeutes/like_emeutes1.png
Binary file added plots/wordclouds/Emeutes/like_emeutes2.png
Binary file added plots/wordclouds/Emeutes/like_emeutes3.png
Binary file added plots/wordclouds/Panda/panda.png
Binary file added plots/wordclouds/Panda/panda2.png
Binary file added plots/wordclouds/Panda/panda3.png
Binary file added plots/wordclouds/Panda/panda4.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo 2.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo3.png
Binary file added plots/wordclouds/Ronaldo/Ronaldo4.png
47 changes: 32 additions & 15 deletions tiktok_likes.R
@@ -14,11 +14,11 @@ remDr$getStatus()

#----------Navigation-------------

account_link<-"https://www.tiktok.com/@" #<---- ici metre l'url
account_link<-"https://www.tiktok.com/@wolfandpig29" #<---- ici metre l'url


remDr$navigate(account_link)

Sys.sleep(3)

#----------Close the sign-up popup-------------
@@ -48,11 +48,11 @@ tryCatch(

#----------Scrolling-------------

#for (x in 1:20) {
#  remDr$executeScript("window.scrollBy(0,5000);")
#  Sys.sleep(2)
#}
for (x in 1:300) {
  remDr$executeScript("window.scrollBy(0,5000);")
  Sys.sleep(1)
}
scrollToEnd <- function() {
  last_height <- remDr$executeScript("return document.body.scrollHeight")
  while (TRUE) {
@@ -81,9 +81,9 @@ titles_likes <- remDr$findElements("xpath", "//div[@data-e2e='user-liked-item-de
views_vids <- remDr$findElements("xpath", "//div[@data-e2e='user-liked-item']//strong[@data-e2e='video-views']")


links_likes <- list()
title_list <- list()
views_list <- list()
links_likes <-NA
title_list <- NA
views_list <- NA

#----------Data retrieval-------------

@@ -116,18 +116,18 @@ usernames <- list()
usernames <- lapply(links_likes, get_username)


# Maximum length across the lists (links, titles, views, usernames)
max_length <- max(length(links_likes), length(title_list), length(views_list), length(usernames))

# Helper: pad a list with NA so every list has the same length
padList <- function(lst, len) {
  if (length(lst) < len) {
    lst <- c(lst, rep(NA, len - length(lst)))
  }
  return(lst)
}

# Pad each list to max_length
links_likes <- padList(links_likes, max_length)
title_list <- padList(title_list, max_length)
views_list <- padList(views_list, max_length)
@@ -154,7 +154,6 @@ top_redundant_words <- names(sort(word_freq, decreasing = TRUE))[1:top_n]
print(top_redundant_words)

#----------Text analysis of the most-liked captions-------------

titles <- as.character(bdd_likes[, 4])
tokenized_titles <- unlist(strsplit(titles, "\\s+"))

@@ -183,7 +182,12 @@ toremove<-c("’","a","le","la","de", "des", "les", "en", "sur", "à", "il", "el
"vais", "vraiment", "y'a", "vas", "bla", "e", "d'être", "veux", "mois", "sen",
"bah", "regarde", "tiens", "complètement", "completement", "sait", "ten", "vers",
"+", "toutes", "|", "via", "mettre", "in", "of", "👉", "👇","","#fyp","#pourtoi","de","#viral","#foryou","#fypシ","le", "the",
"!","a","mdr","lol",".",",",";","?","et", "#fypシ゚viral","#foryoupage" )
"!","a","mdr","lol","ce","qui",".",",",";","?","et", "#fypシ゚viral","#foryoupage","avec", "I", "love", "you","o", "u","y","v","#tiktok",
"#الشعب_الصيني_ماله_حل😂😂", "#اكسبلور" ,":" , "-" ,"#parati" , "lo" , "lo" , "el" , "es" , "l" , "#trending", "que","un","pour","je",
"_","est","fy", "che" , "per" , "è" ,
"*" , "una" , "#neiperte" ,"#perte" , "di" ,"une", "#fy","my","#CapCut" ,"#edit","é" ,"😂" , "não" ,
"eu" , "do" , "to" , "and" , "is" ,"so" ,"this" ,"for" ,"i","that" )


# Remove specified words
tokenized_titles <- tokenized_titles[!tokenized_titles %in% toremove]
@@ -195,4 +199,17 @@ top_n <- 10
top_redundant_words <- names(sort(word_freq, decreasing = TRUE))[1:top_n]
print(top_redundant_words)



# Run once if the packages are not installed yet:
# install.packages(c("wordcloud","tidyverse"))

library(wordcloud)
library(tidyverse)

# Build a data frame for the word cloud
word_freq_df <- data.frame(word = names(word_freq), freq = as.numeric(word_freq))

# Generate the word cloud
set.seed(15621) # for reproducible word placement
wordcloud(words = word_freq_df$word, freq = word_freq_df$freq, min.freq = 6,
          scale = c(3, 0.2), colors = brewer.pal(8, "Dark2"))
title(main = "Centre d'intérêts d'un utilisateur ayant liké Ronaldo", col.main = "black", font.main = 1)
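
The commit also adds the rendered clouds as PNGs under plots/wordclouds/. A hedged sketch, assuming the word_freq_df built above, of writing one of them to disk with the base png() device; the output path and image size are assumptions, not taken from the script:

# Render the word cloud straight into a PNG file instead of the plot window
png("plots/wordclouds/Ronaldo/Ronaldo.png", width = 800, height = 600)
set.seed(15621)
wordcloud(words = word_freq_df$word, freq = word_freq_df$freq, min.freq = 6,
          scale = c(3, 0.2), colors = brewer.pal(8, "Dark2"))
title(main = "Centre d'intérêts d'un utilisateur ayant liké Ronaldo",
      col.main = "black", font.main = 1)
dev.off()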

0 comments on commit 675e35c
