Commit 3ebc2bd
pierref12 committed Jun 25, 2020
1 parent 55da922 commit 3ebc2bd
Showing 4,866 changed files with 6,308,750 additions and 0 deletions.

Binary file modified .DS_Store
Binary file added 1_Climate_Change/.DS_Store
46 changes: 46 additions & 0 deletions 1_Climate_Change/execution_notes
@@ -0,0 +1,46 @@
#Install pip, bs4 and lxml for Python 2.7
sudo easy_install-2.7 pip
sudo python2.7 -m pip install bs4
sudo python2.7 -m pip install lxml
cd ~/Documents/GitHub/esade_fake_news/1_Climate_Change/youtube_query

#v1
python2.7 follow-youtube-recommendations.py --query="climate change" --searches=1 --branch=4 --depth=4 --name="climate_v1_b4_d4" --gl="US" --language="en" --loopok=True

#v2
python2.7 follow-youtube-recommendations_v3.py --query="climate change" --searches=1 --branch=4 --depth=4 --name="climate_v1_b4_d4" --gl="US" --language="en" --loopok=True
total videos = 69
unique videos = 67

#youtube.py query
#v1
python3 youtube.py --query="climate change" --searches=1 --branch=3 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'
total videos = 131
unique videos = 98

#v2
python3 youtube.py --query="climate change is a lie" --searches=1 --branch=3 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'
total videos = 364
unique videos = 221

#v3
python3 youtube.py --query="climate change a hoax" --searches=3 --branch=3 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'
total videos = 1080
unique videos = 271

#v4
python3 youtube.py --query="climate skeptic" --searches=3 --branch=4 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'
total videos =
unique videos =

#v5
python3 youtube.py --query="death penalty" --searches=3 --branch=4 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'

#v6
python3 youtube.py --query="gun control" --searches=3 --branch=3 --depth=6 --channels="False" --key='AIzaSyAn8lYgin9KQIOuJYQsxJhELOuXq2w2-9E'

#query = seed search term for YouTube
#searches = how many of the top search results to use as seeds. After searching for "climate change", for example, searches=4 would start the loop from the first 4 results; searches=1 starts from the top result only.
#branch = how many recommendations to follow from each video, e.g. branch=10 follows the top 10 recommendations of each video.
#depth = how many levels deep the scraping goes, i.e. how many recommendation hops from the seed.
#name = filename under which the results are saved
#These parameters give a rough upper bound on the crawl size; see the sketch below.
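
#A sanity check on the video counts above (an R sketch, not part of the scripts;
#it assumes each video contributes at most `branch` recommendations and that
#depth counts tree levels including the seed):
max_videos <- function(searches, branch, depth) {
  # full branch-ary tree per seed search result, levels 0..depth-1
  searches * sum(branch ^ (0:(depth - 1)))
}
max_videos(1, 3, 6)  # 364  -> youtube.py v2 recorded exactly 364 total videos
max_videos(3, 3, 6)  # 1092 -> v3 recorded 1080, slightly under the bound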
@@ -0,0 +1,49 @@
#Delete observations with errors in parsing JSON (snippet part of YouTube's API).
#allids, alldata2, URL_base, URL_details2 and URL_key are assumed to be defined
#by the preceding query script; fromJSON is assumed to come from jsonlite.
library(RCurl)      # getURL()
library(jsonlite)   # fromJSON()

allids2 <- allids[-c(262, 514, 513, 557, 581, 669, 765, 802, 1023, 1030,
                     1173, 1209, 1278, 1325, 1354, 1444, 1570, 1774, 1806,
                     2001, 2171, 2234), ]

#Resume at row 2235, where the previous run stopped.
for (i in 2235:nrow(allids2)) {
  cat('Iteration', i, '/', nrow(allids2), '\n')
  url2 <- paste(URL_base, allids2[i, ], URL_details2, URL_key, sep = "")
  dd2 <- getURL(url2)
  result2 <- fromJSON(dd2)
  id2 <- result2$items$id[[1]]
  publishedAt <- result2$items$snippet$publishedAt
  channelid <- result2$items$snippet$channelId
  channeltitle <- result2$items$snippet$channelTitle
  title <- result2$items$snippet$title
  description <- result2$items$snippet$description
  alldata2 <- rbind(alldata2, data.frame(id2, title, description, publishedAt,
                                         channelid, channeltitle))
}
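
#An alternative to deleting bad rows by hand after each crash: catch the parse
#errors inline and skip the failing rows. A minimal sketch, assuming the same
#variables as above; this is not the approach taken in the original run.
for (i in 1:nrow(allids)) {
  row <- tryCatch({
    res <- fromJSON(getURL(paste(URL_base, allids[i, ], URL_details2, URL_key, sep = "")))
    s <- res$items$snippet
    data.frame(id2 = res$items$id[[1]], title = s$title,
               description = s$description, publishedAt = s$publishedAt,
               channelid = s$channelId, channeltitle = s$channelTitle)
  }, error = function(e) NULL)  # parse failure -> skip this row
  if (!is.null(row)) alldata2 <- rbind(alldata2, row)
}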

#Delete observations with errors in parsing JSON (statistics part of YouTube's API).
allids3 <- allids[-c(8, 19, 33, 83, 97, 112, 166, 256, 271, 279, 283,
                     301, 312, 337, 340, 398, 400, 423, 425, 429, 451,
                     464, 547, 549, 586, 602, 613, 618, 649, 654, 688,
                     696, 736, 755, 810, 811, 818, 879, 901, 932, 935,
                     972, 998, 1024, 1038, 1097, 1129, 1151, 1192, 1204,
                     1221, 1265, 1304, 1373, 1381, 1442, 1489, 1490, 1492,
                     1496, 1515, 1530, 1543, 1547, 1566, 1595, 1600,
                     1625, 1652, 1675, 1682, 1724, 1727, 1729, 1744, 1747,
                     1831, 1898, 1954, 1957, 1964, 1981, 1988, 2011, 2020,
                     2090, 2136, 2156, 2298, 2301, 2306, 2320, 2342, 2354,
                     2368, 2394, 2416, 2473, 2486, 2504, 2516, 2566, 2585,
                     2389, 2602, 2613, 2623), ]

#Resume at row 2624, where the previous run stopped.
for (i in 2624:nrow(allids3)) {
  cat('Iteration', i, '/', nrow(allids3), '\n')
  url3 <- paste(URL_base, allids3[i, ], URL_details3, URL_key, sep = "")
  dd3 <- getURL(url3)
  result3 <- fromJSON(dd3)
  id3 <- result3$items$id[[1]]
  views <- result3$items$statistics$viewCount
  likes <- result3$items$statistics$likeCount
  dislikes <- result3$items$statistics$dislikeCount
  favorite <- result3$items$statistics$favoriteCount
  comments <- result3$items$statistics$commentCount
  alldata3 <- rbind(alldata3, data.frame(id3, views, likes, dislikes, favorite, comments))
}
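
#With both frames built, snippet metadata and statistics can be joined on the
#video id. A sketch using the column names from the frames above; the original
#merge step is not shown in this file.
alldata <- merge(alldata2, alldata3, by.x = "id2", by.y = "id3")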


@@ -0,0 +1,9 @@
"","unique.alldata5.category."
"1","29"
"2","27"
"3","22"
"4","25"
"5","28"
"6","1"
"7","20"
"8","10"