Skip to content

Commit

Permalink
all videos with media bias
Browse files Browse the repository at this point in the history
  • Loading branch information
Rolfstx committed Jun 20, 2020
1 parent 5a8897b commit 02ca85a
Show file tree
Hide file tree
Showing 13 changed files with 39,753 additions and 18 deletions.
Binary file modified 4_Politics/python3_script/.DS_Store
Binary file not shown.
20 changes: 18 additions & 2 deletions 4_Politics/python3_script/data/Labelling.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ library(data.table)

#load dataset
#unique video dataset or all videos dataset
df = fread("~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/20200504-193926_joe_biden.csv")
df = fread("~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/videos/20200504-193926_joe_biden_recommendations.csv")
df[, 'V1':=NULL]

#df = fread("~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/videos/20200504-193926_joe_biden.csv")

#groupby genre and count number of videos
Expand Down Expand Up @@ -62,6 +64,16 @@ df_merged2 = merge(df_relevant,
df_merged,
by='channel')

#complete dataset with all videos
#all 19'000 videos, used for graph/network analysis
#left join on 'channel': all.x=TRUE keeps every row of df; rows whose channel
#has no match in df_merged get NA in the columns that come from df_merged
df_all =merge(df,
df_merged,
by='channel',
all.x=TRUE)

#keep only rows whose views value is not missing
df_all = df_all[complete.cases(views), ]

# % of videos still present in raw dataset.
nrow(df_merged2) / nrow(df)

Expand All @@ -74,7 +86,7 @@ tblFun <- function(x){
}

#
group_by_bias = do.call(rbind,lapply(df_merged2[, 'Bias'],tblFun))
group_by_bias = do.call(rbind,lapply(df_all[, 'Bias'],tblFun))
group_by_bias

#write.csv(group_by_bias, "~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/nlp/20200504-193926_joe_biden_group_bias.csv")
Expand All @@ -89,12 +101,16 @@ mapping <- c("Left" = 0, "Left-Center" = 0,
"Least Biased" = 1, "Right" = 1, "Right-Center" = 1)

df_merged2$Bias_num <- mapping[df_merged2$Bias]
#df_all$Bias_num <- mapping[df_all$Bias]

#keep columns Bias_num, title, description, channel and id for NLP dataset
df_nlp = df_merged2[, c('Bias_num' ,'title', 'description', 'channel','id')]

#export NLP dataset
#write.csv(df_nlp, "~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/nlp/20200504-193926_joe_biden_nlp.csv")

#export all videos dataset with media bias
write.csv(df_all, "~/Documents/GitHub/esade_fake_news/4_Politics/python3_script/data/videos/20200504-193926_joe_biden_all.csv")

# 32% Bias_num is 1, 68% is 0
sum(df_nlp$Bias_num) / nrow(df_nlp)
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
[
{
"kind": "youtube#channelListResponse",
"etag": "rDQY5AqhXlCIdzTcdJ2xHHjGkFk",
"pageInfo": {
"totalResults": 1,
"resultsPerPage": 1
},
"items": [
{
"kind": "youtube#channel",
"etag": "7nv-J9a_MqwztzsPSzH7LikqqlM",
"id": "UCqnbDFdCpuN8CMEg0VuEBqA",
"snippet": {
"title": "The New York Times",
"description": "The New York Times is the most powerful engine for independent, boots-on-the-ground and deeply reported journalism. We set the standard for the most ambitious and innovative storytelling across features, news and investigations. Because we\u2019re journalists, we\u2019re excited to report the news as quickly as possible, use new technological resources to uncover the truth, and unearth new cultural phenomenons with our critics. The internet didn\u2019t plant these ideas in our heads. We\u2019ve always been this way. It's all the news that's fit to watch. On YouTube.",
"customUrl": "nytimes",
"publishedAt": "2006-10-13T21:57:36Z",
"thumbnails": {
"default": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s88-c-k-c0xffffffff-no-rj-mo",
"width": 88,
"height": 88
},
"medium": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s240-c-k-c0xffffffff-no-rj-mo",
"width": 240,
"height": 240
},
"high": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s800-c-k-c0xffffffff-no-rj-mo",
"width": 800,
"height": 800
}
},
"localized": {
"title": "The New York Times",
"description": "The New York Times is the most powerful engine for independent, boots-on-the-ground and deeply reported journalism. We set the standard for the most ambitious and innovative storytelling across features, news and investigations. Because we\u2019re journalists, we\u2019re excited to report the news as quickly as possible, use new technological resources to uncover the truth, and unearth new cultural phenomenons with our critics. The internet didn\u2019t plant these ideas in our heads. We\u2019ve always been this way. It's all the news that's fit to watch. On YouTube."
}
},
"statistics": {
"viewCount": "1013306636",
"commentCount": "0",
"subscriberCount": "2990000",
"hiddenSubscriberCount": false,
"videoCount": "9788"
}
}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
[
{
"kind": "youtube#channelListResponse",
"etag": "rDQY5AqhXlCIdzTcdJ2xHHjGkFk",
"pageInfo": {
"totalResults": 1,
"resultsPerPage": 1
},
"items": [
{
"kind": "youtube#channel",
"etag": "7nv-J9a_MqwztzsPSzH7LikqqlM",
"id": "UCqnbDFdCpuN8CMEg0VuEBqA",
"snippet": {
"title": "The New York Times",
"description": "The New York Times is the most powerful engine for independent, boots-on-the-ground and deeply reported journalism. We set the standard for the most ambitious and innovative storytelling across features, news and investigations. Because we\u2019re journalists, we\u2019re excited to report the news as quickly as possible, use new technological resources to uncover the truth, and unearth new cultural phenomenons with our critics. The internet didn\u2019t plant these ideas in our heads. We\u2019ve always been this way. It's all the news that's fit to watch. On YouTube.",
"customUrl": "nytimes",
"publishedAt": "2006-10-13T21:57:36Z",
"thumbnails": {
"default": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s88-c-k-c0xffffffff-no-rj-mo",
"width": 88,
"height": 88
},
"medium": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s240-c-k-c0xffffffff-no-rj-mo",
"width": 240,
"height": 240
},
"high": {
"url": "https://yt3.ggpht.com/a/AATXAJzn56cpcGaVMwoSbfws4yNCSPWXWzbuzAtwHg=s800-c-k-c0xffffffff-no-rj-mo",
"width": 800,
"height": 800
}
},
"localized": {
"title": "The New York Times",
"description": "The New York Times is the most powerful engine for independent, boots-on-the-ground and deeply reported journalism. We set the standard for the most ambitious and innovative storytelling across features, news and investigations. Because we\u2019re journalists, we\u2019re excited to report the news as quickly as possible, use new technological resources to uncover the truth, and unearth new cultural phenomenons with our critics. The internet didn\u2019t plant these ideas in our heads. We\u2019ve always been this way. It's all the news that's fit to watch. On YouTube."
}
},
"statistics": {
"viewCount": "1013306636",
"commentCount": "0",
"subscriberCount": "2990000",
"hiddenSubscriberCount": false,
"videoCount": "9788"
}
}
]
}
]
731 changes: 731 additions & 0 deletions 4_Politics/python3_script/data/logs/vksEJR9EPQ8_1.html

Large diffs are not rendered by default.

Loading

0 comments on commit 02ca85a

Please sign in to comment.