diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..45a911c --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,56 @@ +{ + "creators": [ + { + "orcid": "0000-0003-3282-8083", + "affiliation": "Utrecht University", + "name": "Huijser, Dorien" + }, + { + "orcid": "0000-0003-1412-4402", + "affiliation": "Utrecht University", + "name": "Moopen, Neha" + } + ], + "contributors": [ + { + "orcid": "0000-0001-9510-0802", + "affiliation": "Utrecht University", + "name": "Janssen, Rik D.T." + } + ], + "title": "Data Privacy Survey: Challenges and needs of privacy-related services for research at Utrecht University", + "access_right": "open", + "related_identifiers": [ + { + "scheme": "urls", + "identifier": "https://utrechtuniversity.github.io/dataprivacysurvey", + "relation": "isAlternateIdentifier", + "resource_type": "other" + }, + { + "scheme": "urls", + "identifier": "https://utrechtuniversity.github.io/dataprivacyproject", + "relation": "describes", + "resource_type": "other" + } + ], + "keywords": [ + "privacy", + "research", + "gdpr", + "academia" + ], + "communities": [ + { + "identifier": "uu-rdm-support" + } + ], + "grants": [ + { + "id": "10.13039/501100003246" + } + ], + "upload_type": "publication", + "publication_type": "report", + "language": "eng" +} \ No newline at end of file diff --git a/assets/wordcloud_uu.png b/assets/wordcloud_uu.png new file mode 100644 index 0000000..9500f99 Binary files /dev/null and b/assets/wordcloud_uu.png differ diff --git a/docs/data-privacy-survey-recommendations.html b/docs/data-privacy-survey-recommendations.html index 5832a2f..fba6142 100644 --- a/docs/data-privacy-survey-recommendations.html +++ b/docs/data-privacy-survey-recommendations.html @@ -9,9 +9,9 @@ - + - +
Abstract
In the second quarter of 2022, Utrecht University (UU) Research Data @@ -2889,7 +2890,7 @@
Our goal at Research -Data Management Support (RDM Support) is to assist researchers with -any issues surrounding the management of their research data, including +
Our goal at Research Data +Management Support (RDM Support) is to assist researchers with any +issues surrounding the management of their research data, including research data that contain personal data. To understand how we can best help researchers with their privacy-related questions and needs, we wanted to investigate:
@@ -2932,15 +2933,11 @@This survey was part of a larger project at UU, the -Data -Privacy Project3, a data support effort led by RDM Support -at UU that aims to provide actionable and FAIR (Findable, Accessible, -Interoperable, Reusable) information and tools for researchers to handle -personal data in their research.
+description of the methods and results, please refer to the Data Privacy Survey Report. +This survey was part of a larger project, the Data Privacy Project4, a data support effort +led by RDM Support at UU that aims to provide actionable and FAIR +(Findable, Accessible, Interoperable, Reusable) information and tools +for researchers to handle personal data in their research.
All relevant materials can be found in the -GitHub -repository and are described in the -full -report.
+All relevant materials can be found in the GitHub repository and are described in the full report.
Below is a summary of all results, which are described in full in the -Data -Privacy Survey Report.
+Data Privacy Survey Report.The Data Privacy Survey showed that personal data are processed in @@ -2998,7 +2990,8 @@
Some researchers were not content with the support they had received in the past. For example, their research project may have suffered significant delays due to the fact that support personnel only pointed @@ -3016,7 +3009,7 @@
Research Data Management Support, Utrecht University↩︎
Information Technology Services, Utrecht University↩︎
The Data Privacy Project is funded by Utrecht +
Research Data Management Support, Utrecht University, +ORCID: 0000-0003-3282-8083↩︎
Research Data Management Support, Utrecht University, +ORCID: 0000-0003-1412-4402↩︎
Information Technology Services, Utrecht University, +ORCID: 0000-0001-9510-0802↩︎
The Data Privacy Project was funded by Utrecht University’s Research IT program and a Digital Competence Center grant -from the Dutch Organization for Scientific Research (NWO).↩︎
Abstract
In the second quarter of 2022, Utrecht University (UU) Research Data @@ -5128,13 +5128,13 @@
Our goal at Research -Data Management Support (RDM Support) is to assist researchers with -any issues surrounding the management of their research data, including +
Our goal at Research Data +Management Support (RDM Support) is to assist researchers with any +issues surrounding the management of their research data, including research data that contain personal data. To understand how we can best help researchers with their privacy-related questions and needs, we wanted to investigate:
@@ -5166,14 +5166,11 @@This survey was part of a larger project at UU, the -Data -Privacy Project2, a data support effort led by RDM Support -at UU that aims to provide actionable and FAIR (Findable, Accessible, -Interoperable, Reusable) information and tools for researchers to handle -personal data in their research.
+recommendations on how to move forward, please refer to the Recommendations report. +This survey was part of a larger project, the Data Privacy Project3, a data support effort +led by RDM Support at UU that aims to provide actionable and FAIR +(Findable, Accessible, Interoperable, Reusable) information and tools +for researchers to handle personal data in their research.
Note that for the faculty-specific figures, the amount of respondents +from the entire faculty were not always equal to the sum of respondents +per department. This was caused by the fact that the Department question +was not mandatory, and allowed multiple responses. So some respondents +could be part of several departments, and others are not displayed in +the Department plot, because they did not leave an answer to that +question.
From the survey respondents, 7 researchers from the faculty of Law, Economics, and Governance (LEG) left their email address to be -contacted. Of those, 2 online meetings have been conducted, most of them +contacted. Of those, 2 online meetings have been conducted, both of them with one of the privacy officers present. Below, the division over positions can be seen for all interviewees from the faculty of Law, Economics, and Governance (LEG).
@@ -5399,10 +5399,9 @@From the survey respondents, 1 researcher from the Faculty of -Geosciences left their email address to be contacted and agreed to -indeed meet online. Below, this person’s position can be seen.
-From the survey respondents, 1 PhD/junior/postdoctoral researcher +from the Faculty of Geosciences left their email address to be contacted +and agreed to indeed meet online.
From the raw Qualtrics output, we first cleaned and split the data into different data files (cleaned and closed survey responses, open -text responses, email addresses, see the -pseudonymise-data.R script for details). Both the +text responses, email addresses, see the pseudonymise-data.R script for details). Both the open text responses and the notes made during the one-on-one meetings were separately and manually coded to enable the extraction of action -points (see the file -codes-open-text-responses-meetings.csv for the codes +points (see the file codes-open-text-responses-meetings.csv for the codes used).
Below we report on the descriptive statistics or summaries from the survey questions and notes made during the one-on-one meetings. As we @@ -5426,57 +5423,41 @@
All survey-related documentation can be found in the -dedicated -survey repository.
+All survey-related documentation can be found in the dedicated GitHub repository.
The repository contains all - -scripts and documents used to clean the data and write the -reports.
+The repository contains all scripts and documents used to clean the data and +write the reports.
As the dataset contains personal information (demographic information, open text responses, email addresses, etc.), and no consent was obtained to share those details, we are unable to share them in this -repository. We did however create a -synthetic -(fake) dataset with 100 rows using Qualtrics’s “Generate responses” -functionality. This dataset can be used to regenerate most of the -current report, but will not create any realistic results.
+repository. We did however create a synthetic (fake) dataset with 100 rows using +Qualtrics’s “Generate responses” functionality. This dataset can be used +to regenerate most of the current report, but will not create any +realistic results.To reproduce this report:
Finally, contact details of the Data Protection Officer (DPO) were not always included either, whereas these always have to be provided -when working with personal data -(art. +when working with personal data (art. 13)
Below the experience and help received with DPIAs is displayed for the Faculty of Science.
-Below the experience and help received with DPIAs is displayed for the Faculty of Social and Behavioural Sciences (FSBS).
-Below the experience and help received with DPIAs is displayed for the Faculty of Humanities.
-Below the experience and help received with DPIAs is displayed for the Faculty of Veterinary Medicine.
-Below the experience and help received with DPIAs is displayed for the Faculty of Law, Economics and Governance (LEG).
-Below the experience and help received with DPIAs is displayed for the Faculty of Geosciences.
-Below the data sharing practices across the Faculty of Science are visualised.
-Below the data sharing practices across the Faculty of Social and Behavioural Sciences (FSBS) are visualised.
-Below the data sharing practices across the Faculty of Humanities are visualised.
-Below the data sharing practices across the Faculty of Veterinary Medicine are visualised.
-Below the data sharing practices across the Faculty of Law, Economics and Governance (LEG) are visualised.
-Below the data sharing practices across the Faculty of Geosciences are visualised.
-Below we highlight the most frequently mentioned challenges and needs expressed by researchers in the survey (open questions) and one-on-one meetings, along with the amount of times they were mentioned. A full -summary of the results and recommendations can be found in the Recommendations -report.
+summary of the results and recommendations can be found in the Recommendations report.It should be clear where to go for help (to whom or which webpage, @@ -7685,13 +7664,13 @@
-Soms zijn we door deze regels heiliger dan de paus. +“Soms zijn we door deze regels heiliger dan de paus.”
-Actual getting-your-hands-dirty support: not the kind that tells you -what to do, but also the kind that helps you by doing. +“Actual getting-your-hands-dirty support: not the kind that tells you +what to do, but also the kind that helps you by doing.”
Processes were often experienced as time-inefficient, and sometimes
-longer and more bureaucratic than necessary (mentioned
-22 times). For example, the DPIA process was mentioned explicitly 10
-times, as well as having to fill out too many forms with overlapping
-content (e.g., Privacy Scan, Data Management Plan, DPIA). Some (9)
-researchers argued that (part of) this burden should be relieved or
-carried by support staff:
-Minder acties die gericht zijn op inhoudelijk trainen van WP en meer uit -handen nemen van deze groep. +“Minder acties die gericht zijn op inhoudelijk trainen van WP en meer +uit handen nemen van deze groep.”
-Sharing data costs a lot of time and is inefficient when you do not do -it often. +“Sharing data costs a lot of time and is inefficient when you do not do +it often.”
-De informatie is gewoon veel te generiek, er zouden templates moeten -zijn per type onderzoek. +“De informatie is gewoon veel te generiek, er zouden templates moeten +zijn per type onderzoek.”
-Veel templates en uitlegmodellen spreken over data, data packages en +“Veel templates en uitlegmodellen spreken over data, data packages en metadata, maar die woorden zijn niet ingebed in historisch onderzoek. Er ontstaat al snel verwarring over wat historici nu precies moeten met -archiefmateriaal in het licht van privacy. +archiefmateriaal in het licht van privacy.”
-There is no one who tells at the start of your PhD how you should handle -your data. […] I think new PhD students should get a basic course on -data management and privacy.” +“There is no one who tells at the start of your PhD how you should +handle your data. […] I think new PhD students should get a basic course +on data management and privacy.”
-The data manager and privacy officer of the faculty of humanities help a -lot. This support is essential! +“The data manager and privacy officer of the faculty of humanities help +a lot. This support is essential!”
-Tot nu toe heb ik niet veel problemen gehad. De institutional review +“Tot nu toe heb ik niet veel problemen gehad. De institutional review board van onze afdeling kijkt altijd kritisch naar de -onderzoeksvoorstellen, ook met name op omgaan met persoonsgegevens. +onderzoeksvoorstellen, ook met name op omgaan met persoonsgegevens.”
A full summary of the results and recommendations can be found in the -Recommendations -report.
+Recommendations report.This report was created in -R -markdown, and was last generated on 2022-10-18. It was created in +
This report was created in R +markdown, and was last generated on 2022-10-26. It was created in the following local environment:
## R version 4.2.0 (2022-04-22 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
@@ -7922,10 +7899,13 @@ Technical information
-Research Data Management Support, Utrecht University↩︎
-The Data Privacy Project is funded by Utrecht
+
Research Data Management Support, Utrecht University,
+ORCID: 0000-0003-3282-8083↩︎
+Research Data Management Support, Utrecht University,
+ORCID: 0000-0003-1412-4402↩︎
+The Data Privacy Project was funded by Utrecht
University’s Research IT program and a Digital Competence Center grant
-from the Dutch Organization for Scientific Research (NWO).↩︎
+from the Dutch Organization for Scientific Research (NWO).↩︎
diff --git a/docs/data-privacy-survey-report.pdf b/docs/data-privacy-survey-report.pdf
new file mode 100644
index 0000000..9d7ac7e
Binary files /dev/null and b/docs/data-privacy-survey-report.pdf differ
diff --git a/docs/data-privacy-survey-report-v0.1.html b/docs/old/data-privacy-survey-report-v0.1.html
similarity index 100%
rename from docs/data-privacy-survey-report-v0.1.html
rename to docs/old/data-privacy-survey-report-v0.1.html
diff --git a/src/data-privacy-survey-recommendations.Rmd b/src/data-privacy-survey-recommendations.Rmd
index 39a25eb..7b96b4d 100644
--- a/src/data-privacy-survey-recommendations.Rmd
+++ b/src/data-privacy-survey-recommendations.Rmd
@@ -1,7 +1,7 @@
---
title: "Data Privacy Survey"
subtitle: "Recommendations for improving privacy-related services for research at Utrecht University"
-author: "Dorien Huijser, Neha Moopen^[Research Data Management Support, Utrecht University] & Rik Janssen^[Information Technology Services, Utrecht University]"
+author: "Dorien Huijser^[Research Data Management Support, Utrecht University, ORCID: 0000-0003-3282-8083], Neha Moopen^[Research Data Management Support, Utrecht University, ORCID: 0000-0003-1412-4402] & Rik D.T. Janssen^[Information Technology Services, Utrecht University, ORCID: 0000-0001-9510-0802]"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
abstract: |
In the second quarter of 2022, Utrecht University (UU) Research Data Management
@@ -12,8 +12,8 @@ abstract: |
personal data in research, and 3) How support at UU can improve their
services concerning personal data in research. The results showed that most
researchers knew to take privacy into account in their projects. However,
- there were vast differences in knowledge on this topic, as well as in how privacy-
- related practices were applied. Many researchers expressed concerns on the current
+ there were vast differences in knowledge on this topic, as well as in how
+ privacy-related practices were applied. Many researchers expressed concerns on the current
quantity, findability and quality of privacy-related support at UU. These
concerns were translated to recommendations in the current report. In short,
our recommendations concern organising privacy-related research support
@@ -24,7 +24,7 @@ abstract: |
organisation will likely both increase overall GDPR compliance and help
researchers focus more on performing high-quality research.
knit: (function(inputFile, encoding) {
- rmarkdown::render(inputFile, encoding = encoding, output_dir = "../docs") })
+ rmarkdown::render(inputFile, encoding = encoding, output_dir = "../docs", output_format = "all") })
output:
html_document:
toc: true
@@ -38,6 +38,14 @@ output:
css: "../assets/style.css"
includes:
in_header: "../includes/header.html"
+ pdf_document:
+ toc: true
+ toc_depth: 2
+ highlight: tango
+ number_sections: true
+urlcolor: #C00A35
+linkcolor: #C00A35
+filecolor: #C00A35
---
```{r setup, include=FALSE}
@@ -56,8 +64,8 @@ its sharing and publication. In our own experience, the number and complexity of
questions on handling personal data in scientific research at Utrecht University
(UU) is increasing.
-Our goal at Research Data
-Management Support (RDM Support) is to assist researchers with any issues
+Our goal at [Research Data Management Support](https://uu.nl/rdm){target="_blank"}
+(RDM Support) is to assist researchers with any issues
surrounding the management of their research data, including research data that
contain personal data. To understand how we can best help researchers with their
privacy-related questions and needs, we wanted to investigate:
@@ -71,12 +79,11 @@ meetings with a selection of UU researchers. This report summarises our findings
and describes recommendations to improve privacy-related services for research
at Utrecht University. For a full description of the methods and results,
please refer to the
-Data Privacy Survey Report.
+[Data Privacy Survey Report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-report.html){target="_blank"}.
-This survey was part of a larger project at UU, the
-Data
-Privacy Project^[The
-Data Privacy Project is funded by Utrecht University’s Research IT program and
+This survey was part of a larger project, the
+[Data Privacy Project](https://utrechtuniversity.github.io/dataprivacyproject/){target="_blank"}^[The
+Data Privacy Project was funded by Utrecht University’s Research IT program and
a Digital Competence Center grant from the Dutch Organization for Scientific
Research (NWO).], a data support effort led by RDM Support at UU that aims to
provide actionable and FAIR (Findable, Accessible, Interoperable, Reusable)
@@ -94,13 +101,13 @@ area.
experiences, challenges and needs in this area.
All relevant materials can be found in the
-GitHub repository
+[GitHub repository](https://github.com/utrechtuniversity/dataprivacysurvey){target="_blank"}
and are described in the
-full report.
+[full report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-report.html){target="_blank"}.
# Results summary {#results-summary}
Below is a summary of all results, which are described in full in the
-Data Privacy Survey Report.
+[Data Privacy Survey Report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-report.html){target="_blank"}.
## Current practices
The Data Privacy Survey showed that personal data are processed in research from
@@ -131,7 +138,7 @@ from their side, when and from whom should they ask help? The latter question
is relevant when considering the large variety of support personnel in each
faculty: privacy officers, data managers, security officers, Ethical Review
Boards, Research Support Officers and the Data Protection Officer can all help
-researchers, but their exact role is unclear to researchers and may even differ
+researchers, but their exact role is often unclear to researchers and may even differ
across faculties.
Some researchers were not content with the support they had received in the
@@ -150,7 +157,7 @@ to two reasons:
Privacy and Research, the RDM Support website, websites from the Ethical Review
Boards, faculty data support websites, etc. Researchers indicated to find it
hard to assess which information resource was the one they should be using.
-- Information that was too abstract, contained too much jargon to understand,
+- Existing information was too abstract, contained too much jargon to understand,
was not tailored to research, or not tailored to specific research scenarios.
Finally, many problems seemed to arise from the fact that privacy had not been
@@ -171,7 +178,7 @@ we make this recommendation more concrete:
support staff
- Smoothen the process of handling personal data in research
- Make current and future information and tools better findable
-- Increase the hands-on nature of support (people)
+- Increase the hands-on nature of support
- Improve the quality of information and tools
## Provide clarity on the process and responsibilities of researchers and data support staff {#clarity-process}
@@ -205,8 +212,8 @@ Data Protection Impact Assessment (DPIA) should be smoother and more clear:
privacy officers should make agreements on when a DPIA should be performed. DPIAs
from similar projects – also from other faculties if relevant - should be
reused to prevent having to perform a DPIA from scratch. Ideally, a DPIA should
-only be performed if absolutely necessary. At the moment, privacy officers are
-already working on this recommendation.
+only be performed if absolutely necessary. At the moment, UU's privacy officers
+are already working on this recommendation.
## Make current and future information and tools better findable {#findable-info-tools}
As several researchers indicated to have difficulties finding the correct
@@ -214,7 +221,7 @@ information and tools among the many sources of information, we recommend to:
- Collectively create one central webpage with all relevant information for
researchers. This is currently being created in the form of the
-Data Privacy Handbook,
+[Data Privacy Handbook](https://utrechtuniversity.github.io/dataprivacyhandbook){target="_blank"},
initiated by the RDM Support team. Ideally, similar content that currently
exists on other UU-related websites can be consolidated in the Data Privacy
Handbook in consultation with those responsible for such websites.
diff --git a/src/data-privacy-survey-report.Rmd b/src/data-privacy-survey-report.Rmd
index b07a280..a3e30c4 100644
--- a/src/data-privacy-survey-report.Rmd
+++ b/src/data-privacy-survey-report.Rmd
@@ -1,7 +1,7 @@
---
title: "Data Privacy Survey"
subtitle: "Challenges and needs of privacy-related services for research at Utrecht University"
-author: "Dorien Huijser & Neha Moopen^[Research Data Management Support, Utrecht University]"
+author: "Dorien Huijser^[Research Data Management Support, Utrecht University, ORCID: 0000-0003-3282-8083] & Neha Moopen^[Research Data Management Support, Utrecht University, ORCID: 0000-0003-1412-4402]"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
abstract: |
In the second quarter of 2022, Utrecht University (UU) Research Data Management
@@ -12,16 +12,16 @@ abstract: |
personal data in research, and 3) How support at UU can improve their
services concerning personal data in research. The results showed that most
researchers knew to take privacy into account in their projects. However,
- there were vast differences in knowledge on this topic, as well as in how privacy-
- related practices were applied. Many researchers expressed concerns on the current
+ there were vast differences in knowledge on this topic, as well as in how
+ privacy-related practices were applied. Many researchers expressed concerns on the current
quantity, findability and quality of privacy-related support at UU. These
concerns were translated to recommendations in the
- [Recommendations report](data-privacy-survey-recommendations.html). The current
- report describes the methodology and full results of the survey and the
- one-on-one meetings with researchers, both for the entire UU, as well as for
- each UU faculty separately.
+ [Recommendations report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-recommendations.html){target="_blank"}.
+ The current report describes the methodology and full results of the survey and
+ the one-on-one meetings with researchers, both for the entire UU, as well as
+ for each UU faculty separately.
knit: (function(inputFile, encoding) {
- rmarkdown::render(inputFile, encoding = encoding, output_dir = "../docs") })
+ rmarkdown::render(inputFile, encoding = encoding, output_dir = "../docs", output_format = "all") })
output:
html_document:
toc: true
@@ -35,6 +35,15 @@ output:
css: "../assets/style.css"
includes:
in_header: "../includes/header.html"
+ pdf_document:
+ toc: true
+ toc_depth: 2
+ highlight: tango
+ number_sections: true
+ df_print: kable
+urlcolor: #C00A35
+linkcolor: #C00A35
+filecolor: #C00A35
---
```{r setup, include=FALSE}
@@ -60,8 +69,8 @@ its sharing and publication. In our own experience, the number and complexity of
questions on handling personal data in scientific research at Utrecht University
(UU) is increasing.
-Our goal at Research Data
-Management Support (RDM Support) is to assist researchers with any issues
+Our goal at [Research Data Management Support](https://uu.nl/rdm){target="_blank"}
+(RDM Support) is to assist researchers with any issues
surrounding the management of their research data, including research data that
contain personal data. To understand how we can best help researchers with their
privacy-related questions and needs, we wanted to investigate:
@@ -74,12 +83,11 @@ To answer these questions, we set up an online survey and planned one-on-one
meetings with a selection of UU researchers. This report describes our methods
and full results of both the online survey and the one-on-one meetings. For a
full summary of the results and recommendations on how to move forward, please
-refer to the [Recommendations report](data-privacy-survey-recommendations.html).
+refer to the [Recommendations report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-recommendations.html){target="_blank"}.
-This survey was part of a larger project at UU, the
-Data
-Privacy Project^[The
-Data Privacy Project is funded by Utrecht University’s Research IT program and
+This survey was part of a larger project, the
+[Data Privacy Project](https://utrechtuniversity.github.io/dataprivacyproject/){target="_blank"}^[The
+Data Privacy Project was funded by Utrecht University’s Research IT program and
a Digital Competence Center grant from the Dutch Organization for Scientific
Research (NWO).], a data support effort led by RDM Support at UU that aims to
provide actionable and FAIR (Findable, Accessible, Interoperable, Reusable)
@@ -134,16 +142,16 @@ The survey was created in Qualtrics and distributed from March 21st, 2022
onwards via several communication channels to reach as many UU researchers as
possible:
-- An email was sent to all academic staff at UU through
+- An [email](https://utrechtuniversity.github.io/dataprivacyproject/20220321-data-privacy-survey.html){target="_blank"}
+was sent to all academic staff at UU through
central communication channels. The reasoning was that this would be the most
effective way to reach as many UU researchers as possible - taking for granted
that we would likely miss a small part of non-academic personnel also involved
in research in some way.
- A mention in several Faculty newsletters.
-- Social media messages (e.g., on Twitter).
-- A news item on the UU intranet.
+- Social media messages (e.g., on [Twitter](https://twitter.com/RDMsupportUU/status/1517390979707412481){target="_blank"}).
+- A [news item](https://intranet.uu.nl/en/news/news-items/survey-better-support-for-handling-privacy-sensitive-data-in-research){target="_blank"}
+on the UU intranet.
- Via data support colleagues, who were asked to point researchers they were
in contact with to the survey.
@@ -178,6 +186,13 @@ greater need for support with respect to handling personal data.
```{r facultyplot}
```
+Note that for the faculty-specific figures, the number of respondents from the
+entire faculty was not always equal to the sum of respondents per department. This
+was caused by the fact that the Department question was not mandatory, and
+allowed multiple responses. So some respondents could be part of several
+departments, and others are not displayed in the Department plot, because they
+did not leave an answer to that question.
+
### Science
There were `r dim(dppsurvey_science)[1]` respondents from the Science faculty
in the online survey. It took them a median of
@@ -367,8 +382,8 @@ positionsmeetings(interviews_vet,
From the survey respondents, `r sum(dppsurvey_leg$Email == "Yes", na.rm=TRUE)`
researchers from the faculty of Law, Economics, and Governance (LEG) left their
email address to be contacted. Of those, `r dim(interviews_leg)[1]` online
-meetings have been conducted, most of them with one of the privacy officers
-present. Below, the division over positions can be seen for all interviewees
+meetings have been conducted, both of them with one of the privacy officers
+present. Below, the division over positions can be seen for all interviewees
from the faculty of Law, Economics, and Governance (LEG).
```{r positionsmeetings-leg}
@@ -380,13 +395,14 @@ positionsmeetings(interviews_leg,
### Geo
From the survey respondents, `r sum(dppsurvey_geo$Email == "Yes", na.rm=TRUE)`
-researcher from the Faculty of Geosciences left their email address to be contacted
-and agreed to indeed meet online. Below, this person's position can be seen.
+PhD/junior/postdoctoral researcher from the Faculty of Geosciences left their
+email address to be contacted and agreed to indeed meet online.
```{r positionsmeetings-geo}
-positionsmeetings(interviews_geo,
- title = "Positions one-on-one meetings Geosciences",
- caption = "Positions of Geosciences researchers in one-on-one meetings")
+# Commented out: with just 1 researcher, the position is stated in the text instead of plotted
+#positionsmeetings(interviews_geo,
+# title = "Positions one-on-one meetings Geosciences",
+# caption = "Positions of Geosciences researchers in one-on-one meetings")
```
### {-}
@@ -395,11 +411,11 @@ positionsmeetings(interviews_geo,
From the raw Qualtrics output, we first cleaned and split the data into different
data files (cleaned and closed survey responses, open text responses, email
addresses, see the
-pseudonymise-data.R script for details). Both the open text
+[pseudonymise-data.R](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/src/pseudonymise-data.R){target="_blank"}
+script for details). Both the open text
responses and the notes made during the one-on-one meetings were separately and
-manually coded to enable the extraction of action points (see the file codes-open-text-responses-meetings.csv for the codes used).
+manually coded to enable the extraction of action points (see the file [codes-open-text-responses-meetings.csv](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/documentation/codes-open-text-responses-meetings.csv){target="_blank"}
+for the codes used).
Below we report on the descriptive statistics or summaries from the
survey questions and notes made during the one-on-one meetings. As we did not
@@ -407,44 +423,43 @@ formulate hypotheses, no statistical analyses were performed.
## Data and material availability {#data-and-material-availability}
All survey-related documentation can be found in the
-dedicated
-survey repository.
+[dedicated GitHub repository](https://github.com/UtrechtUniversity/dataprivacysurvey){target="_blank"}.
### Documentation {#documentation-availability}
-- A general
-description and links to the relevant written reports.
+- A [general description](https://utrechtuniversity.github.io/dataprivacysurvey){target="_blank"}
+and links to the relevant written reports.
- The
-full survey.
-- The survey's privacy statement.
-- The survey's Data Management Plan.
-- The codes used to score open text responses and meeting notes.
-- Codebook of the survey in .csv format.
+[full survey](https://utrechtuniversity.github.io/dataprivacysurvey/documentation/survey-questions-qualtrics.pdf){target="_blank"}.
+- The survey's [privacy statement](https://utrechtuniversity.github.io/dataprivacysurvey/documentation/survey-privacy-statement.pdf){target="_blank"}.
+- The survey's [Data Management Plan](https://utrechtuniversity.github.io/dataprivacysurvey/documentation/survey-data-management-plan.pdf){target="_blank"}.
+- The [codes used](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/documentation/codes-open-text-responses-meetings.csv){target="_blank"}
+to score open text responses and meeting notes.
+- [Codebook](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/documentation/survey-codebook.csv){target="_blank"}
+of the survey in .csv format.
### Code {#code-availability}
-The repository contains all
-scripts and documents used to clean the data and write the reports.
+The repository contains all [scripts and documents](https://github.com/UtrechtUniversity/dataprivacysurvey/tree/main/src){target="_blank"}
+used to clean the data and write the reports.
### Data {#data-availability}
As the dataset contains personal information (demographic information, open text
responses, email addresses, etc.), and no consent was obtained to share those
details, we are unable to share them in this repository. We did however create a
-synthetic (fake) dataset with 100 rows using
-Qualtrics's "Generate responses" functionality. This dataset can be used to
-regenerate most of the current report, but will not create any realistic results.
+[synthetic (fake) dataset](https://github.com/UtrechtUniversity/dataprivacysurvey/tree/main/data/processed/Data_Privacy_Survey_fakedataset_20220929.csv){target="_blank"}
+with 100 rows using Qualtrics's "Generate responses" functionality. This dataset
+can be used to regenerate most of the current report, but will not create any
+realistic results.
To reproduce this report:
-1. Clone the repository from GitHub.
-2. Open pseudonymise-data.R, change the path to the ("raw") dataset
-and run the script. This will create a clean version of the dataset, stored in
-either the data/processed or data/pseud folder.
-3. Open the file data-privacy-survey-report.Rmd and knit the document. Note that
-the final part of the report cannot be reproduced, because there is no synthetic
-data available for the open text responses and one-on-one meeting notes.
+1. Clone [the repository](https://github.com/UtrechtUniversity/dataprivacysurvey){target="_blank"} from GitHub.
+2. Open [pseudonymise-data.R](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/src/pseudonymise-data.R){target="_blank"},
+change the path to the ("raw") dataset and run the script. This will create a
+clean version of the dataset, stored in either the data/processed or data/pseud folder.
+3. Open the file [data-privacy-survey-report.Rmd](https://github.com/UtrechtUniversity/dataprivacysurvey/blob/main/src/data-privacy-survey-report.Rmd){target="_blank"}
+and knit the document. Note that the final part of the report cannot be
+reproduced, because there is no synthetic data available for the open text
+responses and one-on-one meeting notes.
# Results {#results}
## Types of research data {#types-of-research-data .tabset}
@@ -461,13 +476,21 @@ researchers mostly recognised these types of data as being personal data.
```{r datatypesuu, results = "asis"}
# Source: https://stackoverflow.com/questions/17717323/align-two-data-frames-next-to-each-other-with-knitr
-tables <- datatypes(dppsurvey)
-cat(c('',
- tables[[1]],
- ' ',
- tables[[2]],
- '
'),
- sep = '')
+tables_uu <- datatypes(dppsurvey)
+
+if(knitr::is_html_output()){
+ htmltables <- datatypes_html(tables_uu)
+ cat(c('',
+ htmltables[[1]],
+ ' ',
+ htmltables[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_uu)
+}
```
@@ -490,8 +513,20 @@ information, contact information, direct identifiers).
```{r datatypesscience, results = "asis"}
tables_science <- datatypes(dppsurvey_science)
-cat(c('', tables_science[[1]], ' ', tables_science[[2]], '
'),
+
+if(knitr::is_html_output()){
+ htmltables_science <- datatypes_html(tables_science)
+ cat(c('',
+ htmltables_science[[1]],
+ ' ',
+ htmltables_science[[2]],
+ '
'),
sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_science)
+}
```
@@ -520,12 +555,21 @@ relative to the UU-wide responses.
```{r datatypesfsw, results = "asis"}
tables_fsw <- datatypes(dppsurvey_fsw)
-cat(c('',
- tables_fsw[[1]],
- ' ',
- tables_fsw[[2]],
- '
'),
- sep = '')
+
+if(knitr::is_html_output()){
+ htmltables_fsw <- datatypes_html(tables_fsw)
+ cat(c('',
+ htmltables_fsw[[1]],
+ ' ',
+ htmltables_fsw[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_fsw)
+}
+
```
@@ -550,12 +594,20 @@ information, contact information, direct identifiers).
```{r datatypeshum, results = "asis"}
tables_hum <- datatypes(dppsurvey_hum)
-cat(c('',
- tables_hum[[1]],
- ' ',
- tables_hum[[2]],
- '
'),
- sep = '')
+
+if(knitr::is_html_output()){
+ htmltables_hum <- datatypes_html(tables_hum)
+ cat(c('',
+ htmltables_hum[[1]],
+ ' ',
+ htmltables_hum[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_hum)
+}
```
@@ -584,12 +636,20 @@ deal with.
```{r datatypesvet, results = "asis"}
tables_vet <- datatypes(dppsurvey_vet)
-cat(c('',
- tables_vet[[1]],
- ' ',
- tables_vet[[2]],
- '
'),
- sep = '')
+
+if(knitr::is_html_output()){
+ htmltables_vet <- datatypes_html(tables_vet)
+ cat(c('',
+ htmltables_vet[[1]],
+ ' ',
+ htmltables_vet[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_vet)
+}
```
@@ -613,12 +673,20 @@ direct identifiers).
```{r datatypesleg, results = "asis"}
tables_leg <- datatypes(dppsurvey_leg)
-cat(c('',
- tables_leg[[1]],
- ' ',
- tables_leg[[2]],
- '
'),
- sep = '')
+
+if(knitr::is_html_output()){
+ htmltables_leg <- datatypes_html(tables_leg)
+ cat(c('',
+ htmltables_leg[[1]],
+ ' ',
+ htmltables_leg[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_leg)
+}
```
@@ -644,12 +712,20 @@ information, contact information, direct identifiers).
```{r datatypesgeo, results = "asis"}
tables_geo <- datatypes(dppsurvey_geo)
-cat(c('',
- tables_geo[[1]],
- ' ',
- tables_geo[[2]],
- '
'),
- sep = '')
+
+if(knitr::is_html_output()){
+ htmltables_geo <- datatypes_html(tables_geo)
+ cat(c('',
+ htmltables_geo[[1]],
+ ' ',
+ htmltables_geo[[2]],
+ '
'),
+ sep = '')
+}
+
+if(knitr::is_latex_output()){
+ datatypes_latex(tables_geo)
+}
```
@@ -829,7 +905,7 @@ project may not be reused again, reducing the scientific value of the data.
Finally, contact details of the Data Protection Officer (DPO) were not always
included either, whereas these always have to be provided when working with
-personal data (art. 13)
+personal data ([art. 13](https://gdpr-info.eu/art-13-gdpr/){target="_blank"}).
```{r consentforms-uuwide}
consentforms(dppsurvey)
@@ -1382,7 +1458,7 @@ indicated by researchers from the Faculty of Geosciences:
bettersupportplot(dppsurvey_geo)
```
-## {-}
+### {-}
## Challenges and needs (open questions, meetings) {#challenges-needs-open .tabset}
@@ -1440,7 +1516,13 @@ totaltools <- counttools(surveydataset = opentext,
### UU-wide {.active}
```{r wordcloud-uuwide}
-createwordcloud(totalcodes)
+if(knitr::is_html_output()){
+ createwordcloud(totalcodes)
+}
+
+if(knitr::is_latex_output()){
+ include_graphics("../assets/wordcloud_uu.png")
+}
```
### Science
@@ -1504,7 +1586,7 @@ Below you can find the wordcloud for just the Faculty of Geosciences:

-## {-}
+### {-}
## Most common challenges and needs {#common-challenges-needs}
@@ -1512,7 +1594,7 @@ Below we highlight the most frequently mentioned challenges and needs expressed
by researchers in the survey (open questions) and one-one-one meetings, along
with the amount of times they were mentioned. A full
summary of the results and recommendations can be found in the
-[Recommendations report](data-privacy-survey-recommendations).
+[Recommendations report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-recommendations.html){target="_blank"}.
### Visibility and findability {#visibility-and-findability}
`r totalcodes$Meaning[totalcodes$Code == "visibility"]` (mentioned
@@ -1559,11 +1641,11 @@ delays in their project, instead of looking at how to concretely solve existing
issues in practice (mentioned `r totalcodes$Times_mentioned_total[totalcodes$Code == "strict"]`
times):
-Soms zijn we door deze regels heiliger dan de paus.
Actual getting-your-hands-dirty support: not the kind that tells you what to do, but also the kind that helps you by doing.
+"Soms zijn we door deze regels heiliger dan de paus."
"Actual getting-your-hands-dirty support: not the kind that tells you what to do, but also the kind that helps you by doing."
### Less bureaucracy {#less-bureaucracy}
Processes were often experienced as time-inefficient, and sometimes longer
-and more bureaucratic than necessary (mentioned
+and more bureaucratic than necessary (mentioned
`r totalcodes$Times_mentioned_total[totalcodes$Code == "quicker"]` times).
For example, the DPIA process was mentioned explicitly
`r totalcodes$Times_mentioned_total[totalcodes$Code == "dpia-process"]` times,
@@ -1573,7 +1655,7 @@ Privacy Scan, Data Management Plan, DPIA). Some
researchers argued that (part of) this burden should be relieved or
carried by support staff:
-Minder acties die gericht zijn op inhoudelijk trainen van WP en meer uit handen nemen van deze groep.
Sharing data costs a lot of time and is inefficient when you do not do it often.
+"Minder acties die gericht zijn op inhoudelijk trainen van WP en meer uit handen nemen van deze groep."
"Sharing data costs a lot of time and is inefficient when you do not do it often."
### Unclear processes and guidelines {#unclear-processes-and-guidelines}
Many (`r totalcodes$Times_mentioned_total[totalcodes$Code == "unclear-process"]`)
@@ -1583,8 +1665,7 @@ or more practical guidelines on this topic (mentioned
`r totalcodes$Times_mentioned_total[totalcodes$Code == "guidelines"]` times),
for example on:
-- What steps do researchers need to take? (
-`r totalcodes$Times_mentioned_total[totalcodes$Code == "assessment-steps"]` researchers)
+- What steps do researchers need to take? (`r totalcodes$Times_mentioned_total[totalcodes$Code == "assessment-steps"]` researchers)
- Who should researcher ask for help in which situation?
- `r totalcodes$Meaning[totalcodes$Code=="responsibility"]` (`r totalcodes$Times_mentioned_total[totalcodes$Code == "responsibility"]` researchers)
- `r totalcodes$Meaning[totalcodes$Code=="changing-rules"]` (`r totalcodes$Times_mentioned_total[totalcodes$Code == "changing-rules"]` researchers)
@@ -1616,8 +1697,8 @@ historical data (mentioned `r totalcodes$Times_mentioned_total[totalcodes$Code =
times), or video data (mentioned `r totalcodes$Times_mentioned_total[totalcodes$Code == "data-video"]`
times).
-De informatie is gewoon veel te generiek, er zouden templates
-moeten zijn per type onderzoek.
Veel templates en uitlegmodellen spreken over data, data packages en metadata, maar die woorden zijn niet ingebed in historisch onderzoek. Er ontstaat al snel verwarring over wat historici nu precies moeten met archiefmateriaal in het licht van privacy.
+"De informatie is gewoon veel te generiek, er zouden templates
+moeten zijn per type onderzoek."
"Veel templates en uitlegmodellen spreken over data, data packages en metadata, maar die woorden zijn niet ingebed in historisch onderzoek. Er ontstaat al snel verwarring over wat historici nu precies moeten met archiefmateriaal in het licht van privacy."
#### Frequently asked questions {#faq}
A selection of researchers used the space in the open questions and/or the meetings
@@ -1663,7 +1744,7 @@ and/or teachers (mentioned
- A course on how to handle personal data in research (mentioned
`r totalcodes$Times_mentioned_total[totalcodes$Code == "course"]` times)
-There is no one who tells at the start of your PhD how you should
+"There is no one who tells at the start of your PhD how you should
handle your data. [...] I think new PhD students should get a basic course on
data management and privacy."
@@ -1673,11 +1754,11 @@ Notably, there were also researchers who indicated **not** to have run into issu
`r totalcodes$Times_mentioned_total[totalcodes$Code == "positive-existing-support"]`
times). For example:
-The data manager and privacy officer of the faculty of humanities help a lot. This support is essential!
Tot nu toe heb ik niet veel problemen gehad. De institutional review board van onze afdeling kijkt altijd kritisch naar de onderzoeksvoorstellen, ook met name op omgaan met persoonsgegevens.
+"The data manager and privacy officer of the faculty of humanities help a lot. This support is essential!"
"Tot nu toe heb ik niet veel problemen gehad. De institutional review board van onze afdeling kijkt altijd kritisch naar de onderzoeksvoorstellen, ook met name op omgaan met persoonsgegevens."
# Summary {#summary}
A full summary of the results and recommendations can be found in the
-[Recommendations report](data-privacy-survey-recommendations.html).
+[Recommendations report](https://utrechtuniversity.github.io/dataprivacysurvey/docs/data-privacy-survey-recommendations.html){target="_blank"}.
# Discussion {#discussion}
In order to interpret the results described in this report correctly, there
@@ -1698,7 +1779,7 @@ where the open questions and one-on-one meetings are concerned.
# Technical information {#technical-information}
This report was created in
-R markdown,
+[R markdown](https://bookdown.org/yihui/rmarkdown/){target="_blank"},
and was last generated on `r as.character(Sys.Date())`. It was created in the
following local environment:
diff --git a/src/data-privacy-survey-report-v0.1.Rmd b/src/old/data-privacy-survey-report-v0.1.Rmd
similarity index 100%
rename from src/data-privacy-survey-report-v0.1.Rmd
rename to src/old/data-privacy-survey-report-v0.1.Rmd
diff --git a/src/plot-data.R b/src/plot-data.R
index 818c15d..886bbd0 100644
--- a/src/plot-data.R
+++ b/src/plot-data.R
@@ -329,8 +329,13 @@ datatypes <- function(data){
mutate(Frequency = paste0(Count, " (", Percentage, "%)")) %>%
select(-Count, -Percentage)
- t1 <- kable(datatypetable, format = "html", output = FALSE,
- col.names = gsub("_", " ", names(datatypetable),),
+ tables <- list(datatypetable, tablepersdata)
+ return(tables)
+}
+
+datatypes_html <- function(tables){
+ t1 <- kable(tables[[1]], format = "html", output = FALSE,
+ col.names = gsub("_", " ", names(tables[[1]]),),
caption = "Types of research data",
table.attr='cellpadding="3", cellspacing="3"') %>%
kable_styling(bootstrap_options = c("striped",
@@ -338,8 +343,8 @@ datatypes <- function(data){
"condensed",
"responsive"),
fixed_thead = T)
- t2 <- kable(tablepersdata, format = "html", output = FALSE,
- col.names = gsub("_", " ", names(tablepersdata)),
+ t2 <- kable(tables[[2]], format = "html", output = FALSE,
+ col.names = gsub("_", " ", names(tables[[2]])),
caption = "Types of personal data",
table.attr='cellpadding="3"') %>%
kable_styling(bootstrap_options = c("striped",
@@ -348,8 +353,21 @@ datatypes <- function(data){
"responsive"),
fixed_thead = T)
- tables <- list(t1, t2)
- return(tables)
+ htmltables <- list(t1, t2)
+ return(htmltables)
+}
+
+datatypes_latex <- function(tables){
+ kables(list(
+ kable(tables[[1]], "simple",
+ col.names = gsub("_", " ", names(tables[[1]]),),
+ caption = "Types of research data",
+ valign = 't'),
+ kable(tables[[2]], "simple",
+ col.names = gsub("_", " ", names(tables[[2]])),
+ caption = "Types of personal data",
+ valign = 't')
+ ))
}
@@ -393,8 +411,7 @@ dppsurvey %>%
plot.background=element_blank(),
strip.background = element_rect(color="white",
size=1.5),
- strip.text = element_text(family = "Verdana",
- size = 10,
+ strip.text = element_text(size = 10,
face = "bold"))
@@ -437,8 +454,7 @@ datatypesdepartments <- function(data, string,
plot.background=element_blank(),
strip.background = element_rect(color="white",
size=1.5),
- strip.text = element_text(family = "Verdana",
- size = 10,
+ strip.text = element_text(size = 10,
face = "bold"))
}
@@ -521,9 +537,9 @@ consentforms <- function(data, title1 = "Consent Forms",
## ---- dpia --------
dpiaplot <- function(data, title1 = "Experience with DPIAs",
- caption1 = "Have you ever conducted, or will you conduct, a Data Protection Impact Assessment (DPIA)?",
+ caption1 = "Have you ever conducted, or will you \nconduct, a Data Protection Impact Assessment (DPIA)?",
title2 = "Received support DPIAs",
- caption2 = "Did/Will you ask for help in conducting the DPIA?"){
+ caption2 = "Did/Will you ask for help \nin conducting the DPIA?"){
DPIA_experience_plot <-
data %>%
pivot_longer(cols = grep("^DPIA_experience_[0-9]+$", names(data), value=TRUE),
@@ -559,9 +575,9 @@ dpiaplot <- function(data, title1 = "Experience with DPIAs",
## ---- datasharing --------
datasharingplot <- function(data,
title1 = "External sharing",
- caption1 = "Do you / Will you share research data containing personal data outside of the UU?",
+ caption1 = "Do you / Will you share research data \ncontaining personal data outside of the UU?",
title2 = "Sharing measures",
- caption2 = "What actions do you take to transfer personal data securely outside of the UU?"){
+ caption2 = "What actions do you take to transfer \npersonal data securely outside of the UU?"){
external_sharing_plot <-
data %>%
@@ -698,8 +714,7 @@ dppsurvey %>%
plot.background=element_blank(),
strip.background = element_rect(color="white",
size=1.5),
- strip.text = element_text(#family = "Verdana",
- size = 10,
+ strip.text = element_text(size = 10,
face = "bold"))