-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathPS239T.tex
20737 lines (17018 loc) · 784 KB
/
PS239T.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Options for packages loaded elsewhere
% (\PassOptionsToPackage only queues the options; they take effect when
% hyperref, url and xcolor are actually loaded later in the preamble.)
\PassOptionsToPackage{unicode}{hyperref}
\PassOptionsToPackage{hyphens}{url}
\PassOptionsToPackage{dvipsnames,svgnames*,x11names*}{xcolor}
%
\documentclass[
]{book}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
% Engine switch: the digits after "0" are only contributed when \ifxetex or
% \ifluatex is true, so the number equals 0 exactly when neither engine is
% in use. The first branch is therefore the pdfTeX setup (8-bit font and
% input encodings); the \else branch is the XeTeX/LuaTeX setup (unicode-math).
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{textcomp} % provide euro and other symbols
\else % if luatex or xetex
\usepackage{unicode-math}
\defaultfontfeatures{Scale=MatchLowercase}
\defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1}
\fi
% Optional typography packages: each one is loaded only when its .sty file
% is installed, so the document still compiles on a minimal TeX installation.
% Use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\IfFileExists{microtype.sty}{% use microtype if available
\usepackage[]{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
% Paragraph layout: no first-line indent, vertical space between paragraphs.
% \KOMAClassName is only defined by KOMA-Script classes; this file uses the
% standard book class, so the first (non-KOMA) branch is the one that runs:
% parskip if installed, otherwise the two \setlength fallbacks.
\makeatletter
\@ifundefined{KOMAClassName}{% if non-KOMA class
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}}
}{% if KOMA class
\KOMAoptions{parskip=half}}
\makeatother
% xcolor picks up the dvipsnames/svgnames*/x11names* options queued at the
% top of the preamble.
\usepackage{xcolor}
\IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available
% bookmark loads hyperref itself; plain hyperref is the fallback when
% bookmark is not installed.
\IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}}
% PDF metadata and link appearance. colorlinks=true colours the link text
% itself instead of drawing boxes around links. Maroon and Blue are named
% colours, presumably provided by the xcolor name sets requested above
% (dvipsnames/svgnames) -- confirm if those options are ever changed.
\hypersetup{
pdftitle={Computational Thinking for Social Scientists},
pdfauthor={Jae Yeon Kim},
colorlinks=true,
linkcolor=Maroon,
filecolor=Maroon,
citecolor=Blue,
urlcolor=Blue,
pdfcreator={LaTeX via pandoc}}
\urlstyle{same} % disable monospaced font for URLs
% Syntax-highlighting support for code chunks. In the document body each
% chunk is wrapped as Shaded > Highlighting, and every token inside it is
% tagged with one of the \...Tok macros defined below.
\usepackage{color}
\usepackage{fancyvrb}
\newcommand{\VerbBar}{|}
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
% commandchars=\\\{\} keeps backslash and braces active inside the verbatim
% text, so the \...Tok macros are executed rather than printed literally.
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\usepackage{framed}
% Shaded draws the light grey (248,248,248) background behind a code chunk
% using framed's snugshade environment, which reads the colour "shadecolor".
\definecolor{shadecolor}{RGB}{248,248,248}
\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
% One macro per token class. Each takes the token text as #1 and applies a
% colour and/or bold/italic; classes defined as plain {#1} are printed
% unstyled.
\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\BuiltInTok}[1]{#1}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
\newcommand{\ExtensionTok}[1]{#1}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ImportTok}[1]{#1}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\NormalTok}[1]{#1}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\RegionMarkerTok}[1]{#1}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
% Table support: longtable for multi-page tables, booktabs for rules.
\usepackage{longtable,booktabs}
% Correct order of tables after \paragraph or \subparagraph
% (the patch replaces the first \par inside \longtable so that an empty
% \mbox is inserted first whenever \if@noskipsec is true)
\usepackage{etoolbox}
\makeatletter
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{}
\makeatother
% Allow footnotes in longtable head/foot
\IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}}
\makesavenoteenv{longtable}
\usepackage{graphicx}
% \maxwidth / \maxheight expand to the image's natural size, capped at
% \linewidth / \textheight respectively.
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
% Set default figure placement to htbp
\makeatletter
\def\fps@figure{htbp}
\makeatother
\setlength{\emergencystretch}{3em} % prevent overfull lines
% \tightlist is emitted at the start of compact lists in the body; it removes
% the vertical space between items. \providecommand defines it only if no
% earlier definition exists.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
% Number headings down to level 5 (\subparagraph).
\setcounter{secnumdepth}{5}
% booktabs is already loaded together with longtable earlier in the preamble;
% loading it again without options is a harmless no-op.
\usepackage{booktabs}
\usepackage{amsthm}
% Override amsthm's internal spacing hook so theorem-like environments get
% the same stretchable 8pt skip above and below (postskip is set equal to
% preskip). \makeatletter is needed because the macro names contain "@".
\makeatletter
\def\thm@space@setup{%
\thm@preskip=8pt plus 2pt minus 4pt
\thm@postskip=\thm@preskip
}
\makeatother
% Table and layout helper packages used by the tables in the book body.
% booktabs, longtable, xcolor and hyperref are already loaded earlier in the
% preamble; loading them again without options is a harmless no-op.
\usepackage{booktabs}
\usepackage{longtable}
\usepackage{array}
\usepackage{multirow}
\usepackage{wrapfig}
\usepackage{float}
\usepackage{colortbl}
\usepackage{pdflscape}
\usepackage{tabu}
\usepackage{threeparttable}
\usepackage{threeparttablex}
\usepackage[normalem]{ulem}
\usepackage{makecell}
\usepackage{xcolor}
% fontspec only works under XeTeX/LuaTeX and aborts with a fatal error under
% pdfTeX, which this preamble otherwise supports (see the engine switch near
% the top). Guard it with the same \ifxetex/\ifluatex test so the pdfTeX
% path keeps compiling; under XeTeX/LuaTeX behaviour is unchanged.
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi>0 % if luatex or xetex
\usepackage{fontspec}
\fi
\usepackage{multicol}
\usepackage{hhline}
\usepackage{hyperref}
% Author-year citations via natbib, formatted with the apalike style.
\usepackage[]{natbib}
\bibliographystyle{apalike}
% Title-page metadata; the author name links to the author's website.
\title{Computational Thinking for Social Scientists}
\author{\href{https://jaeyk.github.io/}{Jae Yeon Kim}}
\date{2022-01-30}
\begin{document}
\maketitle
% The braces form a group so that the emptied linkcolor only applies while
% the table of contents is typeset; body links keep the colour set in the
% preamble. tocdepth=1 limits the contents to chapters and sections
% (\setcounter is global, so this value persists after the group).
{
\hypersetup{linkcolor=}
\setcounter{tocdepth}{1}
\tableofcontents
}
\hypertarget{hello-world}{%
\chapter{Hello World}\label{hello-world}}
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{print}\NormalTok{(}\StringTok{"Hello, World!"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] "Hello, World!"
\end{verbatim}
\begin{quote}
Make simple things simple, and complex things possible. - \href{https://www.quora.com/What-is-the-story-behind-Alan-Kay-s-adage-Simple-things-should-be-simple-complex-things-should-be-possible}{Alan Kay}
\end{quote}
This is the website for \emph{Computational Thinking for Social Scientists}. This open-access book intends to help social scientists think computationally and develop proficiency with the computational tools and techniques needed for research in computational social science. Mastering these tools and techniques not only enables social scientists to collect, wrangle, analyze, and interpret data with less pain and more fun, but it also lets them work on research projects that would previously have seemed impossible.
Horace Mann, the first great American advocate of public education, claimed that ``Education, then, beyond all other devices of human origin, is a great equalizer of the conditions of men---the balance wheel of the social machinery.'' I believe in this potential of education; however, I also fully acknowledge that quality education is not accessible equally. Often, the gap between education and technology is greater among historically disadvantaged groups than advantaged groups. As an educator, I offer this book as my small contribution to making this democratic vision of education possible, at least in the emerging field of computational social science.
That said, this book is not intended to be a comprehensive guide for computational social science or any particular programming language, computational tool, or technique. If you are interested in a general introduction to computational social science, I highly recommend \href{http://www.princeton.edu/~mjs3/}{Matthew Salganik}'s \href{https://www.bitbybitbook.com/}{Bit By Bit (2017)}. Salganik's book is comprehensive, accessible, and pedagogically friendly.
The book comprises two main subjects (fundamentals and applications) and eight main sessions.
\hypertarget{part-i-fundamentals}{%
\section{Part I Fundamentals}\label{part-i-fundamentals}}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
\protect\hyperlink{motivation}{Why computational thinking}
\item
\protect\hyperlink{git_bash}{Best practices in data and code management using Git and Bash}
\item
\protect\hyperlink{tidy_data}{How to wrangle, model, and visualize data easier and faster}
\item
\protect\hyperlink{functional_programming}{How to use functional programming to automate repeated things}
\item
\protect\hyperlink{products}{How to develop data products (e.g., packages and shiny apps)}
\end{enumerate}
\hypertarget{part-ii-applications}{%
\section{Part II Applications}\label{part-ii-applications}}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{5}
\item
\protect\hyperlink{semi_structured_data}{How to collect and parse semi-structured data at scale (e.g., using APIs and web scraping)}
\item
\protect\hyperlink{machine_learning}{How to analyze high-dimensional data (e.g., text) using machine learning}
\item
\protect\hyperlink{big_data}{How to access, query, and manage big data using SQL}
\end{enumerate}
The book teaches how to do all of these, mostly in \href{https://www.r-project.org/about.html}{\textbf{R}}, and sometimes in \href{https://www.gnu.org/software/bash/}{\textbf{bash}} and \href{https://www.python.org/about/}{\textbf{Python}}.
\begin{itemize}
\item
Why R? R is free, easy to learn (thanks to \href{https://www.tidyverse.org/}{\texttt{tidyverse}} and \href{https://rstudio.com/}{RStudio}), fast (thanks to \href{https://cran.r-project.org/web/packages/Rcpp/index.html}{\texttt{Rcpp}}), runs everywhere (Mac/Windows/Linux), open (16,000+ packages; counting only ones \href{https://cran.r-project.org/web/packages/}{available at CRAN}), and has a growing, large, and inclusive community (\href{https://twitter.com/search?q=\%23rstats\&src=typed_query}{\texttt{\#rstats}}).
\item
Why R + Python + bash?
\begin{quote}
\begin{quote}
``For R and Python, Python is first and foremost a programming language. And that has a lot of good features, but it tends to mean, that if you are going to do data science in Python, you have to first learn how to program in Python. Whereas I think you are going to get up and running faster with R, than with Python because there's just a bunch more stuff built in and you don't have to learn as many programming concepts. You can focus on being a great political scientist or whatever you do and learning enough R that you don't have to become an expert programmer as well to get stuff done.'' - Hadley Wickham
\end{quote}
\end{quote}
\begin{itemize}
\tightlist
\item
However, this feature of the R community also raises a challenge.
\end{itemize}
\begin{quote}
\begin{quote}
Compared to other programming languages, the R community tends to be more focused on results instead of processes. Knowledge of software engineering best practices is patchy: for instance, not enough R programmers use source code control or automated testing. Inconsistency is rife across contributed packages, even within base R. You are confronted with over 20 years of evolution every time you use R. R is not a particularly fast programming language, and poorly written R code can be terribly slow. R is also a profligate user of memory. - Hadley Wickham
\end{quote}
\end{quote}
\begin{itemize}
\item
RStudio, especially the tidyverse team, has made heroic efforts to overcome the limitations mentioned above. Readers will learn these recent advances in the R ecosystem and complement R with Python and Bash.
\item
Nevertheless, if you're serious about programming, I highly recommend learning Python. Learning Python also helps you fill gaps in software engineering that could be useful to be highly proficient in R.
\end{itemize}
\end{itemize}
\hypertarget{special-thanks}{%
\section{Special thanks}\label{special-thanks}}
This book is collected as much as it is authored. It is a remix version of \href{https://github.com/rochelleterman/PS239T}{PS239T}, a graduate-level computational methods course at UC Berkeley, originally developed by \href{http://rochelleterman.com/}{Rochelle Terman} (Assistant Professor of Political Science, Chicago) then revised by \href{http://rachelbernhard.com/}{Rachel Bernhard} (Assistant Professor of Political Science, UC Davis). I have taught \href{https://github.com/PS239T/spring_2021}{PS239T} as lead instructor in Spring 2019 and TA in Spring 2018 and taught it with \href{https://nicholaskuipers.com/}{Nick Kuipers} (Postdoc, Stanford) in Spring 2020. Other teaching materials draw from the workshops I have created for \href{https://dlab.berkeley.edu/}{D-Lab} and \href{https://data.berkeley.edu/research/discovery-program-home}{Data Science Discovery Program} at UC Berkeley and \href{https://sicss.io/2021/howard-mathematica/}{the Summer Institute in Computational Social Science hosted by Howard University and Mathematica}. I also have cited all the other references whenever I am aware of related books, articles, slides, blog posts, or YouTube video clips.
\hypertarget{suggestions-questions-or-comments}{%
\section{Suggestions, questions, or comments}\label{suggestions-questions-or-comments}}
Please feel free to \href{https://github.com/jaeyk/PS239T/issues}{create issues}; if you find typos, errors, missing citations, please report them via the GitHub repository associated with this book.
\hypertarget{license}{%
\section{License}\label{license}}
\includegraphics{https://licensebuttons.net/l/by/4.0/88x31.png} This work is licensed under a \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International License}.
\hypertarget{motivation}{%
\chapter{Computational thinking}\label{motivation}}
\hypertarget{why-computational-thinking}{%
\section{Why computational thinking}\label{why-computational-thinking}}
If social scientists want to know how to work smart and not just hard, they need to take full advantage of the power of modern programming languages, and that power is \textbf{automation}.
Let's think about the following two cases (these examples come from \href{https://dlab.berkeley.edu/blog/why-teaching-social-scientists-how-code-professional-important}{the column} I contributed to the D-Lab website)
\begin{itemize}
\tightlist
\item
Case 1: Suppose a social scientist needs to collect data on civic organizations in the United States from websites, Internal Revenue Service reports, and social media posts. As the number of these organizations is large, the researcher could not collect a large volume of data from diverse sources, so they would hire undergraduates and distribute tasks. This is a typical data collection plan in social science research, and it is labor-intensive. Automation is not part of the game plan. Yet, it is critical for so many reasons. Because the process is costly, no one is likely to replicate or update the data collection effort.
\end{itemize}
Case 1 illustrates that it is challenging to be reproducible and scalable without efficient data analytics pipelines.
\begin{itemize}
\tightlist
\item
Case 2: An alternative is to write computer programs that collect such data automatically, parse them, and store them in interconnected databases. Additionally, someone may need to maintain and validate the quality of the data infrastructure. Nevertheless, this approach lowers the cost of the data collection process, thereby substantially increasing the \textbf{reproducibility} and \textbf{scalability}. Furthermore, the researcher can document their code and publicly share it using their GitHub repository or even gather some of the functions they used and distribute them as open-source libraries.
\end{itemize}
Case 2 illustrates the power of automation and how it benefits the academic community and the general public.
To reap these benefits, one needs to learn how to program. In the era of data science, programming is as valuable a skill as writing in social science research because the extent to which a researcher can automate the research process can determine its efficiency, reproducibility, and scalability.
Below is an insightful quote from Hadley Wickham, who won the 2019 COPSS Presidents' Award for his outstanding contribution to statistics via developing tidyverse R packages that have transformed how people wrangle, analyze, and visualize data. Even if you don't do big data or machine learning, you can still benefit from learning how to program and applying the programming skills to data analysis because it's a ``force multiplier.''
\begin{quote}
Every modern statistical and data analysis problem needs code to solve it. You shouldn't learn just the basics of programming, spend some time gaining mastery. Improving your programming skills pays off because code is a \textbf{force multiplier}: once you've solved a problem once, code allows you to solve it much faster in the future. As your programming skill increases, the generality of your solutions improves: you solve not just the precise problem you encountered, but a wider class of related problems (in this way programming skill is very much like mathematical skill). Finally, sharing your code with others allows them to benefit from your experience. - \href{https://imstat.org/2014/12/16/hadley-wickham-impact-the-world-by-being-useful/}{Hadley Wickham}
\end{quote}
However, I also do not claim that social scientists should learn programming like software engineers learn the subject. For social scientists, programming is a means, not an end. I encourage readers to think about what aspects of the social science research process can be automated. Again, programming is just a way to teach a machine to perform these tasks and get them done.
\begin{figure}
\centering
\includegraphics{https://bam.files.bbci.co.uk/bam/live/content/znmb87h/large}
\caption{From BBC Bitesize}
\end{figure}
Teaching a computer to perform a particular task requires computational thinking: ``formulating a problem and expressing its solution in a way that a computer---human or machine---can effectively carry out'' (defined by \href{http://www.cs.cmu.edu/afs/cs/usr/wing/www/publications/Wing06.pdf}{Jeannette M. Wing}). Specifically, this means readers need to get familiar with how computers think about data and handle them.
\hypertarget{how-to-teach-and-learn-computational-thinking}{%
\section{How to teach and learn computational thinking}\label{how-to-teach-and-learn-computational-thinking}}
This book teaches how you learn this art in incremental steps.
\begin{itemize}
\item
From graphic user interface to command-line interface (ch 3)
\item
From short programs to long programs (ch 4-5)
\item
The ultimate goal is to solve complex problems at scale using computation (ch 6-7)
\end{itemize}
I will cover programming concepts, but I will emphasize practicing them more. As the following quote from John Chambers indicates, this approach helps you learn computational thinking and apply it in particular contexts by coding and solving problems.
\begin{quote}
``{[}W{]}e wanted users to be able to begin in an interactive environment, where they did not consciously think of themselves as programming. Then as their needs became clearer and their sophistication increased, they should be able to slide gradually into programming, when the language and system aspects would become more important.'' - \emph{Stages in the Evolution of S} by John Chambers (S is the progenitor of R)
\end{quote}
Here are also some valuable reminders.
\begin{itemize}
\item
Beginners! Learning programming is a long game. The essential component of learning (for almost any subject) is consistency. Never stop writing code, even though your current code may fall far short of perfection.
\includegraphics{misc/wickham.png}
\item
Intermediate programmers! Try to empower, not intimidate, newbies. The most important rule in the computational social science community (at least, in my opinion) is being nice. Please read David Robinson's \href{http://varianceexplained.org/programming/bad-code/}{``A Million Lines of Bad Code''} for more insights.
\end{itemize}
\includegraphics{http://imgs.xkcd.com/comics/code_quality.png}
Finally, have fun. I've talked about how learning programming pays off. But I've taught long enough to know that this will not convince people to learn to program, especially those who've had negative experiences learning STEM.
Instead, I will try to make the materials as accessible as possible by emphasizing the following two ideas in teaching: showing the \textbf{BIG PICTURE} and walking through the \textbf{WORKFLOW.} With \href{https://media.illinois.edu/margaret-yee-man-ng}{Margaret Ng} (Assistant Professor of Journalism, UIUC), I wrote about why these two concepts are pedagogically important for teaching computational social science for all. \href{https://osf.io/preprints/socarxiv/pf7n6/?fbclid=IwAR2ZI0yw_pehS0mxAmeUBOGpzIhiO2LMUPGBzBLTLNo4C2HrJSoH9uZhgTY}{The article} is forthcoming in \emph{PS: Political Science and Politics.} If you are interested in my full argument, please read the article.
Here is a quick summary of why I think they matter for teaching programming inclusively to social science students.
Showing the big picture: Every time you teach a new skill or technique, remind students what the input and output data type is. Students from either Excel, SPSS, or Stata backgrounds are not used to thinking about data structure when working on data. So, providing these guideposts is crucial to help them avoid making an obvious mistake (e.g., providing a character vector when a numeric vector is needed for the input data) and seeing the connection between different skills (e.g., using API and web scraping).\\
Walking through the workflow: Break down the steps involved in moving from the input to the output data. This approach helps students feel less overwhelmed by learning the complex steps required to solve a particular task. It also helps students learn how to formulate a workflow when they encounter the same problem in a different context. Although the contexts are not identical, they can find patterns across them. Finally, teaching the workflow means breaking down these steps as well as putting them together, ideally using functions. Acquiring this skill is critical for students to advance from beginners to intermediate programmers who can write readable and reusable code.
\hypertarget{git_bash}{%
\chapter{Managing data and code}\label{git_bash}}
\hypertarget{the-command-line}{%
\section{The Command Line}\label{the-command-line}}
\hypertarget{the-big-picture}{%
\subsection{The Big Picture}\label{the-big-picture}}
As William Shotts, the author of \emph{\href{http://linuxcommand.org/tlcl.php}{The Linux Command Line}}, put it:
\begin{quote}
graphical user interfaces make easy tasks easy, while command-line interfaces make difficult tasks possible.
\end{quote}
\hypertarget{why-bother-using-the-command-line}{%
\subsection{Why bother using the command line?}\label{why-bother-using-the-command-line}}
Suppose that we want to create a plain text file that contains the word ``test.'' If we want to do this in the command line, we need to know the following commands.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
\texttt{echo}: ``Write arguments to the standard output'' This is equivalent to using a text editor (e.g., nano, vim, emacs) and writing something.
\item
\texttt{\textgreater{}\ test} Save the expression in a file named test.
\end{enumerate}
We can put these commands together like the following:
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{echo} \StringTok{"sth"} \OperatorTok{\textgreater{}}\NormalTok{ test }
\end{Highlighting}
\end{Shaded}
Don't worry about memorizing these and other commands. Memorization is a far less important aspect of learning programming. In general, if you don't know what a command does, just type \texttt{\textless{}command\ name\textgreater{}\ -\/-help}. You can do \texttt{man\ \textless{}command\ name\textgreater{}} to obtain further information. Here, \texttt{man} stands for manual. If you need more user-friendly information, please consider using \href{https://tldr.sh/}{\texttt{tldr}}.
Let's make this simple case complex by scaling up. Suppose we want to make 100 duplicates of the \texttt{test} file. Below is the one-line code that performs the task!
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{for} \ExtensionTok{i}\NormalTok{ in }\DataTypeTok{\{1..100\}}\KeywordTok{;} \KeywordTok{do} \FunctionTok{cp}\NormalTok{ test }\StringTok{"test\_}\VariableTok{$i}\StringTok{"}\KeywordTok{;} \KeywordTok{done}
\end{Highlighting}
\end{Shaded}
Let me break down the seemingly complex workflow.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
\texttt{for\ i\ in\ \{1..100\}}. This is a for loop. The numbers 1..100 inside the curly braces \texttt{\{\}} indicate the range of integers from 1 to 100. In R, this is equivalent to \texttt{for\ (i\ in\ 1:100)\ \{\}}.
\item
\texttt{;} is used to run multiple commands without making line breaks. \texttt{;} works in the same way in R.
\item
\texttt{\$var} returns the value associated with a variable. Type \texttt{name=\textless{}Your\ name\textgreater{}}. Then, type \texttt{echo\ \$name}. You should see your name printed. Variable assignment is one of the most basic things you'll learn in any programming language. In R, we do this by using \texttt{\textless{}-}.
\end{enumerate}
If you have zero experience in programming, I might have provided too many concepts too early, like variable assignment and for loop. However, you don't need to worry about them at this point. We will cover them in the next chapter.
I will give you one more example to illustrate how powerful the command line is. Suppose we want to find which file contains the character ``COVID.'' This is equivalent to finding a needle in a haystack. It's a daunting task for humans, but not for computers. Commands are verbs. So, to express this problem in a language that computers could understand, let's first find what command we should use. Often, a simple Google or \href{https://stackoverflow.com/}{Stack Overflow} search leads to an answer.
In this case, \texttt{grep} is the answer (there's also grep in R). This command finds PATTERNS in each FILE. What follows \texttt{-} are options (called flags): \texttt{r} (recursive), \texttt{n} (line number), \texttt{w} (match only whole words), \texttt{e} (use patterns for matching). \texttt{rnw} are for output control and \texttt{e} is for pattern selection.
So, to perform the task above, you just need one-line code: \texttt{grep\ -r\ -n\ -w\ -e\ "COVID"}
\textbf{Quick reminders}
\begin{itemize}
\tightlist
\item
\texttt{grep}: command
\item
\texttt{-rnw\ -e}: flags
\item
\texttt{COVID}: argument (usually a file or file path)
\end{itemize}
Let's remove (=\texttt{rm}) all the duplicate files and the original file. \texttt{*} (any number of characters) is a wildcard (if you want to match exactly one character, use \texttt{?}). It finds every file whose name starts with \texttt{test\_}.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{rm}\NormalTok{ test\_* test }
\end{Highlighting}
\end{Shaded}
Enough with demonstrations. What is this black magic? Can you do the same thing using a graphical interface? Which method is more efficient? I hope that my demonstrations give you a good sense of why learning the command line could be incredibly useful. In my experience, mastering the command line helps automate your research process from end to end. For instance, you don't need to download files from a website using your web browser. Instead, you can run the \texttt{wget} command in the terminal. Better yet, you don't even need to run the command a second time. You can write a Shell script (\texttt{*.sh}) that automates downloading, moving, and sorting multiple files.
\hypertarget{unix-shell}{%
\subsection{UNIX Shell}\label{unix-shell}}
The other thing you might have noticed is that there are many overlaps between the commands and base R functions (R functions that can be used without installing additional packages). This connection is not a coincidence. UNIX preceded and influenced many programming languages, including R.
The following materials on UNIX and Shell are adapted from \href{https://bids.GitHub.io/2015-06-04-berkeley/shell/00-intro.html}{the Software Carpentry}.
\hypertarget{unix}{%
\subsubsection{Unix}\label{unix}}
UNIX is an \textbf{operating system + a set of tools (utilities)}. It was developed by AT\&T employees at Bell Labs (1969-1971). From Mac OS X to Linux, many of the current operating systems are some versions of UNIX. A command-line INTERFACE is a way to communicate with your OS by typing, not pointing and clicking.
For this reason, if you're using Mac OS, then you don't need to do anything else to experience UNIX. You're already all set.
If you're using Windows, you need to install either GitBash (a good option if you only use Bash for Git and GitHub) or Windows Subsystem for Linux (highly recommended if your use case goes beyond Git and GitHub). For more information, see \href{https://GitHub.com/PS239T/spring_2021/blob/main/B_Install.md}{this installation guideline} from the course repo. If you're a Windows user and don't use Windows 10, I recommend installing \href{https://www.virtualbox.org/}{VirtualBox}.
UNIX is old, but it is still mainstream, and it will remain so. Moreover, \href{https://en.wikipedia.org/wiki/Unix_philosophy}{the UNIX philosophy} (``Do One Thing And Do It Well'')---minimalist, modular software development---is highly and widely influential.
\begin{figure}
\centering
\includegraphics{https://upload.wikimedia.org/wikipedia/commons/1/1b/Ken_Thompson_and_Dennis_Ritchie--1973.jpg}
\caption{Ken Thompson and Dennis Ritchie, key proponents of the Unix philosophy}
\end{figure}
\hypertarget{kernel}{%
\subsubsection{Kernel}\label{kernel}}
The kernel of UNIX is the hub of the operating system: it allocates time and memory to programs. It handles the \href{http://users.ox.ac.uk/~martinw/unix/chap3.html}{filestore} (e.g., files and directories) and communications in response to system calls.
\hypertarget{shell}{%
\subsubsection{Shell}\label{shell}}
The shell is an interactive program that provides an interface between the user and the kernel. The shell interprets commands entered by the user or supplied by a Shell script and passes them to the kernel for execution.
\hypertarget{human-computer-interfaces}{%
\subsubsection{Human-Computer interfaces}\label{human-computer-interfaces}}
At a high level, computers do four things:
\begin{itemize}
\tightlist
\item
run programs
\item
store data
\item
communicate with each other
\item
interact with us (through either CLI or GUI)
\end{itemize}
\hypertarget{the-command-line-1}{%
\subsubsection{The Command Line}\label{the-command-line-1}}
This kind of interface is called a \textbf{command-line interface}, or CLI,
to distinguish it from the \textbf{graphical user interface}, or GUI, that most people now use.
The heart of a CLI is a \textbf{read-evaluate-print loop}, or REPL: when the user types a command and then presses the enter (or return) key, the computer reads it, executes it, and prints its output. The user then types another command, and so on until the user logs off.
If you're using RStudio, you can use the terminal inside RStudio (next to the ``Console''). (For instance, press Alt + Shift + M.)
\hypertarget{the-shell}{%
\subsubsection{The Shell}\label{the-shell}}
This description makes it sound as though the user sends commands directly to the computer and the computer sends the output directly to the user. In fact, there is usually a program in between called a \textbf{command shell}.
\begin{figure}
\centering
\includegraphics{https://miro.medium.com/max/1032/1*GuB5q_bWOSZa-8sDg1lEDA.png}
\caption{Source: Prashant Lakhera}
\end{figure}
What the user types goes into the shell; it figures out what commands to run and orders the computer to execute them.
Note that the shell is called \emph{the shell} because it encloses the operating system to hide some of its complexity and make it simpler to interact with.
A shell is a program like any other. What's special about it is that its job is to run other programs rather than do calculations itself. The commands are themselves programs: when they terminate, the shell gives the user another prompt (\$ on our systems).
\hypertarget{bash}{%
\subsubsection{Bash}\label{bash}}
The most popular Unix shell is \textbf{Bash}, the Bourne Again Shell (so-called because it's derived from a shell written by Stephen Bourne --- this is what passes for wit among programmers). Bash is the default shell on most modern implementations of \textbf{Unix} and in most packages that provide Unix-like tools for Windows.
\hypertarget{why-shell}{%
\subsubsection{Why Shell?}\label{why-shell}}
Using Bash or any other shell sometimes feels more like programming than like using a mouse. Commands are terse (often only a couple of characters long), their names are frequently cryptic, and their output is lines of text rather than something visual like a graph.
On the other hand, the shell allows us to combine existing tools in powerful ways with only a few keystrokes and set up pipelines to handle large volumes of data automatically.
In addition, the command line is often the easiest way to interact with remote machines (which explains why we learn Bash before learning Git and GitHub). If you work in a team and your team manages data in a remote server, you will likely need to get access to the server via something like \texttt{ssh} (I will explain this when I explain \texttt{git}) and access a SQL database (this is the subject of the final chapter).
\hypertarget{our-first-command}{%
\subsubsection{Our first command}\label{our-first-command}}
The part of the operating system responsible for managing files and directories is called the \textbf{file system}. It organizes our data into files, which hold information, and directories (also called ``folders''), which hold files or other directories.
Several commands are frequently used to create, inspect, rename, and delete files and directories. To start exploring them, let's open a shell window:
\begin{Shaded}
\begin{Highlighting}[]
\ExtensionTok{jae@jae{-}X705UDR}\NormalTok{:\textasciitilde{}$ }
\end{Highlighting}
\end{Shaded}
Let's demystify the output above. There's nothing complicated.
\begin{itemize}
\tightlist
\item
jae: a specific user name
\item
jae-X705UDR: your computer/server name
\item
\texttt{\textasciitilde{}}: current directory (\texttt{\textasciitilde{}} = home)
\item
\texttt{\$}: a \textbf{prompt}, which shows us that the shell is waiting for input; your shell may show something more elaborate.
\end{itemize}
Type the command \texttt{whoami}, then press the Enter key (sometimes marked Return) to send the command to the shell.
The command's output is the ID of the current user, i.e., it shows us who the shell thinks we are:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\FunctionTok{whoami}
\CommentTok{\# Should be your user name }
\ExtensionTok{jae}
\end{Highlighting}
\end{Shaded}
More specifically, when we type \texttt{whoami}, the shell does the following behind the scenes:
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Finds a program called \texttt{whoami},
\item
Runs that program,
\item
Displays that program's output, then
\item
Displays a new prompt to tell us that it's ready for more commands.
\end{enumerate}
\hypertarget{communicating-to-other-systems}{%
\subsubsection{Communicating to other systems}\label{communicating-to-other-systems}}
In the next unit, we'll focus on the structure of our own operating systems. But our operating systems rarely work in isolation; we often rely on the Internet to communicate with others! You can visualize this sort of communication within your own shell by asking your computer to \texttt{ping} (based on the old term for submarine sonar) an IP address provided by Google (8.8.8.8); in effect, this will test whether your Internet is working.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\FunctionTok{ping}\NormalTok{ 8.8.8.8}
\end{Highlighting}
\end{Shaded}
Note: Windows users may have to try a slightly different alternative:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\FunctionTok{ping}\NormalTok{ {-}t 8.8.8.8}
\end{Highlighting}
\end{Shaded}
(Thanks \href{http://www.paulthissen.org/}{Paul Thissen} for the suggestion!)
\hypertarget{file-system-organization}{%
\subsubsection{File system organization}\label{file-system-organization}}
Next, let's find out where we are by running a \texttt{pwd} command (\textbf{print working directory}).
At any moment, our \textbf{current working directory} is our current default directory, i.e., the directory that the computer assumes we want to run commands in unless we explicitly specify something else.
Here, the computer's response is \texttt{/home/jae}, which is the \textbf{home directory}:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{pwd}
\ExtensionTok{/home/jae}
\end{Highlighting}
\end{Shaded}
\textbf{Additional tips}
You can also download files to your computer in the terminal.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Install wget utility
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# sudo = super user }
\FunctionTok{sudo}\NormalTok{ apt{-}get install wget }
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
Download target files
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{wget}\NormalTok{ https://download1.rstudio.org/desktop/bionic/amd64/rstudio{-}1.4.1103{-}amd64.deb}
\end{Highlighting}
\end{Shaded}
\includegraphics{misc/wget.png}
\begin{quote}
\hypertarget{home-directory}{%
\subsubsection{Home Directory}\label{home-directory}}
The home directory path will look different on different operating systems. For example, on Linux, it will look like \texttt{/home/jae}, and on Windows, it will be similar to \texttt{C:\textbackslash{}Documents\ and\ Settings\textbackslash{}jae}. Note that it may look slightly different for different versions of Windows.
\end{quote}
\begin{quote}
\hypertarget{whoami}{%
\subsubsection{whoami}\label{whoami}}
If the command to find out who we are is \texttt{whoami}, the command to find out where we are ought to be called \texttt{whereami}, so why is it \texttt{pwd} instead? The usual answer is that in the early 1970s, when Unix was first being developed, every keystroke counted: the devices of the day were slow, and backspacing on a teletype was so painful that cutting the number of keystrokes to cut the number of typing mistakes was a win for usability. The reality is that commands were added to Unix one by one, without any master plan, by people who were immersed in its jargon.
The good news: because these basic commands were so integral to the development of early Unix, they have stuck around and appear (in some form) in almost all programming languages.
\end{quote}
\begin{quote}
If you're working on a Mac, the file structure will look similar, but not identical. The following image shows a file system graph for the typical Mac.
\end{quote}
\begin{figure}
\centering
\includegraphics{https://swcarpentry.GitHub.io/shell-novice/fig/home-directories.svg}
\caption{File Directory}
\end{figure}
We know that our current working directory \texttt{/home/jae} is stored inside \texttt{/home} because \texttt{/home} is the first part of its name. Similarly, we know that \texttt{/home} is stored inside the root directory \texttt{/} because its name begins with \texttt{/}.
\hypertarget{listing}{%
\subsubsection{Listing}\label{listing}}
Let's see what's in your home directory by running \texttt{ls} (\textbf{list} files and directories):
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\FunctionTok{ls}
\ExtensionTok{Applications}\NormalTok{ Dropbox Pictures}
\ExtensionTok{Creative}\NormalTok{ Cloud Files Google Drive Public}
\ExtensionTok{Desktop}\NormalTok{ Library Untitled.ipynb}
\ExtensionTok{Documents}\NormalTok{ Movies anaconda}
\ExtensionTok{Downloads}\NormalTok{ Music file.txt}
\end{Highlighting}
\end{Shaded}
\texttt{ls} prints the names of the files and directories in the current directory in alphabetical order, arranged neatly into columns.
We can make \texttt{ls} more useful by adding flags. For instance, you can list the contents of the file system's root directory, with each entry marked by its type, using the following command. Here, the \texttt{-F} flag classifies files by appending an indicator of their type to each name. For example, a trailing \texttt{/} indicates a directory.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{ls}\NormalTok{ {-}F /}
\end{Highlighting}
\end{Shaded}
The leading \texttt{/} tells the computer to follow the path from the file system's root, so it always refers to exactly one directory, no matter where we are when we run the command.
If you want to see only directories in the current working directory, you can do the following. (\texttt{\^{}} is a regular-expression anchor that matches the start of a line. In the output of \texttt{ls\ -l}, every line describing a directory starts with `d'.)
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{ls}\NormalTok{ {-}l }\KeywordTok{|} \FunctionTok{grep} \StringTok{"\^{}d"}
\end{Highlighting}
\end{Shaded}
What if we want to change our current working directory? Before we do this, \texttt{pwd} shows us that we're in \texttt{/home/jae}, and \texttt{ls} without any arguments shows us that directory's contents:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{pwd}
\ExtensionTok{/home/jae}
\NormalTok{$ }\FunctionTok{ls}
\ExtensionTok{Applications}\NormalTok{ Dropbox Pictures}
\ExtensionTok{Creative}\NormalTok{ Cloud Files Google Drive Public}
\ExtensionTok{Desktop}\NormalTok{ Library Untitled.ipynb}
\ExtensionTok{Documents}\NormalTok{ Movies anaconda}
\ExtensionTok{Downloads}\NormalTok{ Music file.txt}
\end{Highlighting}
\end{Shaded}
Use relative paths (e.g., \texttt{../spring\_2021/references.md}) whenever possible so that your code is not dependent on how your system is configured.
\textbf{Additional tips}
How can I find pdf files in \texttt{Downloads} using the terminal? Remember \texttt{*} wildcard?
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{cd}\NormalTok{ Downloads/ }
\FunctionTok{find}\NormalTok{ *.pdf}
\end{Highlighting}
\end{Shaded}
Also, note that you don't need to type every character. Type the first few characters, then press TAB (autocomplete). This is called \textbf{tab-completion}, and we will see it in R as we go on.
\hypertarget{moving-around}{%
\subsubsection{Moving around}\label{moving-around}}
We can use \texttt{cd} (\textbf{change directory}) followed by a directory name to change our working directory.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{cd}\NormalTok{ Desktop}
\end{Highlighting}
\end{Shaded}
\texttt{cd} doesn't print anything, but if we run \texttt{pwd} after it, we can see that we are now in \texttt{/home/jae/Desktop}.
If we run \texttt{ls} without arguments now, it lists the contents of \texttt{/home/jae/Desktop}, because that's where we now are:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{pwd}
\ExtensionTok{/home/jae/Desktop}
\end{Highlighting}
\end{Shaded}
We now know how to go down the directory tree: how do we go up? We could use an absolute path:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{cd}\NormalTok{ /home/jae/}
\end{Highlighting}
\end{Shaded}
but it's almost always simpler to use \texttt{cd\ ..} to go up one level:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{pwd}
\ExtensionTok{/home/jae/Desktop}
\NormalTok{$ }\BuiltInTok{cd}\NormalTok{ ..}
\end{Highlighting}
\end{Shaded}
\texttt{..} is a special directory name meaning ``the directory containing this one,'' or more succinctly, the \textbf{parent} of the current directory. Sure enough, if we run \texttt{pwd} after running \texttt{cd\ ..}, we're back in \texttt{/home/jae/}:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\BuiltInTok{pwd}
\ExtensionTok{/home/jae/}
\end{Highlighting}
\end{Shaded}
The special directory \texttt{..} doesn't usually show up when we run \texttt{ls}. If we want to display it, we can give \texttt{ls} the \texttt{-a} flag:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{$ }\FunctionTok{ls}\NormalTok{ {-}a}
\BuiltInTok{.} \ExtensionTok{.localized}\NormalTok{ Shared}
\ExtensionTok{..}\NormalTok{ Guest rachel}
\end{Highlighting}
\end{Shaded}
\texttt{-a} stands for ``show all''; it forces \texttt{ls} to show us file and directory names that begin with \texttt{.}, such as \texttt{..}.
\begin{quote}
\hypertarget{hidden-files-for-your-own-protection}{%
\subsubsection{Hidden Files: For Your Own Protection}\label{hidden-files-for-your-own-protection}}
As you can see, many other items just appeared when we entered \texttt{ls\ -a}. These files and directories begin with \texttt{.} followed by a name. Usually, these files and directories hold important programmatic information. They are kept hidden so that users don't accidentally delete or edit them without knowing what they're doing.
\end{quote}
As you can see, it also displays another special directory that's just called \texttt{.}, which means ``the current working directory''. It may seem redundant to have a name for it, but we'll see some uses for it soon.
\textbf{Additional tips}
The above navigating exercises help us learn the \texttt{cd} command, but they are not very exciting. So let's do something more concrete and potentially useful. Let's say you downloaded a file using your web browser and want to locate that file. How could you do that?
Your first step should be learning more about the \texttt{ls} command. You can do that by Googling or typing \texttt{ls\ -\/-help}. By looking at the documentation, you can recognize that you need to add \texttt{-t} (sort by time). Then, what's \texttt{\textbar{}}? It's called a pipe, and it chains commands. For instance, in \texttt{\textless{}command\ 1\textgreater{}\ \textbar{}\ \textless{}command\ 2\textgreater{}}, command 1's output becomes command 2's input. \texttt{head} lists the first ten lines of a file or of its piped input. The \texttt{-n1} flag makes it show only the first line of the output.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{\# Don\textquotesingle{}t forget to use TAB completion}
\BuiltInTok{cd}\NormalTok{ Downloads/ }
\FunctionTok{ls}\NormalTok{ {-}t }\KeywordTok{|} \FunctionTok{head}\NormalTok{ {-}n1}
\end{Highlighting}
\end{Shaded}
Yeah! We can do more cool things. For example, how can you find the most recently downloaded PDF file? You can do this by combining the two neat tricks you learned earlier.
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{ls}\NormalTok{ {-}t }\KeywordTok{|} \FunctionTok{find}\NormalTok{ *.pdf }\KeywordTok{|} \FunctionTok{head}\NormalTok{ {-}n1 }
\end{Highlighting}
\end{Shaded}
\hypertarget{creating-copying-removing-and-renaming-files}{%
\subsubsection{Creating, copying, removing, and renaming files}\label{creating-copying-removing-and-renaming-files}}
\hypertarget{creating-files}{%
\paragraph{Creating files}\label{creating-files}}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
First, let's create an empty directory named exercise
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{mkdir}\NormalTok{ exercise }
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\item
You can check whether the directory is created by typing \texttt{ls}. If the print format is challenging to read, add \texttt{-l} flag. Did you notice the difference?
\item
Let's move to the \texttt{exercise} subdirectory and create a file named test
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{cd}\NormalTok{ exercise }\KeywordTok{;} \FunctionTok{touch}\NormalTok{ test }\KeywordTok{;} \FunctionTok{ls}
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{3}
\tightlist
\item
Read test
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{cat}\NormalTok{ test }
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{4}
\tightlist
\item
Hmm. It's empty. Let's add something there. \texttt{\textgreater{}} = overwrite
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{echo} \StringTok{"something"} \OperatorTok{\textgreater{}}\NormalTok{ test }\KeywordTok{;} \FunctionTok{cat}\NormalTok{ test }
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{5}
\tightlist
\item
Yeah! Can you add more? \texttt{\textgreater{}\textgreater{}} = append
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\BuiltInTok{echo} \StringTok{"anything"} \OperatorTok{\textgreater{}\textgreater{}}\NormalTok{ test }\KeywordTok{;} \FunctionTok{cat}\NormalTok{ test }
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{6}
\tightlist
\item
Removing ``anything'' from \texttt{test} is a little bit more complex because you need to know how to use \texttt{grep} (remember that we used this command in the very first example). Here, I just demonstrate that you can do this task using Bash, and let's dig into this more when we talk about working with text files.
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{grep}\NormalTok{ {-}v }\StringTok{\textquotesingle{}anything\textquotesingle{}}\NormalTok{ test}
\end{Highlighting}
\end{Shaded}
\hypertarget{copying-and-removing-files}{%
\paragraph{Copying and Removing Files}\label{copying-and-removing-files}}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Can we make a copy of \texttt{test}? Yes!
\end{enumerate}
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{cp}\NormalTok{ test test\_1}\KeywordTok{;} \FunctionTok{cat}
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
Can we make 100 copies of \texttt{test}? Yes!
\end{enumerate}
You can do this
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{cp}\NormalTok{ test test\_1 }
\FunctionTok{cp}\NormalTok{ test test\_2}
\FunctionTok{cp}\NormalTok{ test test\_3 }
\ExtensionTok{...}
\end{Highlighting}
\end{Shaded}
or
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{for} \ExtensionTok{i}\NormalTok{ in }\DataTypeTok{\{1..100\}}\KeywordTok{;} \KeywordTok{do} \FunctionTok{cp}\NormalTok{ test }\StringTok{"test\_}\VariableTok{$i}\StringTok{"}\KeywordTok{;} \KeywordTok{done}
\end{Highlighting}
\end{Shaded}
Which one do you like? (Again, don't focus on the for loop. We'll learn it and other similar tools to deal with iterations in the later chapters.)
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{2}
\tightlist
\item
Can you remove all of the \texttt{test\_} files?
\end{enumerate}
You can do this
\begin{Shaded}
\begin{Highlighting}[]
\FunctionTok{rm}\NormalTok{ test\_1}
\FunctionTok{rm}\NormalTok{ test\_2}
\FunctionTok{rm}\NormalTok{ test\_3 }
\ExtensionTok{...}
\end{Highlighting}
\end{Shaded}