-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimpletok
executable file
·161 lines (153 loc) · 4.31 KB
/
simpletok
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
# I hereby release all rights to this script. It belongs to humanity.
# 2018.08.02 Jeremy Adelsgruber
###
# Script identity, as printed by the usage and version text.
NAME="${0##*/}"
VERSION="1.1"
# Tokenizer defaults: a single backslash as the escape character and the
# shell's current IFS characters as the delimiter set.
ESCAPE="\\"
DELIMS="${IFS}"
# Print the program name, version and public-domain notice, then exit.
# Globals read: NAME, VERSION
# Outputs: four lines on stdout
# Never returns: terminates the script with the status of the printf.
version(){
    printf '%s\n' \
        "$NAME $VERSION" \
        "This software is in the Public Domain." \
        "You have the right to modify and redistribute this software." \
        "This software comes with NO WARRANTY, to the extent permitted by law."
    exit
}
# Print the help text (synopsis, options, tokenizing rules, examples), then exit.
# Globals read: NAME (program name), ESCAPE (shown as the current escape char)
# Outputs: the help text on stdout, one printf argument per output line
# Never returns: terminates the script with the status of the printf.
usage(){
    printf '%s\n' \
        "Usage: $NAME [-h|-v|-c|-f N] [-d S] [-e C] [STRING]" \
        "" \
        "Perform basic tokenization of potentially quoted strings." \
        "" \
        " -h, --help Display this message and exit" \
        " -v, --version Display version and exit" \
        " -c, --count Print the number of tokens" \
        " -f N, --field N Print token N (zero indexed, negative-friendly) of each line" \
        " -d S, --delims S Define custom single-char delimiters (default: IFS env var)" \
        " -e C, --escape C Define custom escape character (default: '$ESCAPE')" \
        "" \
        "If no options provided, print each token on a new line." \
        "If no string provided, read from STDIN." \
        "" \
        "Sequential delimiters with no text between them count as just one." \
        "Delimiters inside quotes or escaped by a backslash will behave as text." \
        "Double-quotes may be escaped by a backslash. Single-quotes may not." \
        "Backslashes not preceding a double-quote or delimiter are treated as text." \
        "" \
        "Examples:" \
        " \$ $NAME -f 1 '\"Alice McSurname\" Bob\\ von\\ Nachname Carl'" \
        " Bob von Nachname" \
        "" \
        " \$ cat /tmp/somefile" \
        " ln -s \"/path/to/a file with spaces/in it\" '/path/to/link 1'" \
        " ln -s -f \"/path/to/another/file\" /path/to/link\\ 2" \
        " ln -s -f /this\" p'ath/\"is/'v\"e\"r\"y'/du\\\"mb\\yo /path/to/'link 3'" \
        "" \
        " \$ $NAME -f -2 < /tmp/somefile" \
        " /path/to/a file with spaces/in it" \
        " /path/to/another/file" \
        " /this p'ath/is/v\"e\"r\"y/du\"mb\\yo" \
        "" \
        " \$ $NAME -f -1 < /tmp/somefile" \
        " /path/to/link 1" \
        " /path/to/link 2" \
        " /path/to/link 3"
    exit
}
# Tokenize one line and print the result selected by the global options.
#
# Globals read:
#   ESCAPE - escape character
#   DELIMS - set of single-character delimiters
#   COUNT  - if non-empty, print only the number of tokens
#   FIELD  - if non-empty (and COUNT is empty), print only that token;
#            zero indexed, negative values count back from the last token
# Arguments:
#   $1 - the line to tokenize
# Outputs:
#   stdout: the token count, the selected token (an empty line when the
#   index is out of range), or every token on its own line.
#
# Tokenizing rules:
#   * runs of unquoted delimiters separate tokens; empty tokens are dropped
#   * single quotes keep everything literal up to the closing single quote
#   * double quotes keep delimiters literal; ESCAPE + '"' yields a literal '"'
#   * outside quotes, ESCAPE + delimiter yields the delimiter as text
#   * any other ESCAPE is kept as text, including a trailing one
parse(){
    local line="$1"
    local len="${#line}"
    local mode='raw' last_mode='raw'
    local char='' token=''
    local -a tokens=()
    local i idx

    # A C-style loop needs no external `seq` and simply does not run for an
    # empty line, so no separate empty-input guard is required.
    for ((i = 0; i < len; i++)); do
        char="${line:i:1}"
        case "$mode" in
            single)
                #we're currently inside single-quotes
                if [[ "$char" == "'" ]]; then
                    mode='raw'
                else
                    token+="$char"
                fi
                ;;
            double)
                #we're currently inside double-quotes
                if [[ "$char" == '"' ]]; then
                    mode='raw'
                elif [[ "$char" == "$ESCAPE" ]]; then
                    last_mode="$mode"
                    mode='escape'
                else
                    token+="$char"
                fi
                ;;
            escape)
                #an escape character was found; it is consumed only when it
                #escapes a double-quote, or a delimiter outside of quotes.
                #In every other case it is put back as ordinary text.
                if [[ "$char" != '"' ]] && { [[ "$DELIMS" != *"$char"* ]] || [[ "$last_mode" != 'raw' ]]; }; then
                    token+="$ESCAPE"
                fi
                mode="$last_mode"
                token+="$char"
                ;;
            *)
                #we're at the outermost level, reading raw unquoted chars
                if [[ "$char" == "'" ]]; then
                    mode='single'
                elif [[ "$char" == '"' ]]; then
                    mode='double'
                elif [[ "$char" == "$ESCAPE" ]]; then
                    last_mode="$mode"
                    mode='escape'
                elif [[ "$DELIMS" == *"$char"* ]]; then
                    #a delimiter ends the current token, if there is one
                    if [[ -n "$token" ]]; then
                        tokens+=("$token")
                        token=''
                    fi
                else
                    token+="$char"
                fi
                ;;
        esac
    done

    #a trailing escape character had nothing to escape; keep it as text
    if [[ "$mode" == 'escape' ]]; then
        token+="$ESCAPE"
    fi
    #add the final token to the list
    if [[ -n "$token" ]]; then
        tokens+=("$token")
    fi

    #now get the user their result
    # printf is used instead of echo so that tokens such as -n, -e or -E are
    # printed verbatim rather than being consumed as echo options.
    if [[ -n "$COUNT" ]]; then
        printf '%s\n' "${#tokens[@]}"
    elif [[ -n "$FIELD" ]]; then
        # Resolve negative indices by hand: this does not depend on the
        # shell supporting negative array subscripts and keeps an
        # out-of-range index from printing a subscript error on stderr.
        idx=$((FIELD))
        if ((idx < 0)); then
            idx=$((idx + ${#tokens[@]}))
        fi
        if ((idx >= 0 && idx < ${#tokens[@]})); then
            printf '%s\n' "${tokens[idx]}"
        else
            printf '\n'
        fi
    else
        for token in "${tokens[@]}"; do
            printf '%s\n' "$token"
        done
    fi
}
#parameters
# Consume options from the left until the first argument that is not a
# known option; that argument and everything after it become the string to
# tokenize. The loop also stops at the first empty argument.
while [[ -n "$1" ]]; do
    case "$1" in
        -h|--help)
            usage
            break
            ;;
        -v|--version)
            version
            break
            ;;
        -c|--count)
            COUNT="true"
            ;;
        -f|--field)
            # option value is the next argument; drop it here, the shift
            # at the bottom of the loop drops the option itself
            FIELD="$2"
            shift
            ;;
        -e|--escape)
            ESCAPE="$2"
            shift
            ;;
        -d|--delims)
            DELIMS="$2"
            shift
            ;;
        *)
            # "$*" joins all remaining arguments into one string
            DATA="$*"
            break
            ;;
    esac
    shift
done
if [ "$DATA" ]; then
    #string provided on the command line; tokenize it as a single line
    parse "$DATA"
else
    #no string; tokenize STDIN line by line
    # IFS= stops read from trimming leading/trailing whitespace, so a line
    # from STDIN is tokenized exactly like the same text passed as STRING
    # (this matters when custom delimiters make whitespace part of a token).
    # The `|| [ -n "$LINE" ]` clause keeps a final line that has no
    # trailing newline from being dropped.
    while IFS= read -r LINE || [ -n "$LINE" ]; do
        parse "$LINE"
    done
fi