-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnote.go
220 lines (196 loc) · 6.82 KB
/
note.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Package bearnotes provides tools to read Markdown files generated
// by the Bear app. It can also convert those files to a format suitable
// for Zettlr.
//
// It handles notes, embedded images and file attachments.
//
// Note: there are some Unicode normalization issues between the filenames
// in the filesystem and paths in the Markdown file. It is up to the caller
// to normalize strings when required.
package bearnotes
import (
"fmt"
"net/url"
"regexp"
"sort"
"strings"
"unicode"
"unicode/utf8"
)
// Regular expressions used to locate Bear-specific constructs in a note.
// They are compiled once, at package initialisation, so they are never nil
// by the time any function of this package runs.
var (
	// reTag detects Bear tags.
	// Examples:
	//   - #foo
	//   - #bar/baz
	//
	// This regex has a catch: besides the tag name (group 2) it also matches
	// one optional extra character before the tag (group 1) and one after it
	// (group 3). Go's regexp package does not support look-ahead/look-behind
	// assertions, so the surrounding characters are captured here and have to
	// be checked by the code that consumes the match (see NewTag).
	reTag = regexp.MustCompile(`(^|.?)#([\p{L}][-\p{L}\p{N}/$_§%=+°({[\\@]*)(.?|$)`)

	// reFile detects file attachments. Group 1 is the href value and
	// group 2 is the link text.
	// Example: <a href='my%20file.pdf'>my file.pdf</a>
	reFile = regexp.MustCompile(`<a +href=['"]([^'"]+)['"]>([^<]+)</a>`)

	// reImage detects embedded images. Group 1 is the alternative text and
	// group 2 is the image location, which may contain one parenthesised
	// segment.
	// Example: ![](note/my-image.png)
	reImage = regexp.MustCompile(`!\[([^\]]*)]\(([^())]+|[^(]+\([^)]+\)[^)]+)\)`)
)
// Tag represents a Bear tag (#foo), together with the characters that
// surround it in the note so that it can be written back unchanged.
type Tag struct {
// The name of the tag (without the leading hashtag)
Name string
// Position of this tag in the Markdown file, as the index pair of the regex
// match. The match includes the before/after characters below.
position []int
// The character matched just before the tag, if any. It emulates a
// look-behind assertion (see the reTag regex description).
before string
// The character matched just after the tag, if any. It emulates a
// look-ahead assertion (see the reTag regex description).
after string
}
// NewTag builds a Tag out of a reTag match. content is the matched text,
// including the optional leading and trailing characters, and position is
// the location of that match in the note.
//
// The zero Tag (empty Name) is returned when content does not match reTag or
// when the tag is not delimited on both sides by whitespace or by nothing.
func NewTag(content string, position []int) Tag {
	groups := reTag.FindStringSubmatch(content)
	if len(groups) == 0 {
		return Tag{}
	}
	lead, name, trail := groups[1], groups[2], groups[3]

	// isBoundary reports whether s is an acceptable neighbour for a tag:
	// either nothing at all or something starting with a space character.
	isBoundary := func(s string) bool {
		if len(s) == 0 {
			return true
		}
		r, _ := utf8.DecodeRuneInString(s)
		return unicode.IsSpace(r)
	}

	if !isBoundary(lead) || !isBoundary(trail) {
		return Tag{}
	}
	return Tag{
		Name:     name,
		position: position,
		before:   lead,
		after:    trail,
	}
}
// String renders the tag as text: the character before it, the hashtag
// followed by the name, then the character after it. A tag without a name
// renders as its surrounding characters only.
func (tag *Tag) String() string {
	if len(tag.Name) != 0 {
		return fmt.Sprintf("%s#%s%s", tag.before, tag.Name, tag.after)
	}
	return fmt.Sprintf("%s%s", tag.before, tag.after)
}
// File represents a file attachment in a note, i.e. an HTML anchor such as
// <a href='my%20file.pdf'>my file.pdf</a>.
type File struct {
Location string // The path to the file attachment (URL-decoded href)
Name string // The name of the file (the link text)
position []int // The position of the matched anchor in the Markdown file
}
// NewFile creates a File from the Markdown content and position in file.
//
// content is the text of one file attachment, as matched by reFile, and
// position is the location of that match in the note. The href is
// URL-decoded to obtain the file location. When it is not valid
// percent-encoding (for instance a literal '%' in the file name) the href is
// kept verbatim instead of being dropped.
//
// The zero File is returned when content does not match reFile.
func NewFile(content string, position []int) File {
	parts := reFile.FindStringSubmatch(content)
	if len(parts) == 0 {
		return File{}
	}

	location, err := url.PathUnescape(parts[1])
	if err != nil {
		// PathUnescape returns an empty string on failure; falling back to
		// the raw href avoids silently losing the attachment location.
		location = parts[1]
	}

	return File{
		Location: location,
		Name:     parts[2],
		position: position,
	}
}
// escapePath URL-encodes path one segment at a time, so that the slashes
// separating the segments are kept as-is instead of being encoded.
func escapePath(path string) string {
	segments := strings.Split(path, "/")
	for i := range segments {
		segments[i] = url.PathEscape(segments[i])
	}
	return strings.Join(segments, "/")
}
// String renders the file attachment as a Markdown link suitable for Zettlr:
// the file name as link text and the URL-encoded location as target.
func (file *File) String() string {
	target := escapePath(file.Location)
	return fmt.Sprintf("[%s](%s)", file.Name, target)
}
// Image represents an embedded image in a note, i.e. Markdown such as
// ![](note/my-image.png).
type Image struct {
Location string // The path to the embedded image (URL-decoded)
Description string // The alternative text for the image
position []int // The position of the matched image in the Markdown file
}
// NewImage creates an Image from the Markdown content and position in file.
//
// content is the text of one embedded image, as matched by reImage, and
// position is the location of that match in the note. The image target is
// URL-decoded to obtain the location. When it is not valid percent-encoding
// (for instance a literal '%' in the file name) the target is kept verbatim
// instead of being dropped.
//
// The zero Image is returned when content does not match reImage.
func NewImage(content string, position []int) Image {
	parts := reImage.FindStringSubmatch(content)
	if len(parts) == 0 {
		return Image{}
	}

	location, err := url.PathUnescape(parts[2])
	if err != nil {
		// PathUnescape returns an empty string on failure; falling back to
		// the raw target avoids silently losing the image location.
		location = parts[2]
	}

	return Image{
		Location:    location,
		Description: parts[1],
		position:    position,
	}
}
// String renders the image in Markdown syntax suitable for Zettlr: the
// description as alternative text and the URL-encoded location as target.
func (image *Image) String() string {
	target := escapePath(image.Location)
	return fmt.Sprintf("![%s](%s)", image.Description, target)
}
// Note represents a Bear note with its tags, file attachments and embedded
// images. The original text is kept so that WriteNote can rebuild the note
// around the converted items.
type Note struct {
Tags []Tag // All the tags
Files []File // All the file attachments
Images []Image // All the embedded images
content string // The full note content, as passed to LoadNote
}
// LoadNote parses a Bear note in Markdown format and returns a Note object.
func LoadNote(content string) *Note {
var note Note
note.content = content
for _, match := range reTag.FindAllStringIndex(content, -1) {
tag := NewTag(content[match[0]:match[1]], match)
if len(tag.Name) > 0 {
note.Tags = append(note.Tags, tag)
}
}
for _, match := range reFile.FindAllStringIndex(content, -1) {
note.Files = append(note.Files, NewFile(content[match[0]:match[1]], match))
}
for _, match := range reImage.FindAllStringIndex(content, -1) {
note.Images = append(note.Images, NewImage(content[match[0]:match[1]], match))
}
return ¬e
}
// updatedItem is used to sort tags, images and files by their order
// of appearance in the file. It pairs the converted text of an item with
// the span of the original text that it replaces.
type updatedItem struct {
content string // converted tag, file or image content
position []int // position in file of the original item
}
// WriteNote converts the note back into a format suitable for Zettlr.
//
// Every tag, file attachment and embedded image is replaced by its String()
// form, and the text between them is copied verbatim from the original
// content.
//
// Items are expected not to overlap. An item that starts inside an item
// already written (for instance a tag inside an image description), or whose
// position does not fit the content, is skipped: the original text it covers
// is left to whatever the enclosing item or the verbatim copy produces.
func (note *Note) WriteNote() string {
	// Tags, Images and Files are all stored into a common list.
	items := make([]updatedItem, 0, len(note.Tags)+len(note.Files)+len(note.Images))
	add := func(content string, position []int) {
		// An item without a [start, end] pair cannot be placed in the file.
		if len(position) < 2 {
			return
		}
		items = append(items, updatedItem{content, position})
	}
	for i := range note.Tags {
		add(note.Tags[i].String(), note.Tags[i].position)
	}
	for i := range note.Files {
		add(note.Files[i].String(), note.Files[i].position)
	}
	for i := range note.Images {
		add(note.Images[i].String(), note.Images[i].position)
	}

	// Sort by order of appearance in the file, i.e. by start offset. Comparing
	// start against start gives sort a consistent ordering.
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].position[0] < items[j].position[0]
	})

	// Go through all items and copy the updated version of the item along
	// with the interleaved original excerpts.
	var current int
	var newContent strings.Builder
	newContent.Grow(len(note.content))
	for _, item := range items {
		start, end := item.position[0], item.position[1]
		if start < current || end < start || end > len(note.content) {
			// Overlaps the previous item or lies outside the content.
			continue
		}
		newContent.WriteString(note.content[current:start])
		newContent.WriteString(item.content)
		current = end
	}
	newContent.WriteString(note.content[current:])
	return newContent.String()
}