-
-
Notifications
You must be signed in to change notification settings - Fork 116
/
Copy pathitemgraph.go
424 lines (381 loc) · 12.7 KB
/
itemgraph.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
package timeliner
import (
"bytes"
"encoding/gob"
"io"
"time"
)
// Item is the central concept of a piece of content
// from a service or data source. Take note of which
// methods are required to return non-empty values.
//
// The actual content of an item is stored either in
// the database or on disk as a file. Generally,
// content that is text-encoded can and should be
// stored in the database where it will be indexed.
// However, if the item's content (for example, the
// bytes of a photo or video) is not text or if the
// text is too large to store well in a database (for
// example, an entire novel), it should be stored
// on disk, and this interface has methods to
// accommodate both. Note that an item may have both
// text and non-text content, too: for example, photos
// and videos may have descriptions that are as much
// "content" as the media itself. One part of an item
// is not mutually exclusive with any other.
type Item interface {
// ID returns the unique ID of the item assigned by
// the service. If the service does not assign one,
// then invent one such that the ID is unique to the
// content or substance of the item (for example, an
// ID derived from timestamp or from the actual
// content of the item -- whatever makes it unique).
// The ID need only be unique for the account it is
// associated with, although more unique is, of
// course, acceptable.
//
// REQUIRED.
ID() string
// Timestamp returns the originating timestamp of
// the item, which may be different from when the
// item was posted or created. For example, a photo
// may be taken one day but uploaded a week later.
// Prefer the time when the original item content
// was captured.
//
// REQUIRED.
Timestamp() time.Time
// Class returns a classification of the item's kind.
//
// REQUIRED.
Class() ItemClass
// Owner returns the user/account ID of the owner
// or originator of the content, along with their
// username or real name. The ID is used to
// relate the item with the person behind it;
// the name is used to make the person
// recognizable to the human reader. If the
// ID is nil, the current account owner will
// be assumed. (Use the ID as given by the
// data source.) If the data source only
// provides a name but no ID, you may return
// the name as the ID with the understanding
// that a different name will be counted as a
// different person. You may also return the
// name as the name and leave the ID nil and
// have correct results if it is safe to assume
// the name belongs to the current account owner.
Owner() (id *string, name *string)
// DataText returns the text of the item, if any.
// This field is indexed in the DB, so don't
// use it for unimportant metadata or huge
// swaths of text; if there is a large
// amount of text, use an item file instead.
DataText() (*string, error)
// For primary content which is not text or
// which is too large to be stored well in a
// database, the content can be downloaded
// into a file. If so, the following methods
// should return the necessary information,
// if available from the service, so that a
// data file can be obtained, stored, and
// later read successfully.
//
// DataFileName returns the filename (NOT full
// path or URL) of the file; prefer the original
// filename if it originated as a file. If the
// filename is not unique on disk when downloaded,
// it will be made unique by modifying it. If
// this value is nil/empty, a filename will be
// generated from the item's other data.
//
// DataFileReader returns a way to read the data.
// It will be closed when the read is completed.
//
// DataFileHash returns the checksum of the
// content as provided by the service. If the
// service (or data source) does not provide a
// hash, leave this field empty, but note that
// later it will be impossible to efficiently
// know whether the content has changed on the
// service from what is stored locally.
//
// DataFileMIMEType returns the MIME type of
// the data file, if known.
DataFileName() *string
DataFileReader() (io.ReadCloser, error)
DataFileHash() []byte
DataFileMIMEType() *string
// Metadata returns any optional metadata.
// Feel free to leave as many fields empty
// as you'd like: the fewer fields that are
// filled out, the smaller the storage size.
// Metadata is not indexed by the DB but is
// rendered in projections and queries
// according to the item's classification.
Metadata() (*Metadata, error)
// Location returns an item's location,
// if known. For now, only Earth
// coordinates are accepted, but we can
// improve this later.
Location() (*Location, error)
}
// ItemClass classifies an item.
type ItemClass int
// Various classes of items. The values are assigned
// sequentially with iota starting at 0, so ClassUnknown
// is the zero value of ItemClass, and inserting or
// reordering entries changes the numeric value of every
// class that follows.
// NOTE(review): ItemRow carries a Class field, so these
// numbers are presumably persisted -- confirm before
// renumbering.
const (
ClassUnknown ItemClass = iota
ClassImage
ClassVideo
ClassAudio
ClassPost
ClassLocation
ClassEmail
ClassPrivateMessage
ClassMessage
)
// These are the standard relationships that Timeliner
// recognizes. Using these known relationships is not
// required, but it makes it easier to translate them to
// human-friendly phrases when visualizing the timeline.
//
// The trailing comment on each relation gives the phrase
// it stands for, where <from> and <to> are the two ends
// of the edge.
var (
RelReplyTo = Relation{Label: "reply_to", Bidirectional: false} // "<from> is in reply to <to>"
RelAttached = Relation{Label: "attached", Bidirectional: true} // "<to|from> is attached to <from|to>"
RelQuotes = Relation{Label: "quotes", Bidirectional: false} // "<from> quotes <to>"
RelCCed = Relation{Label: "carbon_copied", Bidirectional: false} // "<from_item> is carbon-copied to <to_person>"
)
// ItemRow has the structure of an item's row in our DB.
type ItemRow struct {
ID int64
AccountID int64
// NOTE(review): presumably the ID assigned by the
// data source (cf. Item.ID) -- confirm.
OriginalID string
PersonID int64
Timestamp time.Time
Stored time.Time
// Pointer fields below may be nil.
// NOTE(review): nil presumably means the column is
// NULL/unset -- confirm against the DB code.
Modified *time.Time
Class ItemClass
MIMEType *string
DataText *string
DataFile *string
DataHash *string // base64-encoded SHA-256
Metadata *Metadata
// Location is embedded, so its Latitude and Longitude
// fields are promoted onto ItemRow.
Location
// Unexported fields, accessible only within this package.
metaGob []byte // use Metadata.(encode/decode)
item Item
}
// Location contains location information.
// Both fields are pointers and so may be nil.
// NOTE(review): nil presumably means the coordinate
// is unknown -- confirm.
type Location struct {
Latitude *float64
Longitude *float64
}
// ItemGraph is an item with optional connections to other items.
// All ItemGraph values should be pointers to ensure consistency.
// The usual weird/fun thing about representing graph data structures
// in memory is that a graph is a node, and a node is a graph. 🤓
type ItemGraph struct {
// The node item. This can be nil, but note that
// Edges will not be traversed if Node is nil,
// because there must be a node on both ends of
// an edge.
//
// Optional.
Node Item
// Edges are represented as 1:many relations
// to other "graphs" (nodes in the graph).
// Fill this out to add multiple items to the
// timeline at once, while drawing the
// designated relationships between them.
// Useful when processing related items in
// batches.
//
// Directional relationships go from Node to
// the map key. The map is keyed by *ItemGraph,
// so two keys are the same edge target only if
// they are the same pointer.
//
// If the items involved in a relationship are
// not efficiently available at the same time
// (i.e. if loading both items involved in the
// relationship would take a non-trivial amount
// of time or API calls), you can use the
// Relations field instead, but only after the
// items have been added to the timeline.
//
// Optional.
Edges map[*ItemGraph][]Relation
// If items in the graph belong to a collection,
// specify them here. If the collection does not
// exist (by row ID or AccountID+OriginalID), it
// will be created. If it already exists, the
// collection in the DB will be unioned with the
// collection specified here. Collections are
// processed regardless of Node and Edges.
//
// Optional.
Collections []Collection
// Relationships between existing items in the
// timeline can be represented here in a list
// of item IDs that are connected by a label.
// This field is useful when relationships and
// the items involved in them are not discovered
// at the same time. Relations in this list will
// be added to the timeline, joined by the item
// IDs described in the RawRelations, only if
// the items having those IDs (as provided by
// the data source; we're not talking about DB
// row IDs here) already exist in the timeline.
// In other words, this is a best-effort field;
// useful for forming relationships of existing
// items, but without access to the actual items
// themselves. If you have the items involved in
// the relationships, use Edges instead.
//
// Optional.
Relations []RawRelation
}
// NewItemGraph builds a graph whose root node is node and
// whose Edges map is already allocated (and empty), so that
// edges can be attached to it right away.
func NewItemGraph(node Item) *ItemGraph {
	ig := new(ItemGraph)
	ig.Node = node
	ig.Edges = map[*ItemGraph][]Relation{}
	return ig
}
// Add wraps item in a fresh single-node graph and attaches
// that graph to ig with an edge described by rel, running
// from the node ig to the new node.
//
// Use it for simple inserts where a lone item is all there
// is to add right now. When the thing to attach is itself a
// graph with edges of its own, build it separately and hand
// it to ig.Connect instead.
func (ig *ItemGraph) Add(item Item, rel Relation) {
	leaf := NewItemGraph(item)
	ig.Connect(leaf, rel)
}
// Connect attaches the graph (node) to ig with an edge
// described by rel. The Edges map is created on first use,
// and connecting the same node more than once accumulates
// the relations in the order they were added.
func (ig *ItemGraph) Connect(node *ItemGraph, rel Relation) {
	if ig.Edges == nil {
		// first edge ever: create the map with its only entry
		ig.Edges = map[*ItemGraph][]Relation{node: {rel}}
		return
	}
	rels := ig.Edges[node]
	ig.Edges[node] = append(rels, rel)
}
// RawRelation represents a relationship between
// two items or people (or both) from the same
// data source (but not necessarily the same
// accounts; we assume that a data source's item
// IDs are globally unique across accounts).
// The item IDs should be those which are
// assigned/provided by the data source, NOT a
// database row ID. Likewise, the persons' user
// IDs should be the IDs of the user as associated
// with the data source, NOT their row IDs.
//
// Relation is embedded, so its Label and
// Bidirectional fields are promoted onto
// RawRelation.
type RawRelation struct {
FromItemID string
ToItemID string
FromPersonUserID string
ToPersonUserID string
Relation
}
// Relation describes how two nodes in a graph are related.
// It's essentially an edge on a graph: Label names the
// relationship and Bidirectional says whether it holds in
// both directions or only from one end to the other.
type Relation struct {
Label string
Bidirectional bool
}
// Collection represents a group of items, like an album.
type Collection struct {
// The ID of the collection as given
// by the service; for example, the
// album ID. If the service does not
// provide an ID for the collection,
// invent one such that the next time
// the collection is encountered and
// processed, its ID will be the same.
// An ID is necessary here to ensure
// uniqueness.
//
// REQUIRED.
OriginalID string
// The name of the collection as
// given by the service; for example,
// the album title.
//
// Optional.
Name *string
// The description, caption, or any
// other relevant text describing
// the collection.
//
// Optional.
Description *string
// The items for the collection;
// if ordering is significant,
// specify each item's Position
// field; the order of elements
// of this slice will not be
// considered important.
Items []CollectionItem
}
// CollectionItem represents an item
// stored in a collection.
type CollectionItem struct {
// The item to add to the collection.
Item Item
// Specify if ordering is important.
Position int
// Used when processing; this will
// store the row ID of the item
// after the item has been inserted
// into the DB. Unexported, so it is
// only set from within this package.
itemRowID int64
}
// Metadata is a unified structure for storing
// item metadata in the DB.
//
// Values are serialized with encoding/gob by the encode and
// decode methods below, which strip and restore the shared
// leading bytes held in metadataGobPrefix.
type Metadata struct {
	// A hash or etag provided by the service to
	// make it easy to know if it has changed
	ServiceHash []byte

	// Locations
	LocationAccuracy int
	Altitude         int // meters
	AltitudeAccuracy int
	Heading          int // degrees
	Velocity         int
	GeneralArea      string // natural language description of a location

	// Photos and videos
	EXIF map[string]interface{}
	// TODO: Should we have some of the "most important" EXIF fields explicitly here?

	Width  int
	Height int

	// TODO: Google Photos (how many of these belong in EXIF?)
	CameraMake      string
	CameraModel     string
	FocalLength     float64
	ApertureFNumber float64
	ISOEquivalent   int
	ExposureTime    time.Duration

	FPS float64 // Frames Per Second

	// Posts (Facebook so far)
	Link        string
	Description string
	Name        string
	ParentID    string
	StatusType  string
	Type        string

	Shares int // aka "Retweets" or "Reshares"
	Likes  int
}

// encode gob-encodes m and returns the encoding with its
// first len(metadataGobPrefix) bytes (the schema portion)
// trimmed off, leaving only the actual data.
//
// It returns any error reported by the gob encoder. The
// prefix bytes are not compared against the encoding; they
// are simply skipped by length, so encode panics if the
// encoding is shorter than metadataGobPrefix.
func (m *Metadata) encode() ([]byte, error) {
	buf := new(bytes.Buffer)
	if err := gob.NewEncoder(buf).Encode(m); err != nil {
		return nil, err
	}
	return buf.Bytes()[len(metadataGobPrefix):], nil
}

// decode is the inverse of encode: it prepends
// metadataGobPrefix to b and gob-decodes the result into m.
// A nil b is treated as "no metadata": m is left untouched
// and nil is returned. Otherwise the gob decoder's error,
// if any, is returned.
func (m *Metadata) decode(b []byte) error {
	if b == nil {
		return nil
	}

	// Assemble the full stream in a buffer of our own.
	// Appending directly to the package-level prefix slice
	// would write b into its backing array whenever it has
	// spare capacity, letting concurrent decodes overwrite
	// each other's input.
	fullGob := make([]byte, 0, len(metadataGobPrefix)+len(b))
	fullGob = append(fullGob, metadataGobPrefix...)
	fullGob = append(fullGob, b...)

	return gob.NewDecoder(bytes.NewReader(fullGob)).Decode(m)
}

// metadataGobPrefix holds the leading bytes that encode
// strips from, and decode restores to, every gob-encoded
// Metadata. It must be treated as read-only once set.
// NOTE(review): it is declared without a value here, so it
// is presumably assigned during package initialization
// elsewhere -- confirm.
var metadataGobPrefix []byte