This repository has been archived by the owner on Oct 21, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.go
145 lines (118 loc) · 3.43 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
Command csvsplit splits a .csv into multiple, smaller files.
Resulting files will be saved as 1.csv, 2.csv, etc. in the current directory, unless the -output flag is used.
Install
Requires Go to be installed first, https://golang.org/doc/install.
$ go get github.com/JeffPaine/csvsplit
Flags
Basic usage: csvsplit -records <number of records> <file>
-records
Number of records per file
-output
Output filename / path (optional)
-headers
Number of header lines in the input file to add to each output file (optional, default=0)
Examples
Split file.csv into files with 300 records apiece.
$ csvsplit -records 300 file.csv
Accept csv data from stdin.
$ cat file.csv | csvsplit -records 20
Split file.csv into files with 40 records apiece and two header lines (preserved in all files).
$ csvsplit -records 40 -headers 2 file.csv
You can use the -output flag to customize the resulting filenames.
The below will generate custom_filename-1.csv, custom_filename-2.csv, etc.
$ csvsplit -records 20 -output custom_filename- file.csv
Split file.csv into files with 37 records apiece into the subfolder 'stuff'.
$ csvsplit -records 37 -output stuff/ file.csv
*/
package main
import (
"encoding/csv"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
)
// Command line flags. Each is a pointer that holds its final value only
// after flag.Parse has been called.
var (
// records is the maximum number of records written to each output file.
// main compares it against the length of the buffered chunk, which
// includes any preserved header lines.
records = flag.Int("records", 0, "The number of records per output file")
// output is a prefix that save places in front of the sequence number and
// the ".csv" extension when building each output file name; it may contain
// a directory component.
output = flag.String("output", "", "Filename / path of the output file (leave blank for current directory)")
// headers is the number of leading input records that main keeps at the
// start of the buffer after each chunk is saved, so they are repeated in
// every output file.
headers = flag.Int("headers", 0, "Number of header lines in the input file to preserve in each output file")
)
// main parses the command line flags, reads CSV records from the named input
// file (or from stdin when no file is given) and hands them to save in chunks
// of at most -records records. The first -headers records of the input are
// kept at the start of every chunk, so they appear in every output file and
// count toward the -records limit.
//
// Invalid flag combinations print a message plus the usage text and exit with
// status 1. Read errors terminate the program via log.Fatal.
func main() {
	// Install the custom usage text before parsing so that flag parse errors
	// (unknown flag, malformed value) print it too.
	flag.Usage = func() {
		fmt.Fprintln(os.Stderr, "usage: csvsplit [options] -records <number of records> <file>")
		flag.PrintDefaults()
	}
	flag.Parse()

	// usageError reports an invalid invocation and exits with status 1.
	usageError := func(msg string) {
		fmt.Fprintln(os.Stderr, msg)
		flag.Usage()
		os.Exit(1)
	}

	// Sanity check command line flags. The messages state the requirement
	// that the corresponding check actually enforces.
	switch {
	case *records < 1:
		usageError("-records must be >= 1")
	case *headers < 0:
		usageError("-headers must be >= 0")
	case *headers >= *records:
		usageError("-headers must be < -records")
	case flag.NArg() > 1:
		// Previously extra arguments silently switched the input to stdin.
		usageError("at most one input file may be given")
	}

	// Get input from a given file or stdin.
	var r *csv.Reader
	if flag.NArg() == 1 {
		f, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatal(err)
		}
		defer f.Close()
		r = csv.NewReader(f)
	} else {
		r = csv.NewReader(os.Stdin)
	}

	// Read the input record by record. Save to a new file after reaching the
	// amount of records prescribed by the -records flag.
	var recs [][]string
	count := 1
	// pending counts records appended since the last save, so that EOF does
	// not produce an extra file that is empty or contains only the headers.
	pending := 0
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		recs = append(recs, record)
		pending++
		if len(recs) == *records {
			save(&recs, count)
			// Reset records to include just the header lines (if any).
			recs = recs[:*headers]
			pending = 0
			count++
		}
	}

	// Flush the final, partially filled chunk.
	if pending > 0 {
		save(&recs, count)
	}
}
// save writes the CSV records pointed to by recs to a new file named
// "<output><c>.csv", where <output> is the value of the -output flag. With the
// default empty prefix the files are named 1.csv, 2.csv, etc.
//
// The program is terminated via log.Fatal when the directory part of -output
// is not an existing directory, when the target file already exists, or when
// creating, writing or closing the file fails.
func save(recs *[][]string, c int) {
	name := fmt.Sprintf("%v%d%v", *output, c, ".csv")

	// If a directory is specified, make sure that directory exists.
	if dir := filepath.Dir(*output); dir != "." {
		info, err := os.Stat(dir)
		if err != nil || !info.IsDir() {
			log.Fatal("no such directory: ", dir)
		}
	}

	// O_EXCL makes the creation itself fail when the file already exists, so
	// existing files are never overwritten and there is no window between a
	// separate existence check and the create.
	f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0666)
	if os.IsExist(err) {
		log.Fatal("file exists: ", name)
	}
	if err != nil {
		log.Fatal(err)
	}

	// WriteAll flushes the writer and returns any error from the flush.
	if err := csv.NewWriter(f).WriteAll(*recs); err != nil {
		f.Close()
		log.Fatal(err)
	}
	// Close explicitly instead of deferring so a failed close is reported.
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}