Skip to content

Commit

Permalink
Fixed importing for added hour-format files to same directory, added …
Browse files Browse the repository at this point in the history
…support for corelight format file names

Co-Authored-By: Naomi Kramer <[email protected]>
  • Loading branch information
lisaSW and caffeinatedpixel committed Aug 2, 2024
1 parent 5d22b1c commit 8c43e44
Show file tree
Hide file tree
Showing 4 changed files with 288 additions and 22 deletions.
2 changes: 1 addition & 1 deletion cmd/delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ func TestTrimWildcards(t *testing.T) {
}
}

//nolint:unused // will be used in the future
//lint:ignore U1000 will be used in the future
func validateCommandsExist(t *testing.T, commands []*cli.Command, expected []string) {
t.Helper()
expectedCmds := make(map[string]bool)
Expand Down
45 changes: 32 additions & 13 deletions cmd/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ func RunImportCmd(startTime time.Time, cfg *config.Config, afs afero.Fs, logDir
}

var elapsedTime int64
// var dayStartedAt time.Time

// loop through each day
for day, hourlyLogs := range logMap {
Expand Down Expand Up @@ -213,10 +212,15 @@ func RunImportCmd(startTime time.Time, cfg *config.Config, afs afero.Fs, logDir

// import the data
err = importer.Import(afs, files)
if err != nil {
if err != nil && !errors.Is(err, i.ErrAllFilesPreviouslyImported) {
return importResults, err
}

// skip the rest of this hour's import if all files have been previously imported
if errors.Is(err, i.ErrAllFilesPreviouslyImported) {
continue
}

// update result counts (used for testing)
importResults.Conn += importer.ResultCounts.Conn
importResults.OpenConn += importer.ResultCounts.OpenConn
Expand Down Expand Up @@ -297,6 +301,10 @@ func RunImportCmd(startTime time.Time, cfg *config.Config, afs afero.Fs, logDir
}
}

// if after going through every day/hour and there are no import IDs, return error stating all files were previously imported
if len(importResults.ImportID) == 0 {
return importResults, i.ErrAllFilesPreviouslyImported
}
logger.Info().Str("elapsed_time", fmt.Sprintf("%1.1fs", time.Since(startTime).Seconds())).Msg("🎊✨ Finished Import! ✨🎊")

return importResults, nil
Expand Down Expand Up @@ -580,20 +588,31 @@ func ParseHourFromFilename(filename string) (int, error) {
// attempt to find a match in the filename
matches := timeRegex.FindStringSubmatch(filename)

// if hour pattern didn't match, check if the filename is a simple log file
// if hour pattern didn't match, check if filename is a security onion log or simple log file
if matches == nil {
// regex to identify simple log files (ie, conn.log, open_conn.log, /logs/conn.log.gz, etc) without hour
simpleLogPattern := `^\w+\.log(\.gz)?$`
simpleLogRegex := regexp.MustCompile(simpleLogPattern)
timePatternSecOnion := `[A-Za-z]+[:/_]\d{8}[:/_](\d{2})[:/_]\d{2}`

// if the filename matches the simple log pattern, consider file as 0 hour and return
if simpleLogRegex.MatchString(filepath.Base(filename)) {
return 0, nil
}
// attempt to match the security onion pattern
// compile the timeRegex
timeRegex := regexp.MustCompile(timePatternSecOnion)

// if format doesn't match the hourly pattern or the simple log pattern, return an error
// to catch malformed hour formats
return 0, ErrInvalidLogHourFormat
// attempt to find a match in the filename
matches = timeRegex.FindStringSubmatch(filename)

if matches == nil {
// regex to identify simple log files (ie, conn.log, open_conn.log, /logs/conn.log.gz, etc) without hour
simpleLogPattern := `^\w+\.log(\.gz)?$`
simpleLogRegex := regexp.MustCompile(simpleLogPattern)

// if the filename matches the simple log pattern, consider file as 0 hour and return
if simpleLogRegex.MatchString(filepath.Base(filename)) {
return 0, nil
}

// if format doesn't match the hourly pattern or the simple log pattern, return an error
// to catch malformed hour formats
return 0, ErrInvalidLogHourFormat
}
}

// convert the extracted hour string to an integer
Expand Down
262 changes: 254 additions & 8 deletions cmd/import_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,11 @@ func (c *CmdTestSuite) TestRunImportCmd() {

// if we are using the mock directory, we need to create it along with the files
if strings.HasPrefix(db.logDir, "/logs") {
// create mock directory with files
createMockZeekLogs(t, tc.afs, db.logDir, files, true)
// create mock directory
err := tc.afs.MkdirAll(db.logDir, os.FileMode(0o775))
require.NoError(t, err, "creating directory should not produce an error")
// create mock files
createMockZeekConnLogs(t, tc.afs, db.logDir, files, true)
}

// run the import command
Expand Down Expand Up @@ -357,15 +360,182 @@ func (c *CmdTestSuite) TestRunImportCmd() {

}

// createMockZeekLogs creates a directory with files that contain mock Zeek logs, filling them with valid
// TestRollingLogsBeingAddedToSameFolder verifies incremental ("rolling") imports:
// when new log files are added to a directory that was already imported, a later
// import run must pick up only the new files, skip the previously imported ones,
// and return ErrAllFilesPreviouslyImported only when nothing new is present.
func (c *CmdTestSuite) TestRollingLogsBeingAddedToSameFolder() {
	t := c.T()

	// importCase describes the log files present for one import run and the
	// error (if any) that run is expected to return; nil means it must succeed.
	type importCase struct {
		connLogs      []string
		dnsLogs       []string
		httpLogs      []string
		sslLogs       []string
		openConnLogs  []string
		expectedError error
	}

	testCases := []struct {
		name  string
		cases []importCase
	}{
		{
			name: "Same Folder Non Hour",
			cases: []importCase{
				{
					connLogs: []string{"conn.log"},
				},
				{
					dnsLogs: []string{"dns.log"},
				},
			},
		},
		{
			name: "Same Folder - Hour",
			cases: []importCase{
				{
					connLogs: []string{"conn.00:00:00-01:00:00.log"},
					dnsLogs:  []string{"dns.00:00:00-01:00:00.log"},
					httpLogs: []string{"http.00:00:00-01:10:00.log"},
					sslLogs:  []string{"ssl.00:00:00-01:10:00.log"},
				},
				{
					connLogs: []string{"conn.01:00:00-02:00:00.log"},
					dnsLogs:  []string{"dns.01:00:00-02:00:00.log"},
					httpLogs: []string{"http.01:00:00-02:50:00.log"},
					sslLogs:  []string{"ssl.01:00:00-02:10:00.log"},
				},
			},
		},
		{
			name: "Subfolders - One Has Non Imported",
			cases: []importCase{
				{
					connLogs: []string{"conn.log", "/subfolder/conn.log"},
					dnsLogs:  []string{"dns.log", "/subfolder/dns.log"},
					httpLogs: []string{"http.log", "/subfolder/http.log"},
					sslLogs:  []string{"ssl.log", "/subfolder/ssl.log"},
				},
				{
					connLogs: []string{"/subfolder2/conn.log"},
					dnsLogs:  []string{"/subfolder2/dns.log"},
					httpLogs: []string{"/subfolder2/http.log"},
					sslLogs:  []string{"/subfolder2/ssl.log"},
				},
			},
		},
		{
			name: "Subfolders - Both Have Non Imported",
			cases: []importCase{
				{
					connLogs: []string{"conn.log"},
					dnsLogs:  []string{"dns.log"},
					httpLogs: []string{"http.log"},
					sslLogs:  []string{"ssl.log"},
				},
				{
					// fixed copy-paste bug: the dns/http/ssl lists previously all
					// pointed at "/subfolder/conn.log", so the helpers overwrote
					// one file repeatedly instead of creating per-type logs
					connLogs: []string{"/subfolder/conn.log", "/subfolder2/conn.log"},
					dnsLogs:  []string{"/subfolder/dns.log", "/subfolder2/dns.log"},
					httpLogs: []string{"/subfolder/http.log", "/subfolder2/http.log"},
					sslLogs:  []string{"/subfolder/ssl.log", "/subfolder2/ssl.log"},
				},
			},
		},
		{
			name: "Subfolders - All Have Non Imported",
			cases: []importCase{
				{
					connLogs: []string{"conn.log"},
					dnsLogs:  []string{"dns.log"},
					httpLogs: []string{"http.log"},
					sslLogs:  []string{"ssl.log"},
				},
				{
					connLogs: []string{"conn.00:00:00-01:00:00.log", "/subfolder/conn.log", "/subfolder2/conn.log"},
					dnsLogs:  []string{"dns.03:00:00-04:00:00.log", "/subfolder/dns.log", "/subfolder2/dns.log"},
					httpLogs: []string{"http.00:00:00-01:00:00.log", "/subfolder/http.log", "/subfolder2/http.log"},
					sslLogs:  []string{"ssl.00:00:00-01:00:00.log", "/subfolder/ssl.log", "/subfolder2/ssl.log"},
				},
			},
		},
		{
			name: "Subfolders - All Previously Imported",
			cases: []importCase{
				{
					connLogs: []string{"conn.00:00:00-01:00:00.log", "/subfolder/conn.log", "/subfolder2/conn.log"},
					dnsLogs:  []string{"dns.03:00:00-04:00:00.log", "/subfolder/dns.log", "/subfolder2/dns.log"},
					httpLogs: []string{"http.00:00:00-01:00:00.log", "/subfolder/http.log", "/subfolder2/http.log"},
					sslLogs:  []string{"ssl.00:00:00-01:00:00.log", "/subfolder/ssl.log", "/subfolder2/ssl.log"},
				},
				{
					connLogs: []string{"conn.00:00:00-01:00:00.log", "/subfolder/conn.log", "/subfolder2/conn.log"},
					dnsLogs:  []string{"dns.03:00:00-04:00:00.log", "/subfolder/dns.log", "/subfolder2/dns.log"},
					httpLogs: []string{"http.00:00:00-01:00:00.log", "/subfolder/http.log", "/subfolder2/http.log"},
					sslLogs:  []string{"ssl.00:00:00-01:00:00.log", "/subfolder/ssl.log", "/subfolder2/ssl.log"},
					// NOTE(review): the second run re-imports identical files, so
					// RunImportCmd should return ErrAllFilesPreviouslyImported
					// (no new import IDs are produced) — confirm against import.go
					expectedError: importer.ErrAllFilesPreviouslyImported,
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			afs := afero.NewMemMapFs()
			err := afs.MkdirAll("/logs", 0o755)
			require.NoError(t, err)

			// each entry in tc.cases is one import run against the same folder
			for hour, run := range tc.cases {
				// create the mock log files for this run
				createMockZeekConnLogs(t, afs, "/logs", run.connLogs, true)
				createMockZeekProtoLogs(t, afs, "/logs", run.dnsLogs, true, "query")
				createMockZeekConnLogs(t, afs, "/logs", run.openConnLogs, true)
				createMockZeekProtoLogs(t, afs, "/logs", run.httpLogs, true, "host")
				createMockZeekProtoLogs(t, afs, "/logs", run.sslLogs, true, "server_name")

				results, err := cmd.RunImportCmd(time.Now(), c.cfg, afs, "/logs", "test_rolling_same_folder", true, true)

				// check if this particular run is expected to error
				if run.expectedError != nil {
					require.Error(t, err, "running import command should produce an error")
					require.Contains(t, err.Error(), run.expectedError.Error(), "error should contain expected value")
					continue
				}
				require.NoError(t, err, "running import command should not produce an error")

				if len(run.connLogs) > 0 {
					require.Greater(t, results.ResultCounts.Conn, uint64(0), "hour %d: conn logs should be imported", hour)
				}

				if len(run.dnsLogs) > 0 {
					require.Greater(t, results.ResultCounts.DNS, uint64(0), "hour %d: dns logs should be imported", hour)
				}

				if len(run.httpLogs) > 0 {
					require.Greater(t, results.ResultCounts.HTTP, uint64(0), "hour %d: http logs should be imported", hour)
				}

				if len(run.sslLogs) > 0 {
					require.Greater(t, results.ResultCounts.SSL, uint64(0), "hour %d: ssl logs should be imported", hour)
				}

				if len(run.openConnLogs) > 0 {
					require.Greater(t, results.ResultCounts.OpenConn, uint64(0), "hour %d: open conn logs should be imported", hour)
				}
			}

			require.NoError(t, afs.RemoveAll("/logs"), "removing directory should not produce an error")

			// clean up the database
			require.NoError(t, c.server.DeleteSensorDB("test_rolling_same_folder"), "dropping database should not produce an error")
		})
	}
}

// createMockZeekConnLogs creates a directory with files that contain mock Zeek logs, filling them with valid
// log values if necessary for the test
func createMockZeekLogs(t *testing.T, afs afero.Fs, directory string, files []string, valid bool) {
func createMockZeekConnLogs(t *testing.T, afs afero.Fs, directory string, files []string, valid bool) {
t.Helper()

// create directory
err := afs.MkdirAll(directory, os.FileMode(0o775))
require.NoError(t, err, "creating directory should not produce an error")

// create files
for _, file := range files {
data := []byte("test")
Expand Down Expand Up @@ -395,6 +565,38 @@ func createMockZeekLogs(t *testing.T, afs afero.Fs, directory string, files []st
}
}

// createMockZeekProtoLogs writes mock Zeek protocol log files (dns/http/ssl
// style) into directory. When valid is true each file receives a well-formed
// TSV Zeek log whose last column is named by field (e.g. "query", "host",
// "server_name"); otherwise the file gets placeholder bytes.
func createMockZeekProtoLogs(t *testing.T, afs afero.Fs, directory string, files []string, valid bool, field string) {
	t.Helper()

	// The contents are identical for every file in this call, so build them
	// once up front instead of inside the loop.
	// NOTE(review): #path is always "dns" even for http/ssl mocks — presumably
	// the importer keys off the filename rather than the #path header; confirm.
	contents := []byte("test")
	if valid {
		contents = []byte("#separator \\x09\n" +
			"#set_separator\t,\n" +
			"#empty_field\t(empty)\n" +
			"#unset_field\t-\n" +
			"#path\tdns\n" +
			"#open\t2019-02-28-12-07-01\n" +
			"#fields\tts\tuid\tid.orig_h\tid.resp_h\t" + field + "\n" +
			"#types\ttime\tstring\taddr\taddr\tstring\n" +
			"1715640994.367201\tCxT121\t10.0.0.1\t52.12.0.1\tmicrosoft.com\n" +
			"1715640994.367201\tCxT121\t10.0.0.1\t52.12.0.1\ta.microsoft.com\n" +
			"1715641054.367201\tCxT122\t10.0.0.2\t52.12.0.2\tgoogle.com\n" +
			"1715641054.367201\tCxT122\t10.0.0.2\t52.12.0.2\tyahoo.com\n" +
			"1715641114.367201\tCxT123\t10.0.0.3\t52.12.0.3\ttime.apple.com\n" +
			"1715641114.367201\tCxT123\t10.0.0.3\t52.12.0.3\treddit.com\n" +
			"1715641174.367201\tCxT124\t10.0.0.4\t52.12.0.4\tnasa.org\n" +
			"1715641174.367201\tCxT124\t10.0.0.4\t52.12.0.4\tyoutube.com\n" +
			"1715641234.367201\tCxT125\t10.0.0.5\t52.12.0.5\ttwitch.tv\n" +
			"1715641234.367201\tCxT125\t10.0.0.5\t52.12.0.5\tmaps.google.com\n",
		)
	}

	for _, name := range files {
		path := filepath.Join(directory, name)
		require.NoError(t, afero.WriteFile(afs, path, contents, os.FileMode(0o775)),
			"creating files should not produce an error")
	}
}

func createExpectedResults(logs []cmd.HourlyZeekLogs) []cmd.HourlyZeekLogs {
var data []cmd.HourlyZeekLogs

Expand Down Expand Up @@ -561,6 +763,32 @@ func TestWalkFiles(t *testing.T) {
expectedWalkErrors: nil,
expectedError: nil,
},
{
name: "Hour Logs, Containing all Log Types - Corelight Format",
directory: "/logs",
directoryPermissions: os.FileMode(0o775),
filePermissions: os.FileMode(0o775),
files: []string{
"conn_red.00:00:00-01:00:00.log",
"conn_20240722_12:00:00-13:00:00+0000.log",
},
expectedFiles: createExpectedResults([]cmd.HourlyZeekLogs{
0: {
0: {
importer.ConnPrefix: []string{
"/logs/conn_red.00:00:00-01:00:00.log",
},
},
12: {
importer.ConnPrefix: []string{
"/logs/conn_20240722_12:00:00-13:00:00+0000.log",
},
},
},
}),
expectedWalkErrors: nil,
expectedError: nil,
},
{
name: "Hour Logs, Missing conn & open_conn Logs",
directory: "/logs",
Expand Down Expand Up @@ -1066,6 +1294,24 @@ func TestParseHourFromFilename(t *testing.T) {
wantHour: 23,
wantErr: nil,
},
{
name: "Valid Corelight Format",
filename: "conn_20240722_12:00:00-13:00:00+0000",
wantHour: 12,
wantErr: nil,
},
{
name: "Invalid Corelight Format - Bad Date",
filename: "conn_123456789_12:00:00-13:00:00",
wantHour: 0,
wantErr: cmd.ErrInvalidLogHourFormat,
},
{
name: "Invalid Corelight Format - Ending Period",
filename: "conn_20240722.12:00:00-13:00:00",
wantHour: 0,
wantErr: cmd.ErrInvalidLogHourFormat,
},
{
name: "Invalid Hour Range",
filename: "log.24:00",
Expand Down
1 change: 1 addition & 0 deletions database/metadb.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ func (db *DB) AddImportFinishedRecordToMetaDB(importID util.FixedString, minTS,
// CheckIfFilesWereAlreadyImported calls checkFileHashes for each log type
func (db *DB) CheckIfFilesWereAlreadyImported(fileMap map[string][]string) (int, error) {
totalFileCount := 0
// loop over each log type in the hour's filemap
for logType, logList := range fileMap {
results, err := db.checkFileHashes(logList)
if err != nil {
Expand Down

0 comments on commit 8c43e44

Please sign in to comment.