-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmax_folder_depth.pl
133 lines (113 loc) · 3.55 KB
/
max_folder_depth.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# max_folder_depth.pl: find the deepest subfolder in Elasticsearch indexed folders.
# This is largely stolen from elastic_indexer.pl.
# Why? IntraMine's Elasticsearch index has a fixed number of folder-related fields,
# with the hard-coded names folder1, folder2, folder3,...folder32. 32 was chosen as the
# maximum after running this program and seeing a maximum folder depth of 20.
# see elasticsearch_bulk_indexer.pm#AddDocumentToIndex().
# perl C:\perlprogs\mine\max_folder_depth.pl
use strict;
use utf8;
use FileHandle;
use File::Find;
use Math::SimpleHisto::XS;
use Path::Tiny qw(path);
use lib path($0)->absolute->parent->child('libs')->stringify;
use common;
use intramine_config;
LoadConfigValues(); # intramine_config.pm

# Flag for optionally skipping files with a .log or .out extension.
# NOTE(review): nothing in this script reads $SKIPLOGFILES - DoOne() below
# does not filter by extension. Presumably carried over from elastic_indexer.pl.
my $SKIPLOGFILES = 1;

# Optionally allow files with no extension.
# !!NOTE!! Setting ES_INDEX_NO_EXTENSION to 1 is COMPLETELY UNTESTED!
# NOTE(review): this value is likewise not consulted anywhere else in this script.
my $IndexIfNoExtension = CVal('ES_INDEX_NO_EXTENSION');

# Directory lists, filled in by LoadDirectoriesToIndex() and read by the subs below.
my @DirectoriesToIndex;
my @DirectoriesToIgnore;
LoadDirectoriesToIndex();

# Running result, updated by DoOne() as files are visited.
my $MaximumDepth = 0;
my $PathForMaximumDepth = '';

foreach my $i (0 .. $#DirectoriesToIndex)
	{
	# '_INTRAMINE_' stands for the directory that holds this program
	# (and by default all IntraMine files).
	$DirectoriesToIndex[$i] = path($0)->absolute->parent->stringify
		if ($DirectoriesToIndex[$i] eq '_INTRAMINE_');

	Output("Getting paths to $DirectoriesToIndex[$i]...\n");
	finddepth(\&DoOne, $DirectoriesToIndex[$i]);
	}

Output("Max depth $MaximumDepth for |$PathForMaximumDepth|\n");
Output("Done.\n");
########## subs
############## subs
# Output: print the supplied text to the currently selected output handle,
# exactly as given (no newline is added).
sub Output {
	my $message = shift;
	print "$message";
	}
# LoadDirectoriesToIndex: fill @DirectoriesToIndex and @DirectoriesToIgnore from the
# file whose path is configured under 'ELASTICSEARCHDIRECTORIESPATH'.
# Dies if that file does not exist, or if no directories to index were loaded.
# Returns the number of directories to index (not currently used by the caller).
sub LoadDirectoriesToIndex {
	my $configFilePath = FullDirectoryPath('ELASTICSEARCHDIRECTORIESPATH');

	if (!(-f $configFilePath))
		{
		die("ERROR, |$configFilePath| not found!");
		}

	# The loader fills three arrays; the middle one is required by the call
	# but its contents are not used in this script.
	my @unusedMonitorEntries;
	# Return value is captured but not consulted: emptiness is judged from
	# the loaded array itself.
	my $loaderResult = LoadSearchDirectoriesToArrays($configFilePath, \@DirectoriesToIndex,
		\@unusedMonitorEntries, \@DirectoriesToIgnore);

	my $dirCount = scalar(@DirectoriesToIndex);
	die("ERROR, no directories found in |$configFilePath|\n") unless ($dirCount);

	return($dirCount); # not currently used
	}
# Pick up lists of all files in all directories to index, one file at a time. Called by
# finddepth() above.
# DoOne: File::Find callback, invoked once per entry found under a directory being scanned.
# For each plain file that is not under an ignored folder, count the forward slashes in
# its normalized full path and, if that beats the current record, remember the count in
# $MaximumDepth, the path in $PathForMaximumDepth, and report the new record.
sub DoOne {
	my $fullPath = $File::Find::name;

	# Only plain files are measured: skip the "." entry and anything failing -f.
	return if ($_ eq "." || !(-f $fullPath));

	# Normalize to lower case with forward slashes only, the form
	# ShouldIgnoreFile() is given for its comparisons.
	my $normalizedPath = lc($fullPath);
	$normalizedPath =~ tr{\\}{/};

	return if (ShouldIgnoreFile($normalizedPath));

	# tr with an empty replacement list just counts the matching characters.
	my $slashCount = ($normalizedPath =~ tr{/}{});
	return unless ($slashCount > $MaximumDepth);

	$MaximumDepth = $slashCount;
	$PathForMaximumDepth = $normalizedPath;
	Output ("Depth $MaximumDepth for |$PathForMaximumDepth|\n");
	}
# Ignore file path if it starts with path to a folder to ignore, as
# listed in data/search_directories.txt. Comparisons are done
# in lower case with forward slashes only.
# ShouldIgnoreFile: return 1 if $fullPath begins with any entry in @DirectoriesToIgnore,
# otherwise 0. The caller is expected to pass the path already lower-cased and with
# forward slashes only; no normalization is done here.
# NOTE(review): this is a plain string-prefix test, so it presumably relies on the
# ignore entries being stored in the same lower-case, forward-slash form (and on how
# they are terminated) - confirm against LoadSearchDirectoriesToArrays().
sub ShouldIgnoreFile {
	my ($fullPath) = @_; # lc, / only

	foreach my $ignoredDir (@DirectoriesToIgnore)
		{
		# index() returning 0 means the entry matches at the very start of the path.
		return(1) if (index($fullPath, $ignoredDir) == 0);
		}

	return(0);
	}