Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch set 53 #659

Merged
merged 25 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5fc2e2b
Fix deadlock in case of the bottleneck HashJoin with shared scan belo…
KnightMurloc Oct 13, 2023
e971286
ADBDEV-4349: gplogfilter: fix timerange validation (#627)
HustonMmmavr Oct 19, 2023
7256227
arenadata_tookit: replace using pg_partitions(#629)
red1452 Oct 1, 2023
6b28182
arenadata_tooklit: change temp db_files_current to pg_temp.db_files_c…
red1452 Oct 17, 2023
742bf6f
Fix inconsistency between the format of queries_now.dat and the queri…
KnightMurloc Oct 30, 2023
03e575e
Tweak catalog indexing abstraction for upcoming WARM
alvherre Jan 31, 2017
15bc162
Provide CatalogTupleDelete() as a wrapper around simple_heap_delete().
tglsfdc Feb 1, 2017
74f3375
Fix CatalogTupleInsert/Update abstraction for case of shared indstate.
tglsfdc Feb 1, 2017
3386c56
Update comments overlooked by 2f5c9d9c9cec436e55847ec580606d7e88067df6.
robertmhaas Mar 2, 2017
c60885a
Fix inconsistency between gp_fastsequence row and index after crash
huansong Feb 27, 2023
206ab6b
Only write WAL when needed in heap_freeze_tuple_wal_logged()
huansong Apr 12, 2023
1502bef
Fix possible inconsistency between bitmap LOV table and index
huansong Apr 5, 2023
85a7ee5
Change how new aoseg/aocsseg tuples are frozen
huansong Apr 5, 2023
8a40ff6
Remove remainings of "frozen insert"
huansong Apr 5, 2023
8cf6144
Improve performance of find_tabstat_entry()/get_tabstat_entry()
feodor Mar 27, 2017
fc87ae5
Make pgstat tabstat lookup hash table less fragile.
tglsfdc May 15, 2017
38c8f80
Implement vacuum/analyze ordering functions for arenadata_toolkit (#641)
RekGRpth Nov 17, 2023
120aaef
Fix CTEs with volatile target list for case of SegmentGeneral and Gen…
KnightMurloc Nov 24, 2023
f5215ad
Fix ORCA's triggers on debug build (#650)
Nov 28, 2023
32ad64e
Fix 'DROP OWNED BY' failure when some protocol is accessible by the u…
higuoxing Oct 28, 2021
fd5939d
Fix partition selection during DML execution when dropped columns are…
bimboterminator1 Dec 4, 2023
0941fac
Merge remote-tracking branch 'arena/adb-6.x-dev' into 6.26.0-sync2dev
Stolb27 Dec 5, 2023
6f87a84
alter precedence in favor of ipv4 during resolving (#657)
Stolb27 Dec 6, 2023
9d4ae43
Merge branch 'adb-6.x-dev' into 6.26.0-sync2dev
Stolb27 Dec 6, 2023
974ba91
Merge pull request #656 from arenadata/6.26.0-sync2dev
Stolb27 Dec 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion arenadata/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ RUN yum -y install centos-release-scl && \
echo -e 'source /opt/rh/devtoolset-7/enable' >> /opt/gcc_env.sh && \
echo -e '#!/bin/sh' >> /etc/profile.d/jdk_home.sh && \
echo -e 'export JAVA_HOME=/etc/alternatives/java_sdk' >> /etc/profile.d/jdk_home.sh && \
echo -e 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile.d/jdk_home.sh
echo -e 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile.d/jdk_home.sh && \
echo -e 'precedence ::ffff:0:0/96 100' >> /etc/gai.conf

RUN rpm -i $sigar && rpm -i $sigar_headers

Expand Down
14 changes: 7 additions & 7 deletions gpAux/gpperfmon/src/gpmon/gpmon_agg.c
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,6 @@ static void fmt_qlog(char* line, const int line_size, qdnode_t* qdnode, const ch
char timfinished[GPMON_DATE_BUF_SIZE];
double cpu_skew = 0.0f;
double row_skew = 0.0f;
int query_hash = 0;
apr_int64_t rowsout = 0;
float cpu_current;
cpu_skew = get_cpu_skew(qdnode);
Expand Down Expand Up @@ -1340,7 +1339,7 @@ static void fmt_qlog(char* line, const int line_size, qdnode_t* qdnode, const ch
snprintf(timfinished, GPMON_DATE_BUF_SIZE, "null");
}

snprintf(line, line_size, "%s|%d|%d|%d|%s|%s|%d|%s|%s|%s|%s|%" FMT64 "|%" FMT64 "|%.4f|%.2f|%.2f|%d",
snprintf(line, line_size, "%s|%d|%d|%d|%s|%s|%d|%s|%s|%s|%s|%" FMT64 "|%" FMT64 "|%.4f|%.2f|%.2f",
nowstr,
qdnode->qlog.key.tmid,
qdnode->qlog.key.ssid,
Expand All @@ -1356,8 +1355,7 @@ static void fmt_qlog(char* line, const int line_size, qdnode_t* qdnode, const ch
qdnode->qlog.cpu_elapsed,
cpu_current,
cpu_skew,
row_skew,
query_hash);
row_skew);
}


Expand All @@ -1377,8 +1375,8 @@ static apr_uint32_t write_qlog(FILE* fp, qdnode_t *qdnode, const char* nowstr, a
}
else
{
/* Query text "joined" by python script */
fprintf(fp, "%s|||||\n", line);
fputs(line, fp);
fputc('\n', fp);
return bytes_written;
}
}
Expand Down Expand Up @@ -1476,7 +1474,9 @@ static apr_uint32_t write_qlog_full(FILE* fp, qdnode_t *qdnode, const char* nows
return 0;
}

fprintf(fp, "%s", line);
/* The query hash column is always 0 */
fprintf(fp, "%s|0", line);
bytes_written += 2;

snprintf(qfname, qfname_size, GPMON_DIR "q%d-%d-%d.txt", qdnode->qlog.key.tmid,
qdnode->qlog.key.ssid, qdnode->qlog.key.ccnt);
Expand Down
6 changes: 5 additions & 1 deletion gpAux/gpperfmon/src/gpmon/gpmon_catqrynow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
# print the line

for line in open(os.path.join(GPMONDIR, "queries_now.dat")):
line = line.split('|')
line = line.strip().split('|')
line.append('0') # query hash
# allocate space for qrytxt, query plan, appname, rsqname and priority
line.extend(['']*5)

(tmid, xid, cid) = line[1:4]
qrytxt = ''
appname = ''
Expand Down
63 changes: 26 additions & 37 deletions gpMgmt/bin/gplogfilter
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,12 @@ if (options.zip is None and
options.zip = '9'

try:
if (begin and end) and begin >= end:
raise IOError(
'Invalid arguments: "begin" date (%s) is >= "end" date (%s)'
% (begin, end)
)

# If no inputfile arg, try MASTER_DATA_DIRECTORY environment variable
if len(args) == 0:
s = os.getenv('MASTER_DATA_DIRECTORY')
Expand Down Expand Up @@ -400,44 +406,27 @@ try:
% (begin or 'beginning of data', end or 'end of data'))
print >> sys.stderr, msg

# Loop over input files
for ifn in args:
"""
Open each file in the logs directory. Check to see if the file name
looks anything like a log file name with a time stamp that we
recognize. If true, and the user specified a time range, skip the
file if it is outside the range. That is, close the file and any
associated temporary files.

All other files with names that do not look like time stamps are
processed. That is, their log information is extracted, and if
the user specified a time range, only those entries that are
within that range are kept.
"""
# transform given array of log names (args array) into corresponding
# array of LogNameInfo structures. Final array is concatenation of
# LogNameInfo arrays with and without timestamps in log names.
# Both arrays are ordered:
# - first part is ordered by the time stamp of log name and after by
# name (in case of time stamp equality)
# - second part is ordered by name and lays out at the end
# All LogNameInfo structures are marked for belonging to the user
# specified time range. Log names without time stamp always should
# be processed, because them may contain log entries inside specified
# range, so LogNameInfo for such names always marked as belonging to
# the range.
logsInfoArr = getLogInfoArrayByNamesOrderedAndMarkedInTSRange(args, begin, end)

for logInfo in logsInfoArr:
if not logInfo.belongsToTimeRangeFilter:
print >> sys.stderr, "SKIP file: %s" % logInfo.name
continue

# Open next input file
fileIn, inputFilesToClose, ifn, zname = openInputFile(ifn, options)

# if we can skip the whole file, let's do so
if zname.startswith('gpdb') and zname.endswith('.csv'):
goodFormat = True
try:
# try format YYYY-MM-DD_HHMMSS
filedate = datetime.strptime(zname[5:-4], '%Y-%m-%d_%H%M%S')
except:
try:
# try format YYYY-MM-DD
filedate = datetime.strptime(zname[5:-4], '%Y-%m-%d')
except:
# the format isn't anything I understand
goodFormat = False

if goodFormat and begin and filedate < begin:
if end and filedate > end:
print >> sys.stderr, "SKIP file: %s" % zname
for f in inputFilesToClose:
f.close()
inputFilesToClose = []
continue
fileIn, inputFilesToClose, ifn, zname = openInputFile(logInfo.name, options)

# Announce each input file *before* its output file if --out is dir
if options.verbose and outputFilePerInputFile:
Expand Down
121 changes: 121 additions & 0 deletions gpMgmt/bin/gppylib/logfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@
# A timezone specifier may follow the timestamp, but we ignore that.


# Matches the date/time stamp embedded in a GPDB log file name.  The
# stamp format is YYYY-MM-DD_HHMMSS, or plain YYYY-MM-DD (kept to
# preserve an old behaviour).
# Raw string so that \d and \. reach the regex engine verbatim instead
# of relying on Python passing unknown string escapes through unchanged.
logNameTSPattern = re.compile(
    r'^.*gpdb-(?P<datetime>\d+-\d+-\d+(?P<time>_\d+)?)\.csv$'
)


def FilterLogEntries(iterable,
msgfile=sys.stderr,
verbose=False,
Expand Down Expand Up @@ -981,3 +989,116 @@ def spiffInterval(begin=None, end=None, duration=None):

return begin, end

class LogNameInfo(object):
    """
    Holds the key facts about a single log file name:
    - name: the log file name
    - dateTime: time stamp parsed from the name, or None when the name
      carries no recognizable time stamp
    - belongsToTimeRangeFilter: whether the file falls inside the
      user-specified time range (defaults to True, i.e. "process it")
    """
    def __init__(self, name, dateTime=None, belongsToTimeRangeFilter=True):
        self.name = name
        self.dateTime = dateTime
        self.belongsToTimeRangeFilter = belongsToTimeRangeFilter

    def __eq__(self, other):
        # Comparing against an unrelated type is treated as a programming
        # error rather than silently answering False.
        if type(self) != type(other):
            raise TypeError('comparing different types: %s and %s' % (type(self), type(other)))

        return (self.name == other.name and self.dateTime == other.dateTime
                and self.belongsToTimeRangeFilter == other.belongsToTimeRangeFilter)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so define it explicitly
        # to keep == and != consistent.
        return not self.__eq__(other)

    def __repr__(self):
        # Debug-friendly representation (e.g. in test failure output).
        return 'LogNameInfo(%r, %r, %r)' % (
            self.name, self.dateTime, self.belongsToTimeRangeFilter)

def _parseLogFileName(name):
    """
    Parse a log file name into a LogNameInfo.

    Log names created by GPDB normally embed the creation time stamp
    (see `logNameTSPattern`); when `name` matches that pattern, the
    returned LogNameInfo carries the parsed time stamp.  Names in any
    other format (for ex. startup.log) yield a LogNameInfo without a
    time stamp.
    """

    # matchedGroup in case of match would be not None and contains next
    # groups:
    # - group(0) - whole file name
    # - group('datetime') - timestamp of log (may be without time suffix)
    # - group('time') - not None if time suffix exists
    matchedGroup = logNameTSPattern.match(name)

    if not matchedGroup:
        return LogNameInfo(name)

    pattern = '%Y-%m-%d'
    if matchedGroup.group('time'):
        # we have a time prefix, so use the extended pattern
        pattern = '%Y-%m-%d_%H%M%S'

    dt = None
    try:
        dt = datetime.strptime(matchedGroup.group('datetime'), pattern)
    except ValueError:
        # The digits matched the regex but do not form a valid date
        # (for ex. month 13); treat the name as carrying no time stamp.
        pass

    return LogNameInfo(name, dt)

def _getOrderedLogNameInfoArrByNameTS(fileNames):
    """
    Turn an array of log file names into an ordered array of LogNameInfo.

    The result is the concatenation of two ordered groups:
    - names carrying a time stamp come first, sorted by that time stamp
      and then by name.  The secondary sort by name only matters for
      tests: under normal conditions a log directory should never hold
      two differently-spelled names parsing to the same time stamp (for
      ex. gpdb-2023-1-1_000000.csv versus the canonical
      gpdb-2023-01-01_000000.csv).
    - names without a time stamp follow, sorted by name alone.
    """

    parsed = [_parseLogFileName(fn) for fn in fileNames]

    stamped = sorted(
        (info for info in parsed if info.dateTime),
        key=lambda info: (info.dateTime, info.name))
    unstamped = sorted(
        (info for info in parsed if not info.dateTime),
        key=lambda info: info.name)

    return stamped + unstamped


def getLogInfoArrayByNamesOrderedAndMarkedInTSRange(logNames, begin, end):
    """
    Build an ordered LogNameInfo array from `logNames` and mark every
    entry for membership in the time range `begin` (inclusive) to `end`
    (exclusive).  Entries whose names carry a time stamp come first,
    ordered by that time stamp (name breaks ties); entries without a
    time stamp are ordered by name, placed at the end of the array, and
    always left marked as belonging to the range.
    """

    orderedInfoArr = _getOrderedLogNameInfoArrByNameTS(logNames)

    # Round `begin` down to the nearest log-name time stamp that is not
    # after it: a file whose name carries time stamp T holds log entries
    # matching the range
    #   T <= entry time stamp AND
    #   entry time stamp < (T + GUC:`log_rotation_age`) or restart time
    # so the file just before `begin` may still contain entries inside
    # the user-specified range.
    adjustedBegin = begin
    if adjustedBegin:
        for info in orderedInfoArr:
            ts = info.dateTime
            if ts is None or ts > begin:
                break
            adjustedBegin = ts

    for info in orderedInfoArr:
        ts = info.dateTime
        if ts is None:
            # reached the tail of names without time stamps - nothing
            # left to mark
            break

        tooEarly = bool(adjustedBegin) and ts < adjustedBegin
        tooLate = bool(end) and ts >= end
        if tooEarly or tooLate:
            info.belongsToTimeRangeFilter = False

    return orderedInfoArr
Loading
Loading