Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added long running commands runtime to log files. #5260

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
2 changes: 2 additions & 0 deletions src/drt/src/TritonRoute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#include "sta/StaMain.hh"
#include "stt/SteinerTreeBuilder.h"
#include "ta/FlexTA.h"
#include "utl/timer.h"

namespace sta {
// Tcl files encoded into strings.
Expand Down Expand Up @@ -917,6 +918,7 @@ void TritonRoute::sendDesignUpdates(const std::string& globals_path)

int TritonRoute::main()
{
utl::ScopedStatistics stat(logger_, "detailed_route");
if (DBPROCESSNODE == "GF14_13M_3Mx_2Cx_4Kx_2Hx_2Gx_LB") {
USENONPREFTRACKS = false;
}
Expand Down
2 changes: 2 additions & 0 deletions src/gpl/src/nesterovPlace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "routeBase.h"
#include "timingBase.h"
#include "utl/Logger.h"
#include "utl/timer.h"

namespace gpl {
using utl::GPL;
Expand All @@ -62,6 +63,7 @@ NesterovPlace::NesterovPlace(const NesterovPlaceVars& npVars,
utl::Logger* log)
: NesterovPlace()
{
utl::ScopedStatistics stat(log, "global_place");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should go in Replace::doNesterovPlace to capture initial placement as well.

npVars_ = npVars;
pbc_ = pbc;
nbc_ = nbc;
Expand Down
2 changes: 2 additions & 0 deletions src/grt/src/GlobalRouter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#include "stt/SteinerTreeBuilder.h"
#include "utl/Logger.h"
#include "utl/algorithms.h"
#include "utl/timer.h"

namespace grt {

Expand Down Expand Up @@ -248,6 +249,7 @@ void GlobalRouter::globalRoute(bool save_guides,
bool start_incremental,
bool end_incremental)
{
utl::ScopedStatistics stat(logger_, "global_route");
if (start_incremental && end_incremental) {
logger_->error(GRT,
251,
Expand Down
2 changes: 2 additions & 0 deletions src/mpl2/src/rtl_mp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "hier_rtlmp.h"
#include "object.h"
#include "odb/db.h"
#include "utl/timer.h"

namespace mpl2 {
using odb::dbDatabase;
Expand Down Expand Up @@ -94,6 +95,7 @@ bool MacroPlacer2::place(const int num_threads,
const bool bus_planning_on,
const char* report_directory)
{
utl::ScopedStatistics stat(logger_, "rtl_macro_placer");
hier_rtlmp_->setClusterSize(
max_num_macro, min_num_macro, max_num_inst, min_num_inst);
hier_rtlmp_->setClusterSizeTolerance(tolerance);
Expand Down
2 changes: 2 additions & 0 deletions src/rsz/src/RepairDesign.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "sta/Search.hh"
#include "sta/SearchPred.hh"
#include "sta/Units.hh"
#include "utl/timer.h"

namespace rsz {

Expand Down Expand Up @@ -93,6 +94,7 @@ void RepairDesign::repairDesign(double max_wire_length,
bool verbose)
{
init();
utl::ScopedStatistics stat(logger_, "repair_design");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move to Resizer::repairDesign. This API might be called internally.

int repaired_net_count, slew_violations, cap_violations;
int fanout_violations, length_violations;
repairDesign(max_wire_length,
Expand Down
3 changes: 3 additions & 0 deletions src/rsz/src/RepairHold.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "sta/TimingArc.hh"
#include "sta/Units.hh"
#include "utl/Logger.h"
#include "utl/timer.h"

namespace rsz {

Expand Down Expand Up @@ -93,6 +94,7 @@ void RepairHold::repairHold(
const bool verbose)
{
init();
utl::ScopedStatistics stat(logger_, "repair_timing");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move to Resizer::repairHold

sta_->checkSlewLimitPreamble();
sta_->checkCapacitanceLimitPreamble();
LibertyCell* buffer_cell = findHoldBuffer();
Expand Down Expand Up @@ -132,6 +134,7 @@ void RepairHold::repairHold(const Pin* end_pin,
const int max_passes)
{
init();
utl::ScopedStatistics stat(logger_, "repair_timing");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move to Resizer::repairHold

sta_->checkSlewLimitPreamble();
sta_->checkCapacitanceLimitPreamble();
LibertyCell* buffer_cell = findHoldBuffer();
Expand Down
21 changes: 21 additions & 0 deletions src/utl/include/utl/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

#pragma once

#include <unistd.h>

#include <chrono>
#include <iostream>

Expand Down Expand Up @@ -78,4 +80,23 @@ class DebugScopedTimer : public Timer

std::ostream& operator<<(std::ostream& os, const Timer& t);

class ScopedStatistics : public Timer
{
public:
ScopedStatistics(utl::Logger* logger, std::string msg);
~ScopedStatistics() override;

private:
size_t getMemoryUsage(const char* tag);
size_t getStartRSZ();
size_t getPeakRSZ();
size_t getStartVSZ();
size_t getPeakVSZ();
maliberty marked this conversation as resolved.
Show resolved Hide resolved

std::string msg_;
size_t start_rsz_;
size_t start_vsz_;
utl::Logger* logger_;
};

} // namespace utl
71 changes: 71 additions & 0 deletions src/utl/src/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@
//
///////////////////////////////////////////////////////////////////////////////

#define _GNU_SOURCE
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
#include "utl/timer.h"

#include <sys/resource.h>

#include <fstream>

namespace utl {

void Timer::reset()
Expand Down Expand Up @@ -74,4 +79,70 @@ DebugScopedTimer::~DebugScopedTimer()
debugPrint(logger_, tool_, group_.c_str(), level_, msg_, *this);
}

ScopedStatistics::ScopedStatistics(utl::Logger* logger, std::string msg)
: Timer(),
msg_(msg),
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
start_rsz_(getStartRSZ()),
start_vsz_(getStartVSZ()),
logger_(logger)
{
}

size_t ScopedStatistics::getMemoryUsage(const char* tag)
{
FILE* file = fopen("/proc/self/status", "r");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

procfs doesn't exist on OSX unfortunately. src/drt/src/db/infra/frTime_helper.cpp attempts to handle OSX

if (file == NULL) {
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
perror("Failed to open /proc/self/status");
exit(EXIT_FAILURE);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is too harsh a form of error handling. Issuing a logger warning is sufficient.

}

size_t result = (size_t) -1;
char line[128];
size_t tagLength = strlen(tag);

while (fgets(line, sizeof(line), file) != NULL) {
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
if (strncmp(line, tag, tagLength) == 0) {
const char* p = line;
while (*p < '0' || *p > '9')
p++;
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved
result = (size_t) atoi(p);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this input was human made, I would recommend more error handling, like checking that the number is a valid decimal integer and not something like 00031243, but this is well defined machine generated input, an API to the OS, so I think this is fine as it is.

break;
}
}

fclose(file);
if (result == (size_t) -1) {
fprintf(stderr, "%s not found in /proc/self/status\n", tag);
exit(EXIT_FAILURE);
}

return result / 1024;
}

size_t ScopedStatistics::getStartVSZ()
{
return getMemoryUsage("VmSize:");
}

size_t ScopedStatistics::getPeakVSZ()
{
return getMemoryUsage("VmPeak:");
}

size_t ScopedStatistics::getStartRSZ()
{
return getMemoryUsage("VmRSS:");
}

size_t ScopedStatistics::getPeakRSZ()
{
return getMemoryUsage("VmHWM:");
}
annapetrosyan26 marked this conversation as resolved.
Show resolved Hide resolved

ScopedStatistics::~ScopedStatistics()
{
logger_->report(msg_ + ": runtime {} seconds, usage: rsz = {} MB, vsz = {} MB, peak: rsz = {} MB, vsz = {} MB", static_cast<int>(Timer::elapsed()), (getPeakRSZ() - start_rsz_),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

runtime is unclear as it could be elapsed or cpu time. I think using elapsed is clearer.

Personally I think peak is sufficient (@oharboe any opinion)?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we don't know the starting memory consumption, we can miss false positives: the problem is not with the algorithm that is reported as too high usage, but something in the setup.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't tell you the starting memory - only the end - start

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correct. pick a way to present two numbers....

start memory is trivially calculated from peak and usagr, true for any cpmbination of these two numbers. most time usage and peak is what is of interested

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

usage and peak do not give start. The peak may well be in the middle not at the end.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which is what peak should say and why I don't think usage matters much.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Keep it simple. If there is something wrong(large initial memory allocation outside of scope), then it can be debugged otherwise. I think normal operation, determining required RAM, is more important and other information clouds that...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so let's go with the DRT format above (with prefix)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the conversation below it's not clear what the final message should be. Please confirm if it should look like the DRT message you provided.
[INFO DRT-0267] cpu time = 00:00:04, elapsed time = 00:00:01, memory = 2197.27 (MB), peak = 4031.35 (MB)
Otherwise please provide the exact way you would like the message to be.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking the prefix would be about what is being timed, eg

repair_design: cpu time = 00:00:04, elapsed time = 00:00:01, memory = 2197.27 (MB), peak = 4031.35 (MB)

Though the prefix would be up to the client code.

(getPeakVSZ() - start_vsz_), (getPeakRSZ()), (getPeakVSZ()));
}

} // namespace utl
1 change: 1 addition & 0 deletions src/utl/test/regression_tests.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ record_tests {
test_error
test_suppress_message
test_metrics
test_statistics
#utl_man_tcl_check
#utl_readme_msgs_check
#test_error_exception
Expand Down
1 change: 1 addition & 0 deletions src/utl/test/test_statistics.ok
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Passed: Statistics are logged.
42 changes: 42 additions & 0 deletions src/utl/test/test_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hook up to CI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't using pathlib or os a matter of taste ?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I mean with "CI" is continuous integration, I wanted to know if this test is hooked up to the tests run on pull requests in github.

Regarding os vs pathlib, I think pathlib is more recent and a bit more elegant(but a little bit more complex than os) in the Python world but that os is more familiar(I know it better). I have no strong opinion on either for such simple scripts as these. It depends on what my colleagues prefer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is included in the regression, so it's in CI.

import subprocess
import re

def run_regression_script(regression_script_path, argument):
try:
subprocess.run([regression_script_path, argument], check=True)
except subprocess.CalledProcessError as e:
print(f"Error running regression script: {e}")
return False
return True

def check_log_file(log_path):
pattern = re.compile(r"repair_design: runtime.*seconds.*usage.*rsz.*MB.*vsz.*MB.*peak.*rsz.*vsz.*")
runtime_count = 0

with open(log_path, 'r') as log_file:
for line in log_file:
if pattern.search(line):
runtime_count += 1
if runtime_count == 2:
print("Passed: Statistics are logged.")
exit(0)
else:
print("Statistics are not logged enough times.")
exit(1)

def main():
script_dir = os.path.dirname(os.path.abspath(__file__))
base_dir = os.path.abspath(os.path.join(script_dir, "../../rsz/test"))
log_path = os.path.join(base_dir, "results", "repair_design4-tcl.log")
regression_script_path = os.path.join(base_dir, "regression")

if not os.path.exists(log_path):
if not run_regression_script(regression_script_path, "repair_design4"):
exit(1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is a regression test calling regression again? This will create a conflict with the results of running repair_design4 (especially if run in parallel with this test). It is better to make this test self-contained.


check_log_file(log_path)

if __name__ == "__main__":
main()

46 changes: 37 additions & 9 deletions test/regression.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,40 @@ proc run_test { test } {
}
}

proc log_comparer {test log_file ok_file diff_file diff_options error_msg} {
global result_dir errors

# Filter dos '/r's from log file.
set tmp_file [file join $result_dir $test.tmp]
exec tr -d "\r" < $log_file > $tmp_file
file rename -force $tmp_file $log_file

# Create a temporary filtered log file that ignores lines with the specific pattern
set tmp_log_file [file join $result_dir "${test}_log.tmp"]

set sed_result [catch \
{exec sed {/.*runtime.*seconds.*usage.*rsz.*MB.*vsz.*MB.*peak.*rsz.*vsz.*/d} \
$log_file > $tmp_log_file} sed_error]
if {$sed_result != 0} {
puts "Error running sed"
puts "Error message: $sed_error"
incr errors(fail)
return 1
} else {
# Proceed with the diff operation
set result [catch {exec diff $diff_options $ok_file $tmp_log_file >> $diff_file}]
if {$result != 0} {
puts " *FAIL log_and_ok*$error_msg"
return 1
} else {
puts " pass$error_msg"
return 0
}
}
# Clean up the temporary filtered log file.
file delete -force $tmp_log_file
}

proc run_test_lang { test lang } {
global result_dir diff_file errors diff_options

Expand Down Expand Up @@ -226,17 +260,11 @@ proc run_test_lang { test lang } {
switch [test_pass_criteria $test] {
compare_logfile {
if { [file exists $ok_file] } {
# Filter dos '/r's from log file.
set tmp_file [file join $result_dir $test.tmp]
exec tr -d "\r" < $log_file > $tmp_file
file rename -force $tmp_file $log_file
if [catch [concat exec diff $diff_options $ok_file $log_file \
>> $diff_file]] {
puts " *FAIL*$error_msg"
set compare_result [log_comparer $test $log_file $ok_file \
$diff_file $diff_options $error_msg]
if {$compare_result != 0} {
append_failure $test
incr errors(fail)
} else {
puts " pass$error_msg"
}
} else {
puts " *NO OK FILE*$error_msg"
Expand Down
Loading