diff --git a/python/py_src/sudachipy/command_line.py b/python/py_src/sudachipy/command_line.py index 07f59c19..e7574bf1 100644 --- a/python/py_src/sudachipy/command_line.py +++ b/python/py_src/sudachipy/command_line.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 Works Applications Co., Ltd. +# Copyright (c) 2019-2024 Works Applications Co., Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,13 @@ from . import sudachipy +logging.basicConfig( + style="{", + format='{levelname} {asctime} [{module}:{funcName}:{lineno}] {message}', + datefmt="%m-%d-%Y %H:%M:%S", +) + + def _set_default_subparser(self, name, args=None): """ copy and modify code from https://bitbucket.org/ruamel/std.argparse @@ -51,7 +58,7 @@ def _set_default_subparser(self, name, args=None): argparse.ArgumentParser.set_default_subparser = _set_default_subparser -def run(tokenizer, input_, output, print_all, morphs, is_stdout): +def run(tokenizer, input_, output, print_all, pos_list, is_stdout): # get an empty MorphemeList for memory reuse mlist = tokenizer.tokenize("") for line in input_: @@ -60,7 +67,7 @@ def run(tokenizer, input_, output, print_all, morphs, is_stdout): for m in tokenizer.tokenize(line, out=mlist): list_info = [ m.surface(), - morphs[m.part_of_speech_id()], + pos_list[m.part_of_speech_id()], m.normalized_form()] if print_all: list_info += [ @@ -97,27 +104,27 @@ def _command_tokenize(args, print_usage): if args.fpath_out: output = open(args.fpath_out, "w", encoding="utf-8") - stdout_logger = logging.getLogger(__name__) - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.DEBUG) - stdout_logger.addHandler(handler) - stdout_logger.setLevel(logging.DEBUG) - stdout_logger.propagate = False + logger = logging.getLogger(__name__) + logger.setLevel(logging.DEBUG) print_all = args.a + debug = args.d + if debug: + logger.warning("-d option is not implemented in python.") try: dict_ = Dictionary(config_path=args.fpath_setting, dict_type=args.system_dict_type) # empty matcher - get all POS tags - all_morphs = dict_.pos_matcher([()]) + all_pos_matcher = dict_.pos_matcher([()]) # precompute output POS strings - morphs = [",".join(ms) for ms in all_morphs] + pos_list = [",".join(ms) for ms in all_pos_matcher] tokenizer_obj = dict_.create(mode=args.mode) input_ = fileinput.input( args.in_files, openhook=fileinput.hook_encoded("utf-8")) - run(tokenizer_obj, input_, output, print_all, morphs, is_stdout=args.fpath_out is None) + run(tokenizer_obj, input_, output, print_all, + pos_list, is_stdout=args.fpath_out is None) finally: if args.fpath_out: output.close() @@ -139,7 +146,8 @@ def _command_build(args, print_usage): out_file = Path(args.out_file) if out_file.exists(): - print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr) + print("File", out_file, + "already exists, refusing to overwrite it", file=sys.stderr) return description = args.description or "" @@ -161,7 +169,8 @@ def _command_build(args, print_usage): def _command_user_build(args, print_usage): system = Path(args.system_dic) if not system.exists(): - print("System dictionary file", system, "does not exist", file=sys.stderr) + print("System dictionary file", system, + "does not exist", file=sys.stderr) return print_usage() in_files = [] @@ -174,7 +183,8 @@ def _command_user_build(args, print_usage): out_file = Path(args.out_file) if out_file.exists(): - print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr) + print("File", out_file, + "already exists, refusing to overwrite it", file=sys.stderr) return description = args.description or "" @@ -217,7 +227,7 @@ def main(): parser_tk.add_argument("-a", action="store_true", help="print all of the fields") parser_tk.add_argument("-d", action="store_true", - help="print the debug information") + help="print the debug information (not implemented yet)") parser_tk.add_argument("-v", "--version", action="store_true", dest="version", help="print sudachipy version") parser_tk.add_argument("in_files", metavar="file",