Skip to content

Commit

Permalink
Merge pull request #278 from WorksApplications/feature/warn-pycli-debug-option
Browse files Browse the repository at this point in the history

warn on `-d` option of python cli
  • Loading branch information
mh-northlander authored Nov 11, 2024
2 parents c262ad2 + 1cad6c9 commit e759196
Showing 1 changed file with 26 additions and 16 deletions.
42 changes: 26 additions & 16 deletions python/py_src/sudachipy/command_line.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019 Works Applications Co., Ltd.
# Copyright (c) 2019-2024 Works Applications Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,13 @@
from . import sudachipy


# Configure the root logger for the command-line interface.
# Uses "{"-style (str.format) placeholders; each record is rendered as
# "<level> <timestamp> [<module>:<function>:<line>] <message>", with the
# timestamp formatted as month-day-year hour:minute:second.
# NOTE(review): this appears to run at module level, i.e. on import of this
# module; basicConfig only takes effect if the root logger has no handlers
# yet — confirm this is the intended scope.
logging.basicConfig(
style="{",
format='{levelname} {asctime} [{module}:{funcName}:{lineno}] {message}',
datefmt="%m-%d-%Y %H:%M:%S",
)


def _set_default_subparser(self, name, args=None):
"""
copy and modify code from https://bitbucket.org/ruamel/std.argparse
Expand Down Expand Up @@ -51,7 +58,7 @@ def _set_default_subparser(self, name, args=None):
argparse.ArgumentParser.set_default_subparser = _set_default_subparser


def run(tokenizer, input_, output, print_all, morphs, is_stdout):
def run(tokenizer, input_, output, print_all, pos_list, is_stdout):
# get an empty MorphemeList for memory reuse
mlist = tokenizer.tokenize("")
for line in input_:
Expand All @@ -60,7 +67,7 @@ def run(tokenizer, input_, output, print_all, morphs, is_stdout):
for m in tokenizer.tokenize(line, out=mlist):
list_info = [
m.surface(),
morphs[m.part_of_speech_id()],
pos_list[m.part_of_speech_id()],
m.normalized_form()]
if print_all:
list_info += [
Expand Down Expand Up @@ -97,27 +104,27 @@ def _command_tokenize(args, print_usage):
if args.fpath_out:
output = open(args.fpath_out, "w", encoding="utf-8")

stdout_logger = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
stdout_logger.addHandler(handler)
stdout_logger.setLevel(logging.DEBUG)
stdout_logger.propagate = False
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

print_all = args.a
debug = args.d
if debug:
logger.warning("-d option is not implemented in python.")

try:
dict_ = Dictionary(config_path=args.fpath_setting,
dict_type=args.system_dict_type)
# empty matcher - get all POS tags
all_morphs = dict_.pos_matcher([()])
all_pos_matcher = dict_.pos_matcher([()])
# precompute output POS strings
morphs = [",".join(ms) for ms in all_morphs]
pos_list = [",".join(ms) for ms in all_pos_matcher]

tokenizer_obj = dict_.create(mode=args.mode)
input_ = fileinput.input(
args.in_files, openhook=fileinput.hook_encoded("utf-8"))
run(tokenizer_obj, input_, output, print_all, morphs, is_stdout=args.fpath_out is None)
run(tokenizer_obj, input_, output, print_all,
pos_list, is_stdout=args.fpath_out is None)
finally:
if args.fpath_out:
output.close()
Expand All @@ -139,7 +146,8 @@ def _command_build(args, print_usage):

out_file = Path(args.out_file)
if out_file.exists():
print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr)
print("File", out_file,
"already exists, refusing to overwrite it", file=sys.stderr)
return

description = args.description or ""
Expand All @@ -161,7 +169,8 @@ def _command_build(args, print_usage):
def _command_user_build(args, print_usage):
system = Path(args.system_dic)
if not system.exists():
print("System dictionary file", system, "does not exist", file=sys.stderr)
print("System dictionary file", system,
"does not exist", file=sys.stderr)
return print_usage()

in_files = []
Expand All @@ -174,7 +183,8 @@ def _command_user_build(args, print_usage):

out_file = Path(args.out_file)
if out_file.exists():
print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr)
print("File", out_file,
"already exists, refusing to overwrite it", file=sys.stderr)
return

description = args.description or ""
Expand Down Expand Up @@ -217,7 +227,7 @@ def main():
parser_tk.add_argument("-a", action="store_true",
help="print all of the fields")
parser_tk.add_argument("-d", action="store_true",
help="print the debug information")
help="print the debug information (not implemented yet)")
parser_tk.add_argument("-v", "--version", action="store_true",
dest="version", help="print sudachipy version")
parser_tk.add_argument("in_files", metavar="file",
Expand Down

0 comments on commit e759196

Please sign in to comment.