diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0548c0ee9..1d4c361cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ Most recent releases are shown at the top. Each release shows:
 - **Changed**: Additional parameters, changes to inputs or outputs, etc
 - **Fixed**: Bug fixes that don't change documented behaviour
 
-## 0.31.3 (TBD)
+## 0.31.3 (2022-07-15)
 
 ### new:
 - N/A
@@ -15,7 +15,7 @@ Most recent releases are shown at the top. Each release shows:
 - added `alnum` check and period check to `KeywordExtractor`
 
 ### fixed:
-- N/A
+- fixed bug in `text.qa.core` caused by previous refactoring of `paragraph_tokenize` and `tokenize`
 
 
 ## 0.31.1 (2022-05-17)
diff --git a/ktrain/text/textutils.py b/ktrain/text/textutils.py
index 992d6d4b2..4b57ecbe6 100644
--- a/ktrain/text/textutils.py
+++ b/ktrain/text/textutils.py
@@ -438,15 +438,17 @@ def paragraph_tokenize(
         elif join_sentences and not join_tokens:
             sents = [item for sublist in sents for item in sublist]
         paragraphs.append(sents)
-    paragraphs = paragraphs[0] if len(paragraphs) == 1 else paragraphs
+    # 20220715: moved to tokenize due to text/qa/core.py usage
+    # paragraphs = paragraphs[0] if len(paragraphs) == 1 else paragraphs
     return paragraphs
 
 
 def tokenize(s, join_tokens=False, join_sentences=True, join_char=" "):
     s = s.replace("\n", " ")
-    return paragraph_tokenize(
+    paragraphs = paragraph_tokenize(
         s, join_tokens=join_tokens, join_sentences=join_sentences, join_char=join_char
     )
+    return paragraphs[0] if len(paragraphs) == 1 else paragraphs
 
 
 def extract_noun_phrases(text):