dmmiller612 · echoyinke · May 20, 2022 · May 20, 2022 · May 20, 2022
diff --git a/README.md b/README.md
@@ -1,4 +1,7 @@
-# Bert Extractive Summarizer
+
+Modified for Chinese compatibility.
+
+# Bert Extractive Summarizer (modified for Chinese compatibility)
 
 ![Build Status](https://github.com/dmmiller612/bert-extractive-summarizer/actions/workflows/test.yml/badge.svg)
 [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/dmmiller612/bert-extractive-summarizer)

diff --git a/summarizer/text_processors/sentence_handler.py b/summarizer/text_processors/sentence_handler.py
@@ -1,18 +1,19 @@
 from typing import List
 
-from spacy.lang.en import English
+from spacy.lang.zh import Chinese
 from spacy.language import Language
 from summarizer.text_processors.sentence_abc import SentenceABC
 
 
 class SentenceHandler(SentenceABC):
     """Basic Sentence Handler."""
 
-    def __init__(self, language: Language = English):
+    def __init__(self, language: Language = Chinese):
         """
         Base Sentence Handler with Spacy support.
 
-        :param language: Determines the language to use with spacy.
+        :param language: Determines the language to use with spacy
+        (defaults to Chinese).
         """
         nlp = language()
 

diff --git a/test.py b/test.py
@@ -0,0 +1,23 @@
+# -*- coding: gbk -*-
+from transformers import *
+
+# Load model, model config and tokenizer via Transformers
+custom_config = AutoConfig.from_pretrained('bert-base-chinese')
+custom_config.output_hidden_states=True
+custom_tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')
+custom_model = AutoModel.from_pretrained('bert-base-chinese', config=custom_config)
+
+from summarizer import Summarizer
+from summarizer.sbert import SBertSummarizer
+
+body = '�й����𱨼��� ��� n���깫ļ��ҵ�ٶ�������ְ�������зǡ���Ӧ�������衢��Ө�����ǻ�������ְ������ԭ�����������Ǽ��Ų�Ʒ�ɱ�Ļ������ӹܣ�Ͷ���߸���ο�������ȥ���������⣬���ǻ��������뿪���Ի���˾������ЩӰ�죬�˲��Ƿ��ܽ����ϣ�Ҳ����ҵ�ڹ�ע�����߲ɷ���һЩҵ����ʿ�������Щ���⣬������ΪͶ���߿��Ը���һ���Ĺ۲��ڣ����������λ���������������Ĵ��㡣�˲������ǻ�����ҵ��������������Ҳ�������Իͣ�����˾��Ҫ��ǿ�˲��ݶӽ��衣 n��ְ�������ǲ�Ʒ�������ܹ�עͶ���߲������۲��ڡ��������� nWind������ʾ������4��17�գ����ڹ���64�һ���˾��92�����������Σ����ȥ��ͬ������23%�� n�������ǻ���������ְ������ע��������֤ȫ�����ԭ���ܾ������зǴ�ǰ������ְ����ԭ����������ȫ���ƵȻ�����л��������Ȼ������ӹܡ���ŷ����ԭ���ǻ�������Ӧ��ж�κ���ŷʱ���ȷ�Ȼ�������ε�ġ���ΰΰ�Ƚ��֡����У�ũ�������������衢��ӯ����ФФ���κ󣬹�˾�����˲�ͬ������������ԭ�����������ǻ����Ʒ�� n�����о�Ժ��Ϊ������Ͷ���߶��ǿ������������µ���������ν��ѡ��������ѡ�ˡ���������ְ�Ļ������У�Ͷ�����޳���5�ꡢͶ�������껯�ر��ʳ���10%�Ļ���������12λ������ְ���������������ǲ�Ʒ�ɹ�˾���������˽��Σ���ҪӰ�����ڻ�����֮���Ͷ������ó�����Ͷ�ʷ�񲻾���ͬ���������ı䶯ֱ��Ӱ���Ż����ҵ�����֣��������ǲ�Ʒ���������������ǿ���������������𣬻���������ְ���ܶԲ���Ͷ���߼����������ǲ�Ʒ��̬�������ɶ����� n�����о�Ժ���飬��������Ȩ��������Ͷ���ߣ�����׷�����λ���������������ʷҵ������ʷ�ڱ���Ͷ�������������ۺϽ��п��������������Լ�һ��ʱ��ġ��۲��ڡ��������������λ����������µĻ����Ʒ�Ƿ��������Ԥ�ڣ��������л���ش��㡣 n�����о��ƣ���������ְ֮�������ߵĲ���˼·�����֮ǰ����������ģ���ʹ������������������Ȼ�ǿƼ������ѷ�񣬵��ǻ�������Ͷ�ʷ���һ�㶼����һ�����졣���Խ���Ͷ�����������Ӹ����Ʒ������򵥵���ҵ���������ơ������Ի���������������Ƿ����꣬������Ϊ���������͹ۡ�����Ͷ���������������ʱ�������������λ�������Ͷ��������������ˮƽ���پ������ꡣ�� nͨ�����ݸ߼������о�Ա��������ʾ����	'
+# model = Summarizer(custom_model=custom_model, custom_tokenizer=custom_tokenizer)
+# a =model(body, min_length =5,ratio=0.1,num_sentences=2)
+# print(a)
+
+
+from summarizer.sbert import SBertSummarizer
+
+model = SBertSummarizer('distiluse-base-multilingual-cased-v1')
+result = model(body, min_length=5, num_sentences=3)
+print(result)