From 5d652b4424b71ba95f771219a73fd74150be896c Mon Sep 17 00:00:00 2001
From: BLKSerene <blkserene@gmail.com>
Date: Mon, 17 Jun 2024 03:12:53 +0800
Subject: [PATCH] Doc: Update README

---
 .github/workflows/tests.yml                   |  2 +-
 README.md                                     |  2 +-
 WORKS_USING_WORDLESS.md                       | 49 +++++++++++++------
 azure-pipelines.yml                           |  2 +-
 doc/trs/zho_cn/README.md                      | 14 +++---
 doc/trs/zho_cn/WORKS_USING_WORDLESS.md        | 33 ++++++++++---
 doc/trs/zho_tw/README.md                      | 12 ++---
 doc/trs/zho_tw/WORKS_USING_WORDLESS.md        | 33 ++++++++++---
 requirements/requirements_tests.txt           |  7 +--
 .../test_file_area_file_types.py              |  2 +-
 10 files changed, 104 insertions(+), 52 deletions(-)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 6ad3279bb..24941f243 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -64,7 +64,7 @@ jobs:
 
   # macOS
   build-macos:
-    runs-on: macos-11
+    runs-on: macos-12
 
     steps:
       - uses: actions/checkout@v4
diff --git a/README.md b/README.md
index 6afc132be..794254b58 100644
--- a/README.md
+++ b/README.md
@@ -92,7 +92,7 @@ Release|Remarks
 > 
 > **Note 3:** While opening corpora in languages other than English in *Wordless*, extra model files might need to be downloaded from the internet. If you encounter a **Network Error** dialog while downloading the model, chances are that it's a literal network error, so you just need to check your internet connections following the instructions in the error message and try downloading the model once more.
 > 
-> Users in China, where connections to Github and Hugging Face Hub are unstable, are recommended to use a proxy and set properly the settings in **Menu - Preferences - Settings - General - Proxy Settings**. Alternatively, Chinese users can choose to manually download model files from [Baidu Netdisk](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F). The steps of installing models are as follows:
+> Users in China, where connections to GitHub and Hugging Face Hub are unstable, are advised to use a proxy and configure it properly in **Menu Bar → Preferences → Settings → General → Proxy Settings**. Alternatively, Chinese users can choose to manually download model files from [Baidu Netdisk](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F). The steps for installing models are as follows:
 > 
 > 1. Check the error message displayed in the **Network Error** dialog. If **stanza** is found in the error message, you need a *Stanza* model, otherwise you need a *spaCy* model.
 > 2. Download model files for the language of your corpus from the above link and **extract all files**.
diff --git a/WORKS_USING_WORDLESS.md b/WORKS_USING_WORDLESS.md
index fc4f6ea90..a7b7f3945 100644
--- a/WORKS_USING_WORDLESS.md
+++ b/WORKS_USING_WORDLESS.md
@@ -21,32 +21,49 @@
 The following lists are not intended to be exhaustive and are just for reference. If you are aware of other works using Wordless that are not listed below, please contact the author for them to be added here.<br>
 
 <div align="center">
-    <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#journal-articles">Journal Articles</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#masters-theses">Master's Theses</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#doctoral-dissertations">Doctoral Dissertations</a>
+    <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#journal-articles">Journal Articles</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#masters-theses">Master's Theses</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#doctoral-dissertations">Doctoral Dissertations</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/WORKS_USING_WORDLESS.md#conference-papers">Conference Papers</a>
 </div>
 
 <div align="center"><h2>Journal Articles</h2></div>
 
-1. Dong, H. (2019). Jiyu yuliaoku de Xi Jinping tan zhiguo lizheng ying yiben zhong gaopinci de yingyong tezheng fenxi. *Yangtze River Series*, *2019*(10), 104–105.
-1. Gao, L, & Wang, J. (2023). Jiyu yuliaoku de yiben fengge yanjiu: yi Ku Chenxiong qifeng le de xinli miaoxie fanyi wei zhongxin. *Dongfang wenhua zhoukan*, *2023*(34), 1–3.
-1. Li, G., Wang, H., & Wang, J. (2023). Jiyu yuliaoku de “hongxing zhaoyao zhongguo” yizhe fengge yanjiu: yuyan yu fei yuyan biaozheng kaocha. *Cultural Journal*, *2023*(8), 179–183.
-1. Lv, R., & Ma, W.-L. (2020). Wenxin Diaolong san ge ying yiben ke jieshou du duibi yanjiu [Comparative study on acceptability of three English versions of the Literary Mind and the Carving of Dragons]. *Journal of Hubei University of Education*, *37*(3), 17–22.
-1. Shao, K. (2022). Yiwu jiaoyu tongbian yuwen jiaokeshu shouci fenci: jiyu yu Yiwu Jiaoyu Changyong Cibiao (Caoan) de bijiao [An analysis of the vocabulary in Chinese language textbooks for compulsory education: Based on the comparison with Yiwu Jiaoyu Changyong Cibiao (Caoan)]. *Modern Chinese*, *2022*(12), 67–75.
-1. Shi, H. (2022). Yuliaoku yuyanxue shijiao xia de shudao nan yingyi yanjiu [A C-E translation study of the Shu Way Is Hard from the perspective of corpus linguistics]. *Modern Linguistics*, *10*(6), 1347–1355. https://doi.org/10.12677/ml.2022.106180
-1. Song, Z., Li, L., & Liang, Y. (2021). “Goushi qudong” duxie jiaoxue moshi xia de goushi xide yanjiu [Experimental research on construction acquisition in the construction-driven reading and writing teaching mode]. *Journal of Chengdu Normal University*, *37*(3), 51–58.
-1. Tao, Y. (2021). Jiyu emei yuliaoku de xinguan feiyan zhutici yu yiqing bianqian yanjiu [Corpus-based study on the changes of key words and epidemic situations of COVID-19 in Russian media]. *Journal of Nanchang Hangkong University: Social Sciences*, *23*(1), 114–124.
+1. Chang, Y. (2020). A corpus-based comparative study of translation universals in two English translations of Li Sao. *Journal of Literature and Art Studies*, *10*(10), 916–922. https://doi.org/10.17265/2159-5836/2020.10.007
+1. Li, G., Wang, H., & Wang, J. (2023). Jiyu yuliaoku de “Hongxing Zhaoyao Zhongguo” yizhe fengge yanjiu: yuyan yu fei yuyan biaozheng kaocha. *Wenhua Xuekan*, *2023*(8), 179–183.
+1. Lv, R., & Ma, W. (2020). “Wenxindiaolong” san ge yingyiben kejieshoudu duibi yanjiu [Comparative study on acceptability of three English versions of the Literary Mind and the Carving of Dragons]. *Hubei Dier Shifan Xueyuan Xuebao*, *37*(3), 17–22.
+1. Shao, K. (2022). Yiwu jiaoyu tongbian yuwen jiaokeshu shouci fenci: jiyu yu “Yiwu Jiaoyu Changyong Cibiao (Caoan)” de bijiao [An analysis of the vocabulary in Chinese language textbooks for compulsory education: Based on the comparison with Yiwu Jiaoyu Changyong Cibiao (Caoan)]. *Xiandai Yuwen*, *2022*(12), 67–75.
+1. Shi, H. (2022). Yuliaoku yuyanxue shijiao xia de “Shudao Nan” yingyi yanjiu [A C-E translation study of the Shu Way Is Hard from the perspective of corpus linguistics]. *Xiandai Yuyanxue*, *10*(6), 1347–1355. https://doi.org/10.12677/ml.2022.106180
+1. Song, Z., Li, L., & Liang, Y. (2021). “Goushi qudong” duxie jiaoxue moshi xia de goushi xide yanjiu [Experimental research on construction acquisition in the construction-driven reading and writing teaching mode]. *Chengdu Shifan Xueyuan Xuebao*, *37*(3), 51–58.
+1. Sun, Y., Kong, D., & Zhou, C. (2023). Economy or ecology: Metaphor use over time in China’s government work reports. *Language and Cognition*, *15*(3), 551–573. https://doi.org/10.1017/langcog.2023.18
+1. Tao, Y. (2021). Jiyu Emei yuliaoku de xinguan feiyan zhutici yu yiqing bianqian yanjiu [Corpus-based study on the changes of key words and epidemic situations of COVID-19 in Russian media]. *Nanchang Hangkong Daxue Xuebao: Shehui Kexue*, *23*(1), 114–124.
+1. Wang, L., & Zhu, Y. (2024). Shuzi renwen shiyu xia ershisishi lunzan yinyu de yanjuxing yanjiu [The evidentiality of the quotation in the argument and praise discourse of the twenty-four histories from the perspective of digital humanities]. *Shuzi Renwen*, *2024*(1).
+1. Wang, S. (2023). Cómo fomentar la incorporación de agendas políticas feministas en el discurso político: Estrategias para la construcción del espacio discursivo en respuesta a las crisis de género [How to promote feminist political agendas in political discourse: Strategies for constructing a discursive space in addressing gender crises]. *Comunicación y Género*, *6*(2), 147–157. https://doi.org/10.5209/cgen.91079
+1. Xiong, L. (2023). Audio description for educational videos on COVID-19 response: A corpus-based study on linguistic and textual idiosyncrasies. *Journal of Literature and Art Studies*, *13*(4), 276–285. https://doi.org/10.17265/2159-5836/2023.04.008
+1. Xu, B., & Tao, Y. (2023). National identity in media discourses from Russia and Ukraine: Amid the 2022 Russo-Ukrainian War. *Zeitschrift für Slawistik*, *68*(3), 419–439. https://doi.org/10.1515/slaw-2023-0021
+1. Yang, Y., & Yang, K. (2022). Оценка в китайском и русском официальных политических дискурсах в эпоху пандемии COVID-19 (на материале выступлений глав КНР и РФ на Всемирном экономическом форуме 2021 г.) [Evaluation in Chinese and Russian Official Political Discourses in the Era of the COVID-19 Pandemic (Based on the Speeches of the Leaders of the PRC and the Russian Federation at the World Economic Forum 2021)]. *Политическая Лингвистика*, *2022*(1), 135–142. https://doi.org/10.26170/1999-2629_2022_01_15
+1. Yi, W., & DeKeyser, R. (2022). Incidental learning of semantically transparent and opaque Chinese compounds from reading: An eye-tracking approach. *System*, *2022*(107). https://doi.org/10.1016/j.system.2022.102825
 1. Yih, T., & Liu, H. (2023). The meaning distributions on different levels of granularity. *Glottometrics*, *54*, 13–38. https://doi.org/10.53482/2023_54_405
-1. Zhang, W. (2022). Jiyu yuliaoku de mimafa yingyu yanjiu [A corpus-based study of English translation on cryptography law of the People's Republic of China]. *Journal of Beijing Electronic Science and Technology Institute*, *30*(3), 152–160.
-1. Zhao, Y. (2022). Jiyu yuliaoku de Fu Lei fanyi fengge xin tan: yuyan yu qinggan de ronghe [A corpus-based study on Fu Lei's translation style: The fusion of language and emotion]. *Technology Enhanced Foreign Language Education*, *2022*(2), 96–103.
+1. Zhang, W. (2022). Jiyu yuliaoku de “Mima Fa” yingyi yanjiu [A corpus-based study of English translation on cryptography law of the People's Republic of China]. *Beijing Dianzi Keji Xueyuan Xuebao*, *30*(3), 152–160.
+1. Zhao, Y. (2022). Jiyu yuliaoku de Fu Lei fanyi fengge xintan: Yuyan yu qinggan de ronghe [A corpus-based study on Fu Lei's translation style: The fusion of language and emotion]. *Waiyu Dianhua Jiaoxue*, *2022*(2), 96–103.
+1. Zhou, Y., Jiang, J., & Liu, H. (2024). Modifying language for a higher goal: Investigating quantitative features of Apple’s launch event speech from 2016 to 2022. *Journal of Quantitative Linguistics*, *31*(2), 139–160. https://doi.org/10.1080/09296174.2024.2345969
 
 <div align="center"><h2>Master's Theses</h2></div>
 
-1. Kang, Z. (2023). *Investigating the translator's style: A corpus-based analysis of Howard Goldblatt's style in his English translations of Mo Yan's novels* [Master's thesis, Beijing Foreign Studies University]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGlLA1-e2pir7L8gvEGmJIlFDzWm_AIfZ8GVKLE--YrKDGTL-EojaJLlXTLXfDE8XLGNIsGyoAoU7tO24iMEDUx6619Rp4bUxI3jYOuXKQtTWMyss-7WpFyzwgvBns_kpzOGXuc3MNNExg==&uniplatform=NZKPT&language=CHS
+1. Kang, Z. (2023). *Investigating the translator's style: A corpus-based analysis of Howard Goldblatt's style in his English translations of Mo Yan's novels* [Master's thesis, Beijing Foreign Studies University]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2020.000479
+1. Li, S. (2023). *A corpus-based study on the international image of China: A case study of on building a human community with a shared future* [Master's thesis, Hebei University]. CNKI. https://link.cnki.net/doi/10.27103/d.cnki.ghebu.2023.000324
+1. Luo, H. (2023). *“Xueba” de huayu jiangou: Xuexizhe shenfen de piping xushu tanjiu* [Discursive construction of “xue ba”: A critical narrative inquiry on learner identity] [Master's thesis, Shanghai International Studies University]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2023.000538
 1. Lv, R. (2020). *A comparative study of acceptability of three English versions of Wenxin Diaolong* [Master’s thesis, Wuhan University of Technology]. Wanfang Data. https://d.wanfangdata.com.cn/thesis/ChJUaGVzaXNOZXdTMjAyMzA5MDESCUQwMjMyMDc1MhoIMjdsMzc4YTM%3D
-1. Shen, Y. (2020). *On the influence of translator's habitus on translation: A case study of the two English versions of Bian Cheng* [Master’s thesis, Shanghai International Studies University]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGk7eK61V3ne_FsBIfvOoS8kW8plhk60QuBjWp-9ECuNMPhiM5SXrAHsbdBjns45qQSotahrIzXKkrlLpfZtWhYojDpeBtRx1yrO9gr1fTQFxmCRye2D71pRL-1rHeUC5aKPxNMDVfxYpQ==&uniplatform=NZKPT&language=CHS
-1. Yan, M. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master's thesis, Beijing Foreign Studies University]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGm-Rs1silP3PF7mbatdKd2tgG-QmgGZ61zyg5BPO3b4ZpCRBPoUQJFOody-wAPizCwhCxIB-2WWqc8fJi2BMfyvgC5DHJyJ8ehTJvnODZhXFfaBXObvjpcZIxXq6eD61kV0aNyFAzKxgQ==&uniplatform=NZKPT&language=CHS
-1. Zhao, J. (2023). *Yiwu jiaoyu jieduan yuwen jiaocai ciyu yanjiu* [A research on the vocabulary of Chinese textbooks in the compulsory education stage] [Master's thesis, Liaoning Normal University]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGn00G83ItuRwxNe9K2MszFejKu1pKy-fk1i33EizT4LW8kNzvkKl9xt4Kjnk6N1OR7hZP8gNuNWMGDPdtAYyuI8SIqfOWcwdZNRQuEea1x1VYpPjm1kSXLIVrjS1gnIMLODthdCBiz4RA==&uniplatform=NZKPT&language=CHS
-1. Zhou, Y. (2023). *Rensheng e yiben xiangtu zhongguo wenhua xingxiang chonggou yanjiu* [Master's thesis, Heilongjiang University]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=ACks_bcdpKlVZB2NYTmNVizHGcB_J8zdXzpUwrFBD9mD-lII8VjtClmD8S_303LHkz_0RjJSd79ALt4PmYWJPslFYxPN0a9W3e2X6Gxunrnii3HefbbyOxoFbCD29whp2r0o9v1MsAx7gcye1XD75A==&uniplatform=NZKPT&language=CHS
+1. Ma, L. (2023). *Kōkō·daigaku ni okeru Nihongo kyōkasho no renketsu ni kansuru kenkyū: “Futsū Kōkō Kyōkasho Nigo” to “Shinseki Daigaku Nigo” o rei ni* [A study on the articulation between the high school and college Japanese textbooks: A case study of High School Japanese and New Century Japanese] [Master's thesis, Harbin Normal University]. CNKI. https://link.cnki.net/doi/10.27064/d.cnki.ghasu.2023.001925
+1. Shen, Y. (2020). *On the influence of translator's habitus on translation: A case study of the two English versions of Bian Cheng* [Master’s thesis, Shanghai International Studies University]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2020.000479
+1. Yan, M. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master's thesis, Beijing Foreign Studies University]. CNKI. https://link.cnki.net/doi/10.26962/d.cnki.gbjwu.2023.000337
+1. Zhao, J. (2023). *Yiwu jiaoyu jieduan yuwen jiaocai ciyu yanjiu* [A research on the vocabulary of Chinese textbooks in the compulsory education stage] [Master's thesis, Liaoning Normal University]. CNKI. https://link.cnki.net/doi/10.27212/d.cnki.glnsu.2023.000956
+1. Zhou, Y. (2023). *“Rensheng” Eyiben xiangtu Zhongguo wenhua xingxiang chonggou yanjiu* [Master's thesis, Heilongjiang University]. CNKI. https://link.cnki.net/doi/10.13963/j.cnki.hhuxb.2023.06.004
 
 <div align="center"><h2>Doctoral Dissertations</h2></div>
 
 1. Dai, Z. (2023). *Dangdai huayu liuxing ge geci jiliang yanjiu (1978-2021)* [Unpublished doctoral dissertation]. Zhejiang University.
+
+<div align="center"><h2>Conference Papers</h2></div>
+
+1. Cheng, Y., & Xia, Y. (2023). Exploring the transcreation of Hong Lou Meng: A corpus-assisted comparative study. In V. Nimehchisalem & H. Habil (Eds.), *Proceedings of the Malaysian association of applied linguistics international conference (MAALIC)* (pp. 16–21). Malaysian Association of Applied Linguistics.
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of three Chinese versions of Turgenev’s collection Poems in Prose. In L. Hale Cox, J. Zhang, Lindsay J., & Q. Fang (Eds.), *Proceedings of the ninth Northeast Asia international symposium on language, literature and translation* (pp. 133–139). American Scholars Press. https://link.cnki.net/doi/10.26914/c.cnkihy.2020.055679
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of two Russian versions of Tao Te Ching. In R. Hou, R. Zhu, & Y. Zhang (Eds.), *Proceedings of the 2020 conference on education, language and inter-cultural communication (ELIC 2020)* (pp. 445–451). Atlantis Press. https://doi.org/10.2991/assehr.k.201127.089
+1. Wei, J., Chen, X., Xiao, H., Tang, S., Xie, X., & Li, Z. (2023). Natural language processing-based requirements modeling: A case study on problem frames. In *Proceedings of 2023 30th Asia-Pacific Software Engineering Conference (APSEC)* (pp. 191–200). IEEE. https://doi.org/10.1109/APSEC60848.2023.00029
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 6b7e6d274..ed835e9e9 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -63,7 +63,7 @@ jobs:
   - job: "macOS"
 
     pool:
-      vmImage: macOS-11
+      vmImage: macOS-12
 
     steps:
       - task: UsePythonVersion@0
diff --git a/doc/trs/zho_cn/README.md b/doc/trs/zho_cn/README.md
index ebe797130..a228c1807 100644
--- a/doc/trs/zho_cn/README.md
+++ b/doc/trs/zho_cn/README.md
@@ -65,11 +65,11 @@
 
 <br>
 
-Wordless 是一款拥有多语种支持的语料库集成工具，其可用于语言学、文学及翻译研究，由当时就读于上海外国语大学口译研究专业的硕士研究生叶磊设计并开发。
+Wordless 是一款拥有多语种支持的语料库集成工具，可用于语言学、文学及翻译研究，由当时就读于上海外国语大学口译研究专业的硕士研究生叶磊设计并开发。
 
 ## 下载
 
-Wordless 最新版（**3.4.0**）支持 **Windows 7/8/8.1/10/11**、**macOS 10.11 或更高版本**及 **Ubuntu 18.04 或更高版本**，**均仅支持**64位操作系统**。英特尔和 Apple 芯片的苹果电脑均有支持。
+Wordless 最新版（**3.4.0**）支持 **Windows 7/8/8.1/10/11**、**macOS 10.11 或更高版本**及 **Ubuntu 18.04 或更高版本**，均仅支持**64位操作系统**。英特尔和 Apple 芯片的苹果电脑均有支持。
 
 如需完整的更新日志，请参阅 [CHANGELOG.md](/CHANGELOG.md)（待翻译）。
 
@@ -82,21 +82,21 @@ Wordless 最新版（**3.4.0**）支持 **Windows 7/8/8.1/10/11**、**macOS 10.1
 [百度网盘](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2FWordless%203.4.0)|中国用户若 Github 连接不稳定可尝试该下载链接（**提取码：wdls**）
 
 > [!IMPORTANT]
-> **备注 1**：Wordless 的路径中**不建议包含任何非 ASCII 字符，如汉字和带变音记号的字母等**。
+> **备注 1：** Wordless 的路径中**不建议包含任何非 ASCII 字符，如汉字和带变音记号的字母等**。
 > 
-> **备注 2**：苹果电脑用户如遇提示 **“Wordless”已损坏，无法打开**，请打开**终端**（启动台 → 其他）后运行：
+> **备注 2：** 苹果电脑用户如遇提示 **“Wordless”已损坏，无法打开**，请打开**终端**（启动台 → 其他）后运行：
 > 
 > <code>xattr -rc /Applications/Wordless.app</code><br>
 > 
 > 请注意将 **/Applications/Wordless.app** 替换为 Wordless 在电脑上的实际存放路径（可直接拖拽 **Wordless.app** 文件至**终端**中）。然后，**再次运行 Wordless**（若程序可成功运行，终端中提示的警告可忽略）。
 > 
-> **备注 3:** 在 *Wordless* 中打开除英语外语种的语料库时，可能需要从网上下载额外的模型文件。如果出现**网络错误**对话框，那么大概率是字面意义上的网络错误，所以只需要按照错误信息中的指示检查网络连接，然后再次尝试下载模型即可。
+> **备注 3：** 在 *Wordless* 中打开除英语外语种的语料库时，可能需要从网上下载额外的模型文件。如果出现**网络错误**对话框，那么大概率是字面意义上的网络错误，所以只需要按照错误信息中的指示检查网络连接，然后再次尝试下载模型即可。
 > 
-> 中国国内和 Github 以及 Hugging Face Hub 的连接不稳定，因此建议用户使用代理并在**菜单 - 偏好 - 设置 - 全局 - 代理设置**中进行设置。或者也可以手动从[百度网盘](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F)下载模型文件。模型的安装步骤如下：
+> 中国国内和 Github 以及 Hugging Face Hub 的连接不稳定，因此建议用户使用代理并在**菜单栏 → 偏好 → 设置 → 全局 → 代理设置**处进行设置。或者也可以手动从[百度网盘](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F)下载模型文件。模型的安装步骤如下：
 > 
 > 1. 查看**网络错误**对话框中的错误信息。如果错误信息中能找到 **stanza** 的字样，那么你需要的是 *Stanza* 模型，否则你需要的是 *spaCy* 模型。
 > 2. 从上方链接中下载你的语料库语种所对应的模型文件然后**将所有文件解压出来**。
-> 3. Windows 以及 Linux 用户需将 *spaCy* 模型置于 **Wordless/libs** 目录下，将 *Stanza* 模型置于 **Wordless/libs/stanza_resources** 目录下。 苹果系统用户需右键单击 **Wordless.app** 图标，选择 **显示套件内容（Show Package Contents）**，然后将 *spaCy* 模型置于 **Contents/Frameworks** 目录下，将 *Stanza* 模型置于 **Contents/Frameworks/stanza_resources**目录下。
+> 3. Windows 以及 Linux 用户需将 *spaCy* 模型置于 **Wordless/libs** 目录下，将 *Stanza* 模型置于 **Wordless/libs/stanza_resources** 目录下。苹果系统用户需右键单击 **Wordless.app** 图标，选择**显示套件内容**，然后将 *spaCy* 模型置于 **Contents/Frameworks** 目录下，将 *Stanza* 模型置于 **Contents/Frameworks/stanza_resources** 目录下。
 > 4. 如果你的语料库有多个语种或者 *spaCy* 和 *Stanza* 模型都需要，那么重复步骤 1 ~ 3 直至**网络错误**的对话框不再出现为止。
 > 5. 再次尝试在 *Wordless* 中打开你的语料库，此时应该会跳过模型下载过程。
 > 
diff --git a/doc/trs/zho_cn/WORKS_USING_WORDLESS.md b/doc/trs/zho_cn/WORKS_USING_WORDLESS.md
index 2c028e5f3..56f6eef69 100644
--- a/doc/trs/zho_cn/WORKS_USING_WORDLESS.md
+++ b/doc/trs/zho_cn/WORKS_USING_WORDLESS.md
@@ -21,32 +21,49 @@
 以下列表并非详尽无遗，仅供参考。如果你知道其他使用了 Wordless 的作品没有在下方列出，欢迎提供详细的文献信息。<br>
 
 <div align="center">
-    <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#期刊文章">期刊文章</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#硕士论文">硕士论文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#博士论文">博士论文</a>
+    <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#期刊文章">期刊文章</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#硕士论文">硕士论文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#博士论文">博士论文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#会议论文">会议论文</a>
 </div>
 
 <div align="center"><h2>期刊文章</h2></div>
 
-1. 董贺. (2019). 基于语料库的《习近平谈治国理政》英译本中高频词的应用特征分析. *长江丛刊*, *2019*(10), 104–105.
-1. 高岭, & 王珺. (2023). 基于语料库的译本风格研究: 以堀辰雄《起风了》的心理描写翻译为中心. *东方文化周刊*, *2023*(34), 1–3.
+1. Chang, Y. (2020). A corpus-based comparative study of translation universals in two English translations of Li Sao. *Journal of Literature and Art Studies*, *10*(10), 916–922. https://doi.org/10.17265/2159-5836/2020.10.007
 1. 李国强, 王辉, & 王精诚. (2023). 基于语料库的《红星照耀中国》译者风格研究: 语言与非语言表征考察. *文化学刊*, *2023*(8), 179–183.
 1. 吕荣, & 马文丽. (2020). 《文心雕龙》三个英译本可接受度对比研究. *湖北第二师范学院学报*, *37*(3), 17–22.
 1. 邵克金. (2022). 义务教育统编语文教科书收词分析: 基于与《义务教育常用词表（草案）》的比较. *现代语文*, *2022*(12), 67–75.
 1. 石红燕. (2022). 语料库语言学视角下的《蜀道难》英译研究. *现代语言学*, *10*(6), 1347–1355. https://doi.org/10.12677/ml.2022.106180
 1. 宋祝, 李玲, & 梁渊. (2021). “构式驱动”读写教学模式下的构式习得研究. *成都师范学院学报*, *37*(3), 51–58.
+1. Sun, Y., Kong, D., & Zhou, C. (2023). Economy or ecology: Metaphor use over time in China’s government work reports. *Language and Cognition*, *15*(3), 551–573. https://doi.org/10.1017/langcog.2023.18
 1. 陶源. (2021). 基于俄媒语料库的新冠肺炎主题词与疫情变迁研究. *南昌航空大学学报：社会科学*, *23*(1), 114–124.
+1. 汪蓝玉, & 朱玉彬. (2024). 数字人文视域下二十四史论赞引语的言据性研究. *数字人文*, *2024*(1).
+1. Wang, S. (2023). Cómo fomentar la incorporación de agendas políticas feministas en el discurso político: Estrategias para la construcción del espacio discursivo en respuesta a las crisis de género. *Comunicación y Género*, *6*(2), 147–157. https://doi.org/10.5209/cgen.91079
+1. Xiong, L. (2023). Audio description for educational videos on COVID-19 response: A corpus-based study on linguistic and textual idiosyncrasies. *Journal of Literature and Art Studies*, *13*(4), 276–285. https://doi.org/10.17265/2159-5836/2023.04.008
+1. Xu, B., & Tao, Y. (2023). National identity in media discourses from Russia and Ukraine: Amid the 2022 Russo-Ukrainian War. *Zeitschrift für Slawistik*, *68*(3), 419–439. https://doi.org/10.1515/slaw-2023-0021
+1. Yang, Y., & Yang, K. (2022). Оценка в китайском и русском официальных политических дискурсах в эпоху пандемии COVID-19 (на материале выступлений глав КНР и РФ на Всемирном экономическом форуме 2021 г.). *Политическая Лингвистика*, *2022*(1), 135–142. https://doi.org/10.26170/1999-2629_2022_01_15
+1. Yi, W., & DeKeyser, R. (2022). Incidental learning of semantically transparent and opaque Chinese compounds from reading: An eye-tracking approach. *System*, *2022*(107). https://doi.org/10.1016/j.system.2022.102825
 1. Yih, T., & Liu, H. (2023). The meaning distributions on different levels of granularity. *Glottometrics*, *54*, 13–38. https://doi.org/10.53482/2023_54_405
 1. 张武江. (2022). 基于语料库的《密码法》英译研究. *北京电子科技学院学报*, *30*(3), 152–160.
 1. 赵宇霞. (2022). 基于语料库的傅雷翻译风格新探: 语言与情感的融合. *外语电化教学*, *2022*(2), 96–103.
+1. Zhou, Y., Jiang, J., & Liu, H. (2024). Modifying language for a higher goal: Investigating quantitative features of Apple’s launch event speech from 2016 to 2022. *Journal of Quantitative Linguistics*, *31*(2), 139–160. https://doi.org/10.1080/09296174.2024.2345969
 
 <div align="center"><h2>硕士论文</h2></div>
 
-1. 康政. (2023). *Investigating the translator's style: A corpus-based analysis of Howard Goldblatt's style in his English translations of Mo Yan's novels* [Master's thesis, 北京外国语大学]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGlLA1-e2pir7L8gvEGmJIlFDzWm_AIfZ8GVKLE--YrKDGTL-EojaJLlXTLXfDE8XLGNIsGyoAoU7tO24iMEDUx6619Rp4bUxI3jYOuXKQtTWMyss-7WpFyzwgvBns_kpzOGXuc3MNNExg==&uniplatform=NZKPT&language=CHS
+1. 康政. (2023). *Investigating the translator's style: A corpus-based analysis of Howard Goldblatt's style in his English translations of Mo Yan's novels* [Master's thesis, 北京外国语大学]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2020.000479
+1. 李硕. (2023). *A corpus-based study on the international image of China: A case study of on building a human community with a shared future* [Master's thesis, 河北大学]. CNKI. https://link.cnki.net/doi/10.27103/d.cnki.ghebu.2023.000324
+1. 罗豪伟. (2023). *“学霸”的话语建构: 学习者身份的批评叙事探究* [Master's thesis, 上海外国语大学]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2023.000538
 1. 吕荣. (2020). *A comparative study of acceptability of three English versions of Wenxin Diaolong* [Master's thesis, 武汉理工大学]. 万方. https://d.wanfangdata.com.cn/thesis/ChJUaGVzaXNOZXdTMjAyMzA5MDESCUQwMjMyMDc1MhoIMjdsMzc4YTM%3D
-1. 沈雅楠. (2020). *On the influence of translator's habitus on translation: A case study of the two English versions of Bian Cheng* [Master's thesis, 上海外国语大学]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGk7eK61V3ne_FsBIfvOoS8kW8plhk60QuBjWp-9ECuNMPhiM5SXrAHsbdBjns45qQSotahrIzXKkrlLpfZtWhYojDpeBtRx1yrO9gr1fTQFxmCRye2D71pRL-1rHeUC5aKPxNMDVfxYpQ==&uniplatform=NZKPT&language=CHS
-1. 严明. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master's thesis, 北京外国语大学]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGm-Rs1silP3PF7mbatdKd2tgG-QmgGZ61zyg5BPO3b4ZpCRBPoUQJFOody-wAPizCwhCxIB-2WWqc8fJi2BMfyvgC5DHJyJ8ehTJvnODZhXFfaBXObvjpcZIxXq6eD61kV0aNyFAzKxgQ==&uniplatform=NZKPT&language=CHS
-1. 赵瑾乾. (2023). *义务教育阶段语文教材词语研究* [Master's thesis, 辽宁师范大学]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGn00G83ItuRwxNe9K2MszFejKu1pKy-fk1i33EizT4LW8kNzvkKl9xt4Kjnk6N1OR7hZP8gNuNWMGDPdtAYyuI8SIqfOWcwdZNRQuEea1x1VYpPjm1kSXLIVrjS1gnIMLODthdCBiz4RA==&uniplatform=NZKPT&language=CHS
-1. 周一钦. (2023). *《人生》俄译本乡土中国文化形象重构研究* [Master's thesis, 黑龙江大学]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=ACks_bcdpKlVZB2NYTmNVizHGcB_J8zdXzpUwrFBD9mD-lII8VjtClmD8S_303LHkz_0RjJSd79ALt4PmYWJPslFYxPN0a9W3e2X6Gxunrnii3HefbbyOxoFbCD29whp2r0o9v1MsAx7gcye1XD75A==&uniplatform=NZKPT&language=CHS
+1. 马琳. (2023). *高校·大学における日本語教科書の連結に関する研究: 『普通高校教科書 日語』と『新世紀大学日語』を例に* [Master's thesis, 哈尔滨师范大学]. CNKI. https://link.cnki.net/doi/10.27064/d.cnki.ghasu.2023.001925
+1. 沈雅楠. (2020). *On the influence of translator's habitus on translation: A case study of the two English versions of Bian Cheng* [Master's thesis, 上海外国语大学]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2020.000479
+1. 严明. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master's thesis, 北京外国语大学]. CNKI. https://link.cnki.net/doi/10.26962/d.cnki.gbjwu.2023.000337
+1. 赵瑾乾. (2023). *义务教育阶段语文教材词语研究* [Master's thesis, 辽宁师范大学]. CNKI. https://link.cnki.net/doi/10.27212/d.cnki.glnsu.2023.000956
+1. 周一钦. (2023). *《人生》俄译本乡土中国文化形象重构研究* [Master's thesis, 黑龙江大学]. CNKI. https://link.cnki.net/doi/10.13963/j.cnki.hhuxb.2023.06.004
 
 <div align="center"><h2>博士论文</h2></div>
 
 1. 戴哲远. (2023). *当代华语流行歌词词汇计量研究（1978-2021）* [Unpublished doctoral dissertation]. 浙江大学.
+
+<div align="center"><h2>会议论文</h2></div>
+
+1. Cheng, Y., & Xia, Y. (2023). Exploring the transcreation of Hong Lou Meng: A corpus-assisted comparative study. In V. Nimehchisalem & H. Habil (Eds.), *Proceedings of the Malaysian association of applied linguistics international conference (MAALIC)* (pp. 16–21). Malaysian Association of Applied Linguistics.
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of three Chinese versions of Turgenev’s collection Poems in Prose. In L. Hale Cox, J. Zhang, Lindsay J., & Q. Fang (Eds.), *Proceedings of the ninth Northeast Asia international symposium on language, literature and translation* (pp. 133–139). American Scholars Press. https://link.cnki.net/doi/10.26914/c.cnkihy.2020.055679
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of two Russian versions of Tao Te Ching. In R. Hou, R. Zhu, & Y. Zhang (Eds.), *Proceedings of the 2020 conference on education, language and inter-cultural communication (ELIC 2020)* (pp. 445–451). Atlantis Press. https://doi.org/10.2991/assehr.k.201127.089
+1. Wei, J., Chen, X., Xiao, H., Tang, S., Xie, X., & Li, Z. (2023). Natural language processing-based requirements modeling: A case study on problem frames. In *Proceedings of 2023 30th Asia-Pacific Software Engineering Conference (APSEC)* (pp. 191–200). IEEE. https://doi.org/10.1109/APSEC60848.2023.00029
diff --git a/doc/trs/zho_tw/README.md b/doc/trs/zho_tw/README.md
index 4978a9a5d..43c12bcd4 100644
--- a/doc/trs/zho_tw/README.md
+++ b/doc/trs/zho_tw/README.md
@@ -65,7 +65,7 @@
 
 <br>
 
-Wordless 是一款擁有多語種支援的語料庫整合工具，其可用於語言學、文學及翻譯研究，由當時就讀於上海外國語大學口譯研究專業的碩士研究生葉磊設計並開發。
+Wordless 是一款擁有多語種支援的語料庫整合工具，可用於語言學、文學及翻譯研究，由當時就讀於上海外國語大學口譯研究專業的碩士研究生葉磊設計並開發。
 
 ## 下載
 
@@ -82,20 +82,20 @@ Wordless 最新版（**3.4.0**）支援 **Windows 7/8/8.1/10/11**、**macOS 10.1
 [百度網盤](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2FWordless%203.4.0)|中國大陸使用者若 Github 連線不穩定可嘗試該下載連結（**提取碼：wdls**）
 
 > [!IMPORTANT]
-> **備註 1**：Wordless 的路徑中**不建議包含任何非 ASCII 字元，如漢字和帶變音記號的字母等**。
+> **備註 1：** Wordless 的路徑中**不建議包含任何非 ASCII 字元，如漢字和帶變音記號的字母等**。
 > 
-> **備註 2**：macOS 系統使用者如遇提示 **“Wordless”已損壞，無法開啟**，請開啟**終端**（啟動臺 → 其他）後執行：
+> **備註 2：** macOS 系統使用者如遇提示 **“Wordless”已損壞，無法開啟**，請開啟**終端**（啟動臺 → 其他）後執行：
 > 
 > <code>xattr -rc /Applications/Wordless.app</code><br>
 > 請注意將 **/Applications/Wordless.app** 替換為 Wordless 在電腦上的實際存放路徑（可直接拖拽 **Wordless.app** 檔案至**終端**中）。然後，**再次執行 Wordless**（若程式可成功執行，終端中提示的警告可忽略）。
 > 
-> **備註 3:** 在 *Wordless* 中開啟除英語外語種的語料庫時，可能需要從網上下載額外的模型檔案。如果出現**網路錯誤**對話方塊，那麼大機率是字面意義上的網路錯誤，所以只需要按照錯誤資訊中的指示檢查網路連線，然後再次嘗試下載模型即可。
+> **備註 3：** 在 *Wordless* 中開啟除英語外語種的語料庫時，可能需要從網上下載額外的模型檔案。如果出現**網路錯誤**對話方塊，那麼大機率是字面意義上的網路錯誤，所以只需要按照錯誤資訊中的指示檢查網路連線，然後再次嘗試下載模型即可。
 > 
-> 中國大陸和 Github 以及 Hugging Face Hub 的連線不穩定，因此建議使用者建議使用代理並在**選單 - 偏好 - 設定 - 全域性 - 代理設定**中進行設定。或者也可以手動從[百度網盤](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F)下載模型檔案。模型的安裝步驟如下：
+> 中國大陸和 Github 以及 Hugging Face Hub 的連線不穩定，因此建議使用者使用代理並在**選單欄 → 偏好 → 設定 → 全域性 → 代理設定**處進行設定。或者也可以手動從[百度網盤](https://pan.baidu.com/s/1--ZzABrDQBZlZagWlVQMbg?pwd=wdls#list/path=%2FWordless%2Fmodels&parentPath=%2F)下載模型檔案。模型的安裝步驟如下：
 > 
 > 1. 檢視**網路錯誤**對話方塊中的錯誤資訊。如果錯誤資訊中能找到 **stanza** 的字樣，那麼你需要的是 *Stanza* 模型，否則你需要的是 *spaCy* 模型。
 > 2. 從上方連結中下載你的語料庫語種所對應的模型檔案然後**將所有檔案解壓出來**。
-> 3. Windows 以及 Linux 使用者需將 *spaCy* 模型置於 **Wordless/libs** 目錄下，將 *Stanza* 模型置於 **Wordless/libs/stanza_resources** 目錄下。 蘋果系統使用者需右鍵單擊 **Wordless.app** 圖示，選擇 **顯示套件內容（Show Package Contents）**，然後將 *spaCy* 模型置於 **Contents/Frameworks** 目錄下，將 *Stanza* 模型置於 **Contents/Frameworks/stanza_resources**目錄下。
+> 3. Windows 以及 Linux 使用者需將 *spaCy* 模型置於 **Wordless/libs** 目錄下，將 *Stanza* 模型置於 **Wordless/libs/stanza_resources** 目錄下。蘋果系統使用者需右鍵單擊 **Wordless.app** 圖示，選擇**顯示套件內容**，然後將 *spaCy* 模型置於 **Contents/Frameworks** 目錄下，將 *Stanza* 模型置於 **Contents/Frameworks/stanza_resources** 目錄下。
 > 4. 如果你的語料庫有多個語種或者 *spaCy* 和 *Stanza* 模型都需要，那麼重複步驟 1 ~ 3 直至**網路錯誤**的對話方塊不再出現為止。
 > 5. 再次嘗試在 *Wordless* 中開啟你的語料庫，此時應該會跳過模型下載過程。
 > 
diff --git a/doc/trs/zho_tw/WORKS_USING_WORDLESS.md b/doc/trs/zho_tw/WORKS_USING_WORDLESS.md
index 03fb297d7..fa8d4f990 100644
--- a/doc/trs/zho_tw/WORKS_USING_WORDLESS.md
+++ b/doc/trs/zho_tw/WORKS_USING_WORDLESS.md
@@ -21,32 +21,49 @@
 以下列表並非詳盡無遺，僅供參考。如果你知道其他使用了 Wordless 的作品沒有在下方列出，歡迎提供詳細的文獻信息。<br>
 
 <div align="center">
-    <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#期刊文章">期刊文章</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#碩士論文">碩士論文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_cn/WORKS_USING_WORDLESS.md#博士論文">博士論文</a>
+    <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_tw/WORKS_USING_WORDLESS.md#期刊文章">期刊文章</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_tw/WORKS_USING_WORDLESS.md#碩士論文">碩士論文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_tw/WORKS_USING_WORDLESS.md#博士論文">博士論文</a> | <a href="https://github.com/BLKSerene/Wordless/blob/main/doc/trs/zho_tw/WORKS_USING_WORDLESS.md#會議論文">會議論文</a>
 </div>
 
 <div align="center"><h2>期刊文章</h2></div>
 
-1. 董賀. (2019). 基於語料庫的《習近平談治國理政》英譯本中高頻詞的應用特徵分析. *長江叢刊*, *2019*(10), 104–105.
-1. 高嶺, & 王珺. (2023). 基於語料庫的譯本風格研究: 以堀辰雄《起風了》的心理描寫翻譯爲中心. *東方文化週刊*, *2023*(34), 1–3.
+1. Chang, Y. (2020). A corpus-based comparative study of translation universals in two English translations of Li Sao. *Journal of Literature and Art Studies*, *10*(10), 916–922. https://doi.org/10.17265/2159-5836/2020.10.007
 1. 李國強, 王輝, & 王精誠. (2023). 基於語料庫的《紅星照耀中國》譯者風格研究: 語言與非語言表徵考察. *文化學刊*, *2023*(8), 179–183.
 1. 呂榮, & 馬文麗. (2020). 《文心雕龍》三個英譯本可接受度對比研究. *湖北第二師範學院學報*, *37*(3), 17–22.
 1. 邵克金. (2022). 義務教育統編語文教科書收詞分析: 基於與《義務教育常用詞表（草案）》的比較. *現代語文*, *2022*(12), 67–75.
 1. 石紅燕. (2022). 語料庫語言學視角下的《蜀道難》英譯研究. *現代語言學*, *10*(6), 1347–1355. https://doi.org/10.12677/ml.2022.106180
 1. 宋祝, 李玲, & 梁淵. (2021). “構式驅動”讀寫教學模式下的構式習得研究. *成都師範學院學報*, *37*(3), 51–58.
+1. Sun, Y., Kong, D., & Zhou, C. (2023). Economy or ecology: Metaphor use over time in China’s government work reports. *Language and Cognition*, *15*(3), 551–573. https://doi.org/10.1017/langcog.2023.18
 1. 陶源. (2021). 基於俄媒語料庫的新冠肺炎主題詞與疫情變遷研究. *南昌航空大學學報：社會科學*, *23*(1), 114–124.
+1. 汪藍玉, & 朱玉彬. (2024). 數字人文視域下二十四史論贊引語的言據性研究. *數字人文*, *2024*(1).
+1. Wang, S. (2023). Cómo fomentar la incorporación de agendas políticas feministas en el discurso político: Estrategias para la construcción del espacio discursivo en respuesta a las crisis de género. *Comunicación y Género*, *6*(2), 147–157. https://doi.org/10.5209/cgen.91079
+1. Xiong, L. (2023). Audio description for educational videos on COVID-19 response: A corpus-based study on linguistic and textual idiosyncrasies. *Journal of Literature and Art Studies*, *13*(4), 276–285. https://doi.org/10.17265/2159-5836/2023.04.008
+1. Xu, B., & Tao, Y. (2023). National identity in media discourses from Russia and Ukraine: Amid the 2022 Russo-Ukrainian War. *Zeitschrift für Slawistik*, *68*(3), 419–439. https://doi.org/10.1515/slaw-2023-0021
+1. Yang, Y., & Yang, K. (2022). Оценка в китайском и русском официальных политических дискурсах в эпоху пандемии COVID-19 (на материале выступлений глав КНР и РФ на Всемирном экономическом форуме 2021 г.). *Политическая Лингвистика*, *2022*(1), 135–142. https://doi.org/10.26170/1999-2629_2022_01_15
+1. Yi, W., & DeKeyser, R. (2022). Incidental learning of semantically transparent and opaque Chinese compounds from reading: An eye-tracking approach. *System*, *2022*(107). https://doi.org/10.1016/j.system.2022.102825
 1. Yih, T., & Liu, H. (2023). The meaning distributions on different levels of granularity. *Glottometrics*, *54*, 13–38. https://doi.org/10.53482/2023_54_405
 1. 張武江. (2022). 基於語料庫的《密碼法》英譯研究. *北京電子科技學院學報*, *30*(3), 152–160.
 1. 趙宇霞. (2022). 基於語料庫的傅雷翻譯風格新探: 語言與情感的融合. *外語電化教學*, *2022*(2), 96–103.
+1. Zhou, Y., Jiang, J., & Liu, H. (2024). Modifying language for a higher goal: Investigating quantitative features of Apple’s launch event speech from 2016 to 2022. *Journal of Quantitative Linguistics*, *31*(2), 139–160. https://doi.org/10.1080/09296174.2024.2345969
 
 <div align="center"><h2>碩士論文</h2></div>
 
-1. 康政. (2023). *Investigating the translator\'s style: A corpus-based analysis of Howard Goldblatt\'s style in his English translations of Mo Yan\'s novels* [Master\'s thesis, 北京外國語大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGlLA1-e2pir7L8gvEGmJIlFDzWm_AIfZ8GVKLE--YrKDGTL-EojaJLlXTLXfDE8XLGNIsGyoAoU7tO24iMEDUx6619Rp4bUxI3jYOuXKQtTWMyss-7WpFyzwgvBns_kpzOGXuc3MNNExg==&uniplatform=NZKPT&language=CHS
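+1. 康政. (2023). *Investigating the translator\'s style: A corpus-based analysis of Howard Goldblatt\'s style in his English translations of Mo Yan\'s novels* [Master\'s thesis, 北京外國語大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGlLA1-e2pir7L8gvEGmJIlFDzWm_AIfZ8GVKLE--YrKDGTL-EojaJLlXTLXfDE8XLGNIsGyoAoU7tO24iMEDUx6619Rp4bUxI3jYOuXKQtTWMyss-7WpFyzwgvBns_kpzOGXuc3MNNExg==&uniplatform=NZKPT&language=CHS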
+1. 李碩. (2023). *A corpus-based study on the international image of China: A case study of on building a human community with a shared future* [Master\'s thesis, 河北大學]. CNKI. https://link.cnki.net/doi/10.27103/d.cnki.ghebu.2023.000324
+1. 羅豪偉. (2023). *“學霸”的話語建構: 學習者身份的批評敘事探究* [Master\'s thesis, 上海外國語大學]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2023.000538
 1. 呂榮. (2020). *A comparative study of acceptability of three English versions of Wenxin Diaolong* [Master\'s thesis, 武漢理工大學]. 萬方. https://d.wanfangdata.com.cn/thesis/ChJUaGVzaXNOZXdTMjAyMzA5MDESCUQwMjMyMDc1MhoIMjdsMzc4YTM%3D
-1. 沈雅楠. (2020). *On the influence of translator\'s habitus on translation: A case study of the two English versions of Bian Cheng* [Master\'s thesis, 上海外國語大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGk7eK61V3ne_FsBIfvOoS8kW8plhk60QuBjWp-9ECuNMPhiM5SXrAHsbdBjns45qQSotahrIzXKkrlLpfZtWhYojDpeBtRx1yrO9gr1fTQFxmCRye2D71pRL-1rHeUC5aKPxNMDVfxYpQ==&uniplatform=NZKPT&language=CHS
-1. 嚴明. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master\'s thesis, 北京外國語大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGm-Rs1silP3PF7mbatdKd2tgG-QmgGZ61zyg5BPO3b4ZpCRBPoUQJFOody-wAPizCwhCxIB-2WWqc8fJi2BMfyvgC5DHJyJ8ehTJvnODZhXFfaBXObvjpcZIxXq6eD61kV0aNyFAzKxgQ==&uniplatform=NZKPT&language=CHS
-1. 趙瑾乾. (2023). *義務教育階段語文教材詞語研究* [Master\'s thesis, 遼寧師範大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=PT3z46FIkGn00G83ItuRwxNe9K2MszFejKu1pKy-fk1i33EizT4LW8kNzvkKl9xt4Kjnk6N1OR7hZP8gNuNWMGDPdtAYyuI8SIqfOWcwdZNRQuEea1x1VYpPjm1kSXLIVrjS1gnIMLODthdCBiz4RA==&uniplatform=NZKPT&language=CHS
-1. 週一欽. (2023). *《人生》俄譯本鄉土中國文化形象重構研究* [Master\'s thesis, 黑龍江大學]. CNKI. https://kns.cnki.net/kcms2/article/abstract?v=ACks_bcdpKlVZB2NYTmNVizHGcB_J8zdXzpUwrFBD9mD-lII8VjtClmD8S_303LHkz_0RjJSd79ALt4PmYWJPslFYxPN0a9W3e2X6Gxunrnii3HefbbyOxoFbCD29whp2r0o9v1MsAx7gcye1XD75A==&uniplatform=NZKPT&language=CHS
+1. 馬琳. (2023). *高校·大學における日本語教科書の連結に関する研究: 『普通高校教科書 日語』と『新世紀大學日語』を例に* [Master\'s thesis, 哈爾濱師範大學]. CNKI. https://link.cnki.net/doi/10.27064/d.cnki.ghasu.2023.001925
+1. 沈雅楠. (2020). *On the influence of translator\'s habitus on translation: A case study of the two English versions of Bian Cheng* [Master\'s thesis, 上海外國語大學]. CNKI. https://link.cnki.net/doi/10.27316/d.cnki.gswyu.2020.000479
+1. 嚴明. (2023). *A comparative study on the translation styles of three English versions of Na Han and Pang Huang* [Master\'s thesis, 北京外國語大學]. CNKI. https://link.cnki.net/doi/10.26962/d.cnki.gbjwu.2023.000337
+1. 趙瑾乾. (2023). *義務教育階段語文教材詞語研究* [Master\'s thesis, 遼寧師範大學]. CNKI. https://link.cnki.net/doi/10.27212/d.cnki.glnsu.2023.000956
+1. 周一欽. (2023). *《人生》俄譯本鄉土中國文化形象重構研究* [Master\'s thesis, 黑龍江大學]. CNKI. https://link.cnki.net/doi/10.13963/j.cnki.hhuxb.2023.06.004
 
 <div align="center"><h2>博士論文</h2></div>
 
 1. 戴哲遠. (2023). *當代華語流行歌詞詞彙計量研究（1978-2021）* [Unpublished doctoral dissertation]. 浙江大學.
+
+<div align="center"><h2>會議論文</h2></div>
+
+1. Cheng, Y., & Xia, Y. (2023). Exploring the transcreation of Hong Lou Meng: A corpus-assisted comparative study. In V. Nimehchisalem & H. Habil (Eds.), *Proceedings of the Malaysian association of applied linguistics international conference (MAALIC)* (pp. 16–21). Malaysia Association of Applied Linguistics.
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of three Chinese versions of Turgenev’s collection Poems in Prose. In L. Hale Cox, J. Zhang, J. Lindsay, & Q. Fang (Eds.), *Proceedings of the ninth Northeast Asia international symposium on language, literature and translation* (pp. 133–139). American Scholars Press. https://link.cnki.net/doi/10.26914/c.cnkihy.2020.055679
+1. Xu, B., & Wang, G. (2020). A corpus-based study of translation styles of two Russian versions of Tao Te Ching. In R. Hou, R. Zhu, & Y. Zhang (Eds.), *Proceedings of the 2020 conference on education, language and inter-cultural communication (ELIC 2020)* (pp. 445–451). Atlantis Press. https://doi.org/10.2991/assehr.k.201127.089
+1. Wei, J., Chen, X., Xiao, H., Tang, S., Xie, X., & Li, Z. (2023). Natural language processing-based requirements modeling: A case study on problem frames. In *Proceedings of 2023 30th Asia-Pacific Software Engineering Conference (APSEC)* (pp. 191–200). IEEE. https://doi.org/10.1109/APSEC60848.2023.00029
diff --git a/requirements/requirements_tests.txt b/requirements/requirements_tests.txt
index 726537a56..33f921ca5 100644
--- a/requirements/requirements_tests.txt
+++ b/requirements/requirements_tests.txt
@@ -29,8 +29,6 @@ pythainlp == 5.0.3
 sacremoses == 0.1.1
 simplemma == 0.9.1
 stanza == 1.7.0
-# Underthesea is incompatible with scikit-learn >= 1.5
-scikit-learn == 1.4.2
 underthesea == 6.8.0
 vaderSentiment == 3.3.2
 
@@ -57,7 +55,8 @@ beautifulsoup4
 lxml
 matplotlib
 networkx
-numpy
+## spaCy is incompatible with NumPy 2.0
+numpy == 1.26.4
 opencc-python-reimplemented
 openpyxl
 pypdf
@@ -66,6 +65,8 @@ pytest
 python-docx
 python-pptx
 requests
+# Underthesea is incompatible with scikit-learn 1.5
+scikit-learn == 1.4.2
 scipy
 wordcloud
 
diff --git a/tests/tests_file_area/test_file_area_file_types.py b/tests/tests_file_area/test_file_area_file_types.py
index 3ac2601d3..bb2a63353 100644
--- a/tests/tests_file_area/test_file_area_file_types.py
+++ b/tests/tests_file_area/test_file_area_file_types.py
@@ -191,7 +191,7 @@ def update_gui_file_types(err_msg, new_files):
                 assert tokens == [[[['<', 'bncDoc', 'xml', ':'], ['id=', "''", 'A00', "''", '>', '<', 'teiHeader', '>', '<', 'fileDesc', '>', '<', 'titleStmt', '>', '<', 'title', '>', '[', 'ACET', 'factsheets', '&', 'amp', ';'], ['newsletters', ']', '.']], [['Sample', 'containing', 'about', '6688', 'words', 'of', 'miscellanea', '(', 'domain', ':'], ['social', 'science', ')', '<', '/title', '>', '<', 'respStmt', '>', '<', 'resp', '>', 'Data', 'capture', 'and', 'transcription', '<', '/resp', '>', '<', 'name', '>', 'Oxford', 'University', 'Press', '<', '/name', '>', '<', '/respStmt', '>', '<', '/titleStmt', '>', '<', '/fileDesc', '>', '<', '/teiHeader', '>']]], [[['<', 'wtext', 'type=', "''", 'NONAC', "''", '>', '<', 'div', 'level=', "''", '1', "''", 'n=', "''", '1', "''", 'type=', "''", 'leaflet', "''", '>', '<', 'head', 'type=', "''", 'MAIN', "''", '>']]], [[['<', 's', 'n=', "''", '1', "''", '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'factsheet', "''", 'pos=', "''", 'SUBST', "''", '>', 'FACTSHEET', '<', '/w', '>', '<', 'w', 'c5=', "''", 'DTQ', "''", 'hw=', "''", 'what', "''", 'pos=', "''", 'PRON', "''", '>', 'WHAT', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VBZ', "''", 'hw=', "''", 'be', "''", 'pos=', "''", 'VERB', "''", '>', 'IS', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'aids', "''", 'pos=', "''", 'SUBST', "''", '>', 'AIDS', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUN', "''", '>', '?'], ['<', '/c', '>', '<', '/s', '>', '<', '/head', '>', '<', 'p', '>']]], [[['<', 's', 'n=', "''", '2', "''", '>', '<', 'hi', 'rend=', "''", 'bo', "''", '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'aids', "''", 'pos=', "''", 'SUBST', "''", '>', 'AIDS', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUL', "''", '>', '(', '<', '/c', '>', '<', 'w', 'c5=', "''", 'VVN-AJ0', "''", 'hw=', "''", 'acquire', "''", 'pos=', "''", 'VERB', "''", '>', 'Acquired', '<', '/w', '>', '<', 'w', 'c5=', "''", 'AJ0', "''", 'hw=', "''", 'immune', "''", 'pos=', "''", 'ADJ', 
"''", '>', 'Immune', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'deficiency', "''", 'pos=', "''", 'SUBST', "''", '>', 'Deficiency', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'syndrome', "''", 'pos=', "''", 'SUBST', "''", '>', 'Syndrome', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUR', "''", '>', ')', '<', '/c', '>', '<', '/hi', '>', '<', 'w', 'c5=', "''", 'VBZ', "''", 'hw=', "''", 'be', "''", 'pos=', "''", 'VERB', "''", '>', 'is', '<', '/w', '>', '<', 'w', 'c5=', "''", 'AT0', "''", 'hw=', "''", 'a', "''", 'pos=', "''", 'ART', "''", '>', 'a', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'condition', "''", 'pos=', "''", 'SUBST', "''", '>', 'condition', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VVN', "''", 'hw=', "''", 'cause', "''", 'pos=', "''", 'VERB', "''", '>', 'caused', '<', '/w', '>', '<', 'w', 'c5=', "''", 'PRP', "''", 'hw=', "''", 'by', "''", 'pos=', "''", 'PREP', "''", '>', 'by', '<', '/w', '>', '<', 'w', 'c5=', "''", 'AT0', "''", 'hw=', "''", 'a', "''", 'pos=', "''", 'ART', "''", '>', 'a', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'virus', "''", 'pos=', "''", 'SUBST', "''", '>', 'virus', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VVN', "''", 'hw=', "''", 'call', "''", 'pos=', "''", 'VERB', "''", '>', 'called', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NP0', "''", 'hw=', "''", 'hiv', "''", 'pos=', "''", 'SUBST', "''", '>', 'HIV', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUL', "''", '>', '(', '<', '/c', '>', '<', 'w', 'c5=', "''", 'AJ0-NN1', "''", 'hw=', "''", 'human', "''", 'pos=', "''", 'ADJ', "''", '>', 'Human', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'immuno', "''", 'pos=', "''", 'SUBST', "''", '>', 'Immuno', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'deficiency', "''", 'pos=', "''", 'SUBST', "''", '>', 'Deficiency', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'virus', "''", 'pos=', "''", 'SUBST', "''", '>', 
'Virus', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUR', "''", '>', ')', '<', '/c', '>', '<', 'c', 'c5=', "''", 'PUN', "''", '>', '.'], ['<', '/c', '>', '<', '/s', '>']]], [[['<', 's', 'n=', "''", '3', "''", '>', '<', 'w', 'c5=', "''", 'DT0', "''", 'hw=', "''", 'this', "''", 'pos=', "''", 'ADJ', "''", '>', 'This', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'virus', "''", 'pos=', "''", 'SUBST', "''", '>', 'virus', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VVZ', "''", 'hw=', "''", 'affect', "''", 'pos=', "''", 'VERB', "''", '>', 'affects', '<', '/w', '>', '<', 'w', 'c5=', "''", 'AT0', "''", 'hw=', "''", 'the', "''", 'pos=', "''", 'ART', "''", '>', 'the', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'body', "''", 'pos=', "''", 'SUBST', "''", '>', 'body', '<', '/w', '>', '<', 'w', 'c5=', "''", 'POS', "''", 'hw=', "''", "'s", "''", 'pos=', "''", 'UNC', "''", '>', "'s", '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'defence', "''", 'pos=', "''", 'SUBST', "''", '>', 'defence', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 'system', "''", 'pos=', "''", 'SUBST', "''", '>', 'system', '<', '/w', '>', '<', 'mw', 'c5=', "''", 'CJS', "''", '>', '<', 'w', 'c5=', "''", 'AV0', "''", 'hw=', "''", 'so', "''", 'pos=', "''", 'ADV', "''", '>', 'so', '<', '/w', '>', '<', 'w', 'c5=', "''", 'CJT', "''", 'hw=', "''", 'that', "''", 'pos=', "''", 'CONJ', "''", '>', 'that', '<', '/w', '>', '<', '/mw', '>', '<', 'w', 'c5=', "''", 'PNP', "''", 'hw=', "''", 'it', "''", 'pos=', "''", 'PRON', "''", '>', 'it', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VM0', "''", 'hw=', "''", 'can', "''", 'pos=', "''", 'VERB', "''", '>', 'can', '<', '/w', '>', '<', 'w', 'c5=', "''", 'XX0', "''", 'hw=', "''", 'not', "''", 'pos=', "''", 'ADV', "''", '>', 'not', '<', '/w', '>', '<', 'w', 'c5=', "''", 'VVI', "''", 'hw=', "''", 'fight', "''", 'pos=', "''", 'VERB', "''", '>', 'fight', '<', '/w', '>', '<', 'w', 'c5=', "''", 'NN1', "''", 'hw=', "''", 
'infection', "''", 'pos=', "''", 'SUBST', "''", '>', 'infection', '<', '/w', '>', '<', 'c', 'c5=', "''", 'PUN', "''", '>', '.'], ['<', '/c', '>', '<', '/s', '>', '<', '/p', '>']]]]
             # Tokenized & Untagged
             case 'xml (5).xml':
-                assert tokens == [[[['<bncDoc', 'xml:'], ['id="A00"><teiHeader><fileDesc><titleStmt><title>', '[ACET', 'factsheets', '&amp;'], ['newsletters].']], [['Sample', 'containing', 'about', '6688', 'words', 'of', 'miscellanea', '(domain:'], ['social', 'science)', '</title><respStmt><resp>', 'Data', 'capture', 'and', 'transcription', '</resp><name>', 'Oxford', 'University', 'Press', '</name>', '</respStmt></titleStmt></fileDesc></teiHeader>']]], [[['<wtext', 'type="NONAC"><div', 'level="1"', 'n="1"', 'type="leaflet"><head', 'type="MAIN">']]], [[['<s', 'n="1"><w', 'c5="NN1"', 'hw="factsheet"', 'pos="SUBST">FACTSHEET', '</w><w', 'c5="DTQ"', 'hw="what"', 'pos="PRON">WHAT', '</w><w', 'c5="VBZ"', 'hw="be"', 'pos="VERB">IS', '</w><w', 'c5="NN1"', 'hw="aids"', 'pos="SUBST">AIDS</w><c', 'c5="PUN">?']], [['</c></s></head><p>']]], [[['<s', 'n="2"><hi', 'rend="bo"><w', 'c5="NN1"', 'hw="aids"', 'pos="SUBST">AIDS', '</w><c', 'c5="PUL">(</c><w', 'c5="VVN-AJ0"', 'hw="acquire"', 'pos="VERB">Acquired', '</w><w', 'c5="AJ0"', 'hw="immune"', 'pos="ADJ">Immune', '</w><w', 'c5="NN1"', 'hw="deficiency"', 'pos="SUBST">Deficiency', '</w><w', 'c5="NN1"', 'hw="syndrome"', 'pos="SUBST">Syndrome</w><c', 'c5="PUR">)</c></hi><w', 'c5="VBZ"', 'hw="be"', 'pos="VERB">is', '</w><w', 'c5="AT0"', 'hw="a"', 'pos="ART">a', '</w><w', 'c5="NN1"', 'hw="condition"', 'pos="SUBST">condition', '</w><w', 'c5="VVN"', 'hw="cause"', 'pos="VERB">caused', '</w><w', 'c5="PRP"', 'hw="by"', 'pos="PREP">by', '</w><w', 'c5="AT0"', 'hw="a"', 'pos="ART">a', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">virus', '</w><w', 'c5="VVN"', 'hw="call"', 'pos="VERB">called', '</w><w', 'c5="NP0"', 'hw="hiv"', 'pos="SUBST">HIV', '</w><c', 'c5="PUL">(</c><w', 'c5="AJ0-NN1"', 'hw="human"', 'pos="ADJ">Human', '</w><w', 'c5="NN1"', 'hw="immuno"', 'pos="SUBST">Immuno', '</w><w', 'c5="NN1"', 'hw="deficiency"', 'pos="SUBST">Deficiency', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">Virus</w><c', 'c5="PUR">)</c><c', 
'c5="PUN">.']], [['</c></s>']]], [[['<s', 'n="3"><w', 'c5="DT0"', 'hw="this"', 'pos="ADJ">This', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">virus', '</w><w', 'c5="VVZ"', 'hw="affect"', 'pos="VERB">affects', '</w><w', 'c5="AT0"', 'hw="the"', 'pos="ART">the', '</w><w', 'c5="NN1"', 'hw="body"', 'pos="SUBST">body</w><w', 'c5="POS"', 'hw="\'s"', 'pos="UNC">\'s', '</w><w', 'c5="NN1"', 'hw="defence"', 'pos="SUBST">defence', '</w><w', 'c5="NN1"', 'hw="system"', 'pos="SUBST">system', '</w><mw', 'c5="CJS"><w', 'c5="AV0"', 'hw="so"', 'pos="ADV">so', '</w><w', 'c5="CJT"', 'hw="that"', 'pos="CONJ">that', '</w></mw><w', 'c5="PNP"', 'hw="it"', 'pos="PRON">it', '</w><w', 'c5="VM0"', 'hw="can"', 'pos="VERB">can</w><w', 'c5="XX0"', 'hw="not"', 'pos="ADV">not', '</w><w', 'c5="VVI"', 'hw="fight"', 'pos="VERB">fight', '</w><w', 'c5="NN1"', 'hw="infection"', 'pos="SUBST">infection</w><c', 'c5="PUN">.']], [['</c></s></p>']]]]
+                assert tokens == [[[['<bncDoc', 'xml:id="A00"><teiHeader><fileDesc><titleStmt><title>', '[ACET', 'factsheets', '&amp;'], ['newsletters].']], [['Sample', 'containing', 'about', '6688', 'words', 'of', 'miscellanea', '(domain:'], ['social', 'science)', '</title><respStmt><resp>', 'Data', 'capture', 'and', 'transcription', '</resp><name>', 'Oxford', 'University', 'Press', '</name>', '</respStmt></titleStmt></fileDesc></teiHeader>']]], [[['<wtext', 'type="NONAC"><div', 'level="1"', 'n="1"', 'type="leaflet"><head', 'type="MAIN">']]], [[['<s', 'n="1"><w', 'c5="NN1"', 'hw="factsheet"', 'pos="SUBST">FACTSHEET', '</w><w', 'c5="DTQ"', 'hw="what"', 'pos="PRON">WHAT', '</w><w', 'c5="VBZ"', 'hw="be"', 'pos="VERB">IS', '</w><w', 'c5="NN1"', 'hw="aids"', 'pos="SUBST">AIDS</w><c', 'c5="PUN">?</c></s></head><p>']]], [[['<s', 'n="2"><hi', 'rend="bo"><w', 'c5="NN1"', 'hw="aids"', 'pos="SUBST">AIDS', '</w><c', 'c5="PUL">(</c><w', 'c5="VVN-AJ0"', 'hw="acquire"', 'pos="VERB">Acquired', '</w><w', 'c5="AJ0"', 'hw="immune"', 'pos="ADJ">Immune', '</w><w', 'c5="NN1"', 'hw="deficiency"', 'pos="SUBST">Deficiency', '</w><w', 'c5="NN1"', 'hw="syndrome"', 'pos="SUBST">Syndrome</w><c', 'c5="PUR">)</c></hi><w', 'c5="VBZ"', 'hw="be"', 'pos="VERB">is', '</w><w', 'c5="AT0"', 'hw="a"', 'pos="ART">a', '</w><w', 'c5="NN1"', 'hw="condition"', 'pos="SUBST">condition', '</w><w', 'c5="VVN"', 'hw="cause"', 'pos="VERB">caused', '</w><w', 'c5="PRP"', 'hw="by"', 'pos="PREP">by', '</w><w', 'c5="AT0"', 'hw="a"', 'pos="ART">a', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">virus', '</w><w', 'c5="VVN"', 'hw="call"', 'pos="VERB">called', '</w><w', 'c5="NP0"', 'hw="hiv"', 'pos="SUBST">HIV', '</w><c', 'c5="PUL">(</c><w', 'c5="AJ0-NN1"', 'hw="human"', 'pos="ADJ">Human', '</w><w', 'c5="NN1"', 'hw="immuno"', 'pos="SUBST">Immuno', '</w><w', 'c5="NN1"', 'hw="deficiency"', 'pos="SUBST">Deficiency', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">Virus</w><c', 'c5="PUR">)</c><c', 'c5="PUN">.</c></s>']]], 
[[['<s', 'n="3"><w', 'c5="DT0"', 'hw="this"', 'pos="ADJ">This', '</w><w', 'c5="NN1"', 'hw="virus"', 'pos="SUBST">virus', '</w><w', 'c5="VVZ"', 'hw="affect"', 'pos="VERB">affects', '</w><w', 'c5="AT0"', 'hw="the"', 'pos="ART">the', '</w><w', 'c5="NN1"', 'hw="body"', 'pos="SUBST">body</w><w', 'c5="POS"', 'hw="\'s"', 'pos="UNC">\'s', '</w><w', 'c5="NN1"', 'hw="defence"', 'pos="SUBST">defence', '</w><w', 'c5="NN1"', 'hw="system"', 'pos="SUBST">system', '</w><mw', 'c5="CJS"><w', 'c5="AV0"', 'hw="so"', 'pos="ADV">so', '</w><w', 'c5="CJT"', 'hw="that"', 'pos="CONJ">that', '</w></mw><w', 'c5="PNP"', 'hw="it"', 'pos="PRON">it', '</w><w', 'c5="VM0"', 'hw="can"', 'pos="VERB">can</w><w', 'c5="XX0"', 'hw="not"', 'pos="ADV">not', '</w><w', 'c5="VVI"', 'hw="fight"', 'pos="VERB">fight', '</w><w', 'c5="NN1"', 'hw="infection"', 'pos="SUBST">infection</w><c', 'c5="PUN">.</c></s></p>']]]]
 
         assert tags == [None] * file_text.num_tokens
     # TMX files