From c7525c8901f1c97ced078ebf384b03b126064178 Mon Sep 17 00:00:00 2001 From: AXeonV <2607343351@qq.com> Date: Wed, 17 Jun 2026 12:49:31 +0800 Subject: [PATCH 1/4] fix auto enc handling --- src/Data/Convert/Verbatim/verbatim.cpp | 4 ++-- tests/Data/String/converter_test.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Data/Convert/Verbatim/verbatim.cpp b/src/Data/Convert/Verbatim/verbatim.cpp index 167f4de815..b47f2060cd 100644 --- a/src/Data/Convert/Verbatim/verbatim.cpp +++ b/src/Data/Convert/Verbatim/verbatim.cpp @@ -295,8 +295,8 @@ un_special (string s) { static string encode (string s, string enc) { - if (enc == "auto") return western_to_cork (s); - else if (enc == "utf-8") return utf8_to_cork (s); + if (enc == "auto") enc= get_locale_charset (); + if (enc == "utf-8" || enc == "UTF-8") return utf8_to_cork (s); else if (enc == "iso-8859-1") return tm_encode (s); else if (enc == "SourceCode") return sourcecode_to_cork (s); else return tm_encode (s); diff --git a/tests/Data/String/converter_test.cpp b/tests/Data/String/converter_test.cpp index 08e509a5e6..7864889632 100644 --- a/tests/Data/String/converter_test.cpp +++ b/tests/Data/String/converter_test.cpp @@ -12,6 +12,7 @@ #include "base.hpp" #include "converter.hpp" +#include "convert.hpp" #include "file.hpp" class TestConverter : public QObject { @@ -21,6 +22,7 @@ private slots: void init () { init_lolly (); }; void test_utf8_to_cork (); void test_cork_to_utf8 (); + void test_verbatim_to_tree_auto (); }; void @@ -37,5 +39,11 @@ TestConverter::test_cork_to_utf8 () { qcompare (cork_to_utf8 ("\x11"), "”"); } +void +TestConverter::test_verbatim_to_tree_auto () { + tree t = verbatim_to_tree ("中", false, "auto"); + qcompare (as_string (t), "<#4E2D>"); +} + QTEST_MAIN (TestConverter) #include "converter_test.moc" From a33b092c1d9595e196fffe90a629ff2604ae3370 Mon Sep 17 00:00:00 2001 From: AXeonV <2607343351@qq.com> Date: Wed, 17 Jun 2026 14:14:41 +0800 Subject: [PATCH 2/4] move protocol from SourceCode to UTF-8 --- TeXmacs/plugins/autosave/progs/init-autosave.scm | 2 +- TeXmacs/plugins/goldfish/progs/init-goldfish.scm | 2 +- TeXmacs/plugins/julia/julia/MoganJulia.jl | 2 +- TeXmacs/plugins/julia/progs/init-julia.scm | 2 +- TeXmacs/plugins/python/progs/init-python.scm | 2 +- TeXmacs/plugins/quiver/progs/init-quiver.scm | 2 +- TeXmacs/plugins/tikz/progs/init-tikz.scm | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/TeXmacs/plugins/autosave/progs/init-autosave.scm b/TeXmacs/plugins/autosave/progs/init-autosave.scm index 22767fd809..c60d2d8a8a 100644 --- a/TeXmacs/plugins/autosave/progs/init-autosave.scm +++ b/TeXmacs/plugins/autosave/progs/init-autosave.scm @@ -19,7 +19,7 @@ (with u (pre-serialize lan t) (with s - (texmacs->code (stree->tree u) "SourceCode") + (texmacs->code (stree->tree u) "utf-8") (string-append s "\n\n") ) ;with ) ;with diff --git a/TeXmacs/plugins/goldfish/progs/init-goldfish.scm b/TeXmacs/plugins/goldfish/progs/init-goldfish.scm index c02f355135..30a0953ab0 100644 --- a/TeXmacs/plugins/goldfish/progs/init-goldfish.scm +++ b/TeXmacs/plugins/goldfish/progs/init-goldfish.scm @@ -17,7 +17,7 @@ (define (goldfish-serialize lan t) (let* ((u (pre-serialize lan t)) - (s (texmacs->code (stree->tree u) "SourceCode"))) + (s (texmacs->code (stree->tree u) "utf-8"))) (string-append s "\n\n"))) (define (goldfish-launch mode) diff --git a/TeXmacs/plugins/julia/julia/MoganJulia.jl b/TeXmacs/plugins/julia/julia/MoganJulia.jl index 188f24a48a..bc24b0102f 100644 --- a/TeXmacs/plugins/julia/julia/MoganJulia.jl +++ b/TeXmacs/plugins/julia/julia/MoganJulia.jl @@ -267,7 +267,7 @@ function do_tab_complete(cmd::AbstractString) if isa(arg1,AbstractString) && isa(arg2,Integer) ret,range,shouldcomplete = completions(arg1,arg2) compls = join(unique!(map(x -> "\"$(completion_text(x)[range.stop+2-range.start:end])\"",ret))," ") - tm_out("scheme:", "(tuple \"$(arg1[range])\" $(compls))") + tm_out("scheme_u8:", "(tuple \"$(arg1[range])\" $(compls))") end catch e # ignore errors diff --git a/TeXmacs/plugins/julia/progs/init-julia.scm b/TeXmacs/plugins/julia/progs/init-julia.scm index 885d705bdc..1d80dd0e63 100644 --- a/TeXmacs/plugins/julia/progs/init-julia.scm +++ b/TeXmacs/plugins/julia/progs/init-julia.scm @@ -15,7 +15,7 @@ (define (julia-serialize lan t) (let* ((u (pre-serialize lan t)) - (s (texmacs->code (stree->tree u) "SourceCode"))) + (s (texmacs->code (stree->tree u) "utf-8"))) (string-append s "\n\n"))) (define (julia-entry) diff --git a/TeXmacs/plugins/python/progs/init-python.scm b/TeXmacs/plugins/python/progs/init-python.scm index 21db55ef0b..da595fa48a 100644 --- a/TeXmacs/plugins/python/progs/init-python.scm +++ b/TeXmacs/plugins/python/progs/init-python.scm @@ -34,7 +34,7 @@ (define (python-serialize lan t) (with u (pre-serialize lan t) - (with s (texmacs->code (stree->tree u) "SourceCode") + (with s (texmacs->code (stree->tree u) "utf-8") (string-append s "\n\n")))) (define (python-utf8-command) diff --git a/TeXmacs/plugins/quiver/progs/init-quiver.scm b/TeXmacs/plugins/quiver/progs/init-quiver.scm index ca9618ddb2..f99d954f6e 100644 --- a/TeXmacs/plugins/quiver/progs/init-quiver.scm +++ b/TeXmacs/plugins/quiver/progs/init-quiver.scm @@ -18,7 +18,7 @@ (define (quiver-serialize lan t) (let* ((u (pre-serialize lan t)) - (s (texmacs->code (stree->tree u) "SourceCode"))) + (s (texmacs->code (stree->tree u) "utf-8"))) (string-append s "\n\n") ) ) diff --git a/TeXmacs/plugins/tikz/progs/init-tikz.scm b/TeXmacs/plugins/tikz/progs/init-tikz.scm index 13909100d7..66fc3d07df 100644 --- a/TeXmacs/plugins/tikz/progs/init-tikz.scm +++ b/TeXmacs/plugins/tikz/progs/init-tikz.scm @@ -20,7 +20,7 @@ (define (tikz-serialize lan t) (let* ((u (pre-serialize lan t)) - (s (texmacs->code (stree->tree u) "SourceCode"))) + (s (texmacs->code (stree->tree u) "utf-8"))) (string-append s "\n\n") ) ) From f0c50cf0cce66d3d3c05fbebd0152dea3781bad2 Mon Sep 17 00:00:00 2001 From: AXeonV <2607343351@qq.com> Date: Wed, 17 Jun 2026 15:16:21 +0800 Subject: [PATCH 3/4] add devel and tests --- devel/0803.md | 87 ++++++++++++++++++++++++++++ tests/Data/String/converter_test.cpp | 11 +++- 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 devel/0803.md diff --git a/devel/0803.md b/devel/0803.md new file mode 100644 index 0000000000..7c44b98379 --- /dev/null +++ b/devel/0803.md @@ -0,0 +1,87 @@ +# [0803] 将插件中编码协议从SourceCode改为UTF-8 + +## 1 相关文档 +- [0801.md](0801.md) — 集成 Julia 交互式会话插件 + +## 2 任务相关的代码文件 +- `src/Data/Convert/Verbatim/verbatim.cpp` — 统一输入端 `"auto"` 映射至 Locale(UTF-8),修复编解码不对称 +- `tests/Data/String/converter_test.cpp` — 扩展 `test_verbatim_to_tree_auto` 单元测试,补充多行 CJK 边界覆盖 +- `TeXmacs/plugins/julia/julia/MoganJulia.jl` — 升级自动补全通道为 UTF-8 的 `"scheme_u8:"`,消灭乱码 +- `TeXmacs/plugins/julia/progs/init-julia.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +- `TeXmacs/plugins/python/progs/init-python.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +- `TeXmacs/plugins/goldfish/progs/init-goldfish.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +- `TeXmacs/plugins/autosave/progs/init-autosave.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +- `TeXmacs/plugins/quiver/progs/init-quiver.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +- `TeXmacs/plugins/tikz/progs/init-tikz.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` + +## 3 如何测试 + +### 3.1 确定性测试(单元测试) +```bash +xmake b converter_test && xmake r converter_test +``` + +### 3.2 非确定性测试(文档验证) +进入插件会话,新建Python、Julia或Goldfish会话,输入中文,不会出现乱码(验证本次协议迁移不会带来字体解析问题) + +### 3.3 基准测试(量化优化效率) +```python +import re +import time + +# 1. 模拟 1000 个中文字符的文本进行备份或输出 +text_utf8 = "南方科技大学(简称:南科大)是深圳在中国高等教育改革发展的时代背景下创建的一所高起点、高定位的公办新型研究型大学。2022年2月14日,教育部等三部委公布第二轮“双一流”建设高校及建设学科名单,南方科技大学及数学学科入选“双一流”建设高校及建设学科名单。学校借鉴世界一流理工科大学的学科设置和办学模式,以理、工、医为主,兼具商科和特色人文社科的学科体系,在本科、硕士、博士层次办学,在一系列新的学科方向上开展研究,使学校成为引领社会发展的思想库和新知识、新技术的源泉。\n南方科技大学扎根中国大地,紧抓粤港澳大湾区、深圳先行示范区“双区”驱动,深圳经济特区、深圳先行示范区“双区”叠加的历史机遇,发扬“敢闯敢试、求真务实、改革创新、追求卓越”的创校精神,突出“创知、创新、创业”的办学特色,践行“明德求是、日新自强”的校训精神,努力服务创新型国家建设及深圳国际化现代化创新型城市建设,快速建设成为聚集一流师资、培养拔尖创新人才、创造国际一流学术成果并推动科技应用的国际化高水平研究型大学,为尽早实现建成世界一流研究型大学的宏伟目标打下坚实基础。\n" * 2 +# 转为旧版 SourceCode 格式 (Cork 编码转义) +text_sourcecode = re.sub(r"[^\x00-\x7F]", lambda m: f"<#{hex(ord(m.group(0)))[2:].upper()}>", text_utf8) + +print(text_utf8) +print(text_sourcecode) +print("\n") + +# 计算内存 / 传输带宽节省 +bytes_utf8 = len(text_utf8.encode("utf-8")) +bytes_sourcecode = len(text_sourcecode.encode("ascii")) +saved_bytes = bytes_sourcecode - bytes_utf8 +savings_ratio = (saved_bytes / bytes_sourcecode) * 100 + +print(f"=== 空间 / 传输带宽优化 (以 1000 个中文汉字/标点为例) ===") +print(f"旧版 SourceCode 协议体积: {bytes_sourcecode} 字节") +print(f"新版 UTF-8 协议体积: {bytes_utf8} 字节") +print(f"网络/管道带宽节省: {saved_bytes} 字节") +print(f"传输体积减少率: {savings_ratio:.2f}%\n") + +# 2. 模拟插件端(Python)接收 10000 次此数据时的反向解析 CPU 开销 +iterations = 10000 + +# 旧版需要用正则循环翻译 '<#...>' +start_old = time.perf_counter() +pattern = re.compile(r"<#([0-9A-F]+)>") +for _ in range(iterations): + decoded = pattern.sub(lambda m: chr(int(m.group(1), 16)), text_sourcecode) +end_old = time.perf_counter() +time_old = (end_old - start_old) * 1000 + +# 新版直接接收,0 解码开销 +start_new = time.perf_counter() +for _ in range(iterations): + decoded = text_utf8 # 直接读取,无需任何操作 +end_new = time.perf_counter() +time_new = (end_new - start_new) * 1000 + +print(f"=== 插件端 CPU 解析开销优化 (执行 {iterations} 次) ===") +print(f"旧版插件端正则解析耗时: {time_old:.2f} 毫秒") +print(f"新版插件端原生接收耗时: {time_new:.2f} 毫秒 (0 额外解析开销)") +print(f"插件端 CPU 计算耗时降低: {(time_old - time_new):.2f} 毫秒 (性能提升 {time_old/time_new:.1f} 倍)") +``` + +## 4 What +- 更改插件系统(Python、Julia、Goldfish、Autosave、Quiver、TikZ)的字符编码协议为 **UTF-8** +- 保持 Mogan 编辑器内部“母语”仍为 **Cork** 编码以保证渲染与排版树稳定 + +## 5 Why +旧版使用 `SourceCode`(Cork转义ASCII)传输数据,内存带宽高且效率低下 + +## 6 How +1. 修正 `verbatim.cpp` 的 `encode` 映射,统一输入输出端 `"auto"` 行为。在输入端通过 `get_locale_charset ()` 降级为 `"UTF-8"` 匹配 `utf8_to_cork` +2. 更替 6 个插件 init 配置,将序列化编码由 `"SourceCode"` 变更为 `"utf-8"` +3. 将 `MoganJulia.jl` 的自动补全格式从 `"scheme:"` 通道更替为具有 Scheme 树级 UTF-8 还原能力的 `"scheme_u8:"` 接口 diff --git a/tests/Data/String/converter_test.cpp b/tests/Data/String/converter_test.cpp index 7864889632..9e19f79443 100644 --- a/tests/Data/String/converter_test.cpp +++ b/tests/Data/String/converter_test.cpp @@ -41,8 +41,15 @@ TestConverter::test_cork_to_utf8 () { void TestConverter::test_verbatim_to_tree_auto () { - tree t = verbatim_to_tree ("中", false, "auto"); - qcompare (as_string (t), "<#4E2D>"); + // 单个中文字符 + tree t1 = verbatim_to_tree ("中", false, "auto"); + qcompare (as_string (t1), "<#4E2D>"); + + // 多行中文字符以及特殊标点符号 + tree t2 = verbatim_to_tree ("你好\n世界!", false, "auto"); + qcompare (N (t2), 2); + qcompare (as_string (t2[0]), "<#4F60><#597D>"); + qcompare (as_string (t2[1]), "<#4E16><#754C>!"); } QTEST_MAIN (TestConverter) From efdce6c7add932195afbce36bf710cfd86b24639 Mon Sep 17 00:00:00 2001 From: AXeonV <2607343351@qq.com> Date: Wed, 17 Jun 2026 15:29:31 +0800 Subject: [PATCH 4/4] fix format --- tests/Data/String/converter_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/Data/String/converter_test.cpp b/tests/Data/String/converter_test.cpp index 9e19f79443..0522233042 100644 --- a/tests/Data/String/converter_test.cpp +++ b/tests/Data/String/converter_test.cpp @@ -11,8 +11,8 @@ #include #include "base.hpp" -#include "converter.hpp" #include "convert.hpp" +#include "converter.hpp" #include "file.hpp" class TestConverter : public QObject { @@ -42,11 +42,11 @@ TestConverter::test_cork_to_utf8 () { void TestConverter::test_verbatim_to_tree_auto () { // 单个中文字符 - tree t1 = verbatim_to_tree ("中", false, "auto"); + tree t1= verbatim_to_tree ("中", false, "auto"); qcompare (as_string (t1), "<#4E2D>"); // 多行中文字符以及特殊标点符号 - tree t2 = verbatim_to_tree ("你好\n世界!", false, "auto"); + tree t2= verbatim_to_tree ("你好\n世界!", false, "auto"); qcompare (N (t2), 2); qcompare (as_string (t2[0]), "<#4F60><#597D>"); qcompare (as_string (t2[1]), "<#4E16><#754C>!");