Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion TeXmacs/plugins/autosave/progs/init-autosave.scm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
(with u
(pre-serialize lan t)
(with s
(texmacs->code (stree->tree u) "SourceCode")
(texmacs->code (stree->tree u) "utf-8")
(string-append s "\n<EOF>\n")
) ;with
) ;with
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/goldfish/progs/init-goldfish.scm
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

(define (goldfish-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")))

(define (goldfish-launch mode)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/julia/julia/MoganJulia.jl
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ function do_tab_complete(cmd::AbstractString)
if isa(arg1,AbstractString) && isa(arg2,Integer)
ret,range,shouldcomplete = completions(arg1,arg2)
compls = join(unique!(map(x -> "\"$(completion_text(x)[range.stop+2-range.start:end])\"",ret))," ")
tm_out("scheme:", "(tuple \"$(arg1[range])\" $(compls))")
tm_out("scheme_u8:", "(tuple \"$(arg1[range])\" $(compls))")
end
catch e
# ignore errors
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/julia/progs/init-julia.scm
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

(define (julia-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")))

(define (julia-entry)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/python/progs/init-python.scm
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

(define (python-serialize lan t)
(with u (pre-serialize lan t)
(with s (texmacs->code (stree->tree u) "SourceCode")
(with s (texmacs->code (stree->tree u) "utf-8")
(string-append s "\n<EOF>\n"))))

(define (python-utf8-command)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/quiver/progs/init-quiver.scm
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

(define (quiver-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")
)
)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/tikz/progs/init-tikz.scm
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

(define (tikz-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")
)
)
Expand Down
87 changes: 87 additions & 0 deletions devel/0803.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# [0803] 将插件中编码协议从SourceCode改为UTF-8

## 1 相关文档
- [0801.md](0801.md) — 集成 Julia 交互式会话插件

## 2 任务相关的代码文件
- `src/Data/Convert/Verbatim/verbatim.cpp` — 统一输入端 `"auto"` 映射至 Locale(UTF-8),修复编解码不对称
- `tests/Data/String/converter_test.cpp` — 扩展 `test_verbatim_to_tree_auto` 单元测试,补充多行 CJK 边界覆盖
- `TeXmacs/plugins/julia/julia/MoganJulia.jl` — 升级自动补全通道为 UTF-8 的 `"scheme_u8:"`,消灭乱码
- `TeXmacs/plugins/julia/progs/init-julia.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/python/progs/init-python.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/goldfish/progs/init-goldfish.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/autosave/progs/init-autosave.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/quiver/progs/init-quiver.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/tikz/progs/init-tikz.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`

## 3 如何测试

### 3.1 确定性测试(单元测试)
```bash
xmake b converter_test && xmake r converter_test
```

### 3.2 非确定性测试(文档验证)
进入插件会话,新建 Python、Julia 或 Goldfish 会话,输入中文,确认不会出现乱码(验证本次协议迁移不会带来字符编码解析问题)

### 3.3 基准测试(量化优化效率)
```python
import re
import time

# Benchmark sketch comparing the legacy SourceCode wire format (every
# non-ASCII character escaped as <#HEX>) with plain UTF-8, in terms of
# payload size and plugin-side decoding cost.

# 1. Simulate backing up or emitting a text of roughly 1000 Chinese characters.
text_utf8 = "南方科技大学(简称:南科大)是深圳在中国高等教育改革发展的时代背景下创建的一所高起点、高定位的公办新型研究型大学。2022年2月14日,教育部等三部委公布第二轮“双一流”建设高校及建设学科名单,南方科技大学及数学学科入选“双一流”建设高校及建设学科名单。学校借鉴世界一流理工科大学的学科设置和办学模式,以理、工、医为主,兼具商科和特色人文社科的学科体系,在本科、硕士、博士层次办学,在一系列新的学科方向上开展研究,使学校成为引领社会发展的思想库和新知识、新技术的源泉。\n南方科技大学扎根中国大地,紧抓粤港澳大湾区、深圳先行示范区“双区”驱动,深圳经济特区、深圳先行示范区“双区”叠加的历史机遇,发扬“敢闯敢试、求真务实、改革创新、追求卓越”的创校精神,突出“创知、创新、创业”的办学特色,践行“明德求是、日新自强”的校训精神,努力服务创新型国家建设及深圳国际化现代化创新型城市建设,快速建设成为聚集一流师资、培养拔尖创新人才、创造国际一流学术成果并推动科技应用的国际化高水平研究型大学,为尽早实现建成世界一流研究型大学的宏伟目标打下坚实基础。\n" * 2
# Convert to the legacy SourceCode form: each non-ASCII character becomes
# an upper-case hexadecimal <#XXXX> escape.
text_sourcecode = re.sub(r"[^\x00-\x7F]", lambda m: f"<#{ord(m.group(0)):X}>", text_utf8)

print(text_utf8)
print(text_sourcecode)
print("\n")

# Payload size / transfer bandwidth comparison.
bytes_utf8 = len(text_utf8.encode("utf-8"))
bytes_sourcecode = len(text_sourcecode.encode("ascii"))
saved_bytes = bytes_sourcecode - bytes_utf8
savings_ratio = (saved_bytes / bytes_sourcecode) * 100

print("=== 空间 / 传输带宽优化 (以 1000 个中文汉字/标点为例) ===")
print(f"旧版 SourceCode 协议体积: {bytes_sourcecode} 字节")
print(f"新版 UTF-8 协议体积: {bytes_utf8} 字节")
print(f"网络/管道带宽节省: {saved_bytes} 字节")
print(f"传输体积减少率: {savings_ratio:.2f}%\n")

# 2. Simulate the plugin side (Python) decoding this payload 10000 times.
iterations = 10000

# Legacy path: every '<#...>' escape has to be translated back with a regex.
start_old = time.perf_counter()
pattern = re.compile(r"<#([0-9A-F]+)>")
for _ in range(iterations):
    decoded = pattern.sub(lambda m: chr(int(m.group(1), 16)), text_sourcecode)
end_old = time.perf_counter()
time_old = (end_old - start_old) * 1000

# New path: the payload is used as received, with no decoding step.
# NOTE(review): this loop is a bare assignment, so the ratio printed below
# compares the regex decode against an essentially empty baseline.
start_new = time.perf_counter()
for _ in range(iterations):
    decoded = text_utf8  # read as-is; nothing to do
end_new = time.perf_counter()
time_new = (end_new - start_new) * 1000

# Guard the ratio: the baseline loop can in principle measure as zero.
speedup = time_old / time_new if time_new > 0 else float("inf")

print(f"=== 插件端 CPU 解析开销优化 (执行 {iterations} 次) ===")
print(f"旧版插件端正则解析耗时: {time_old:.2f} 毫秒")
print(f"新版插件端原生接收耗时: {time_new:.2f} 毫秒 (0 额外解析开销)")
print(f"插件端 CPU 计算耗时降低: {(time_old - time_new):.2f} 毫秒 (性能提升 {speedup:.1f} 倍)")
```

## 4 What
- 更改插件系统(Python、Julia、Goldfish、Autosave、Quiver、TikZ)的字符编码协议为 **UTF-8**
- 保持 Mogan 编辑器内部“母语”仍为 **Cork** 编码以保证渲染与排版树稳定

## 5 Why
旧版使用 `SourceCode`(Cork 转义的 ASCII)传输数据:每个非 ASCII 字符都会被展开为 `<#XXXX>` 形式的转义序列,传输体积与内存/管道带宽占用更大,且插件端需要额外解码,效率低下

## 6 How
1. 修正 `verbatim.cpp` 中 `encode` 的映射,使输入端与输出端对 `"auto"` 的处理保持一致:输入端先通过 `get_locale_charset ()` 将 `"auto"` 解析为本地字符集,当结果为 `"utf-8"` 或 `"UTF-8"` 时交由 `utf8_to_cork` 转换
2. 更替 6 个插件 init 配置,将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
3. 将 `MoganJulia.jl` 的自动补全格式从 `"scheme:"` 通道更替为具有 Scheme 树级 UTF-8 还原能力的 `"scheme_u8:"` 接口
4 changes: 2 additions & 2 deletions src/Data/Convert/Verbatim/verbatim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ un_special (string s) {

static string
encode (string s, string enc) {
if (enc == "auto") return western_to_cork (s);
else if (enc == "utf-8") return utf8_to_cork (s);
if (enc == "auto") enc= get_locale_charset ();
if (enc == "utf-8" || enc == "UTF-8") return utf8_to_cork (s);
else if (enc == "iso-8859-1") return tm_encode (s);
else if (enc == "SourceCode") return sourcecode_to_cork (s);
Comment on lines 297 to 301
else return tm_encode (s);
Expand Down
15 changes: 15 additions & 0 deletions tests/Data/String/converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <QtTest/QtTest>

#include "base.hpp"
#include "convert.hpp"
#include "converter.hpp"
#include "file.hpp"

Expand All @@ -21,6 +22,7 @@ private slots:
void init () { init_lolly (); };
void test_utf8_to_cork ();
void test_cork_to_utf8 ();
void test_verbatim_to_tree_auto ();
};

void
Expand All @@ -37,5 +39,18 @@ TestConverter::test_cork_to_utf8 () {
qcompare (cork_to_utf8 ("\x11"), "”");
}

void
TestConverter::test_verbatim_to_tree_auto () {
// 单个中文字符
tree t1= verbatim_to_tree ("中", false, "auto");
qcompare (as_string (t1), "<#4E2D>");

// 多行中文字符以及特殊标点符号
tree t2= verbatim_to_tree ("你好\n世界!", false, "auto");
qcompare (N (t2), 2);
qcompare (as_string (t2[0]), "<#4F60><#597D>");
qcompare (as_string (t2[1]), "<#4E16><#754C>!");
}
Comment on lines +42 to +53

QTEST_MAIN (TestConverter)
#include "converter_test.moc"
Loading