Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion TeXmacs/plugins/autosave/progs/init-autosave.scm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
(with u
(pre-serialize lan t)
(with s
(texmacs->code (stree->tree u) "SourceCode")
(texmacs->code (stree->tree u) "utf-8")
(string-append s "\n<EOF>\n")
) ;with
) ;with
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/goldfish/progs/init-goldfish.scm
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

(define (goldfish-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")))

(define (goldfish-launch mode)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/julia/julia/MoganJulia.jl
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ function do_tab_complete(cmd::AbstractString)
if isa(arg1,AbstractString) && isa(arg2,Integer)
ret,range,shouldcomplete = completions(arg1,arg2)
compls = join(unique!(map(x -> "\"$(completion_text(x)[range.stop+2-range.start:end])\"",ret))," ")
tm_out("scheme:", "(tuple \"$(arg1[range])\" $(compls))")
tm_out("scheme_u8:", "(tuple \"$(arg1[range])\" $(compls))")
end
catch e
# ignore errors
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/julia/progs/init-julia.scm
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

(define (julia-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")))

(define (julia-entry)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/python/progs/init-python.scm
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

(define (python-serialize lan t)
(with u (pre-serialize lan t)
(with s (texmacs->code (stree->tree u) "SourceCode")
(with s (texmacs->code (stree->tree u) "utf-8")
(string-append s "\n<EOF>\n"))))

(define (python-utf8-command)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/quiver/progs/init-quiver.scm
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

(define (quiver-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")
)
)
Expand Down
2 changes: 1 addition & 1 deletion TeXmacs/plugins/tikz/progs/init-tikz.scm
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

(define (tikz-serialize lan t)
(let* ((u (pre-serialize lan t))
(s (texmacs->code (stree->tree u) "SourceCode")))
(s (texmacs->code (stree->tree u) "utf-8")))
(string-append s "\n<EOF>\n")
)
)
Expand Down
87 changes: 87 additions & 0 deletions devel/0803.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# [0803] 将插件中编码协议从SourceCode改为UTF-8

## 1 相关文档
- [0801.md](0801.md) — 集成 Julia 交互式会话插件

## 2 任务相关的代码文件
- `src/Data/Convert/Verbatim/verbatim.cpp` — 统一输入端 `"auto"` 映射至 Locale(UTF-8),修复编解码不对称
- `tests/Data/String/converter_test.cpp` — 扩展 `test_verbatim_to_tree_auto` 单元测试,补充多行 CJK 边界覆盖
- `TeXmacs/plugins/julia/julia/MoganJulia.jl` — 升级自动补全通道为 UTF-8 的 `"scheme_u8:"`,消灭乱码
- `TeXmacs/plugins/julia/progs/init-julia.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/python/progs/init-python.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/goldfish/progs/init-goldfish.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/autosave/progs/init-autosave.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/quiver/progs/init-quiver.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
- `TeXmacs/plugins/tikz/progs/init-tikz.scm` — 将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`

## 3 如何测试

### 3.1 确定性测试(单元测试)
```bash
xmake b converter_test && xmake r converter_test
```

### 3.2 非确定性测试(文档验证)
启动 Mogan 后,分别新建 Python、Julia 和 Goldfish 会话,输入包含中文的内容并执行;预期输入与输出中的中文均正常显示,不出现乱码(用于验证本次协议迁移没有引入字符编码解析问题)。

### 3.3 基准测试(量化优化效率)
```python
import re
import time

# 1. Sample payload: Chinese text (about 1000 characters, per the original note) standing in for content that is backed up or sent to a plugin
text_utf8 = "南方科技大学(简称:南科大)是深圳在中国高等教育改革发展的时代背景下创建的一所高起点、高定位的公办新型研究型大学。2022年2月14日,教育部等三部委公布第二轮“双一流”建设高校及建设学科名单,南方科技大学及数学学科入选“双一流”建设高校及建设学科名单。学校借鉴世界一流理工科大学的学科设置和办学模式,以理、工、医为主,兼具商科和特色人文社科的学科体系,在本科、硕士、博士层次办学,在一系列新的学科方向上开展研究,使学校成为引领社会发展的思想库和新知识、新技术的源泉。\n南方科技大学扎根中国大地,紧抓粤港澳大湾区、深圳先行示范区“双区”驱动,深圳经济特区、深圳先行示范区“双区”叠加的历史机遇,发扬“敢闯敢试、求真务实、改革创新、追求卓越”的创校精神,突出“创知、创新、创业”的办学特色,践行“明德求是、日新自强”的校训精神,努力服务创新型国家建设及深圳国际化现代化创新型城市建设,快速建设成为聚集一流师资、培养拔尖创新人才、创造国际一流学术成果并推动科技应用的国际化高水平研究型大学,为尽早实现建成世界一流研究型大学的宏伟目标打下坚实基础。\n" * 2
# Legacy SourceCode form: every non-ASCII character is replaced by a <#HEX> escape of its code point (uppercase hex)
text_sourcecode = re.sub(r"[^\x00-\x7F]", lambda m: f"<#{hex(ord(m.group(0)))[2:].upper()}>", text_utf8)

print(text_utf8)
print(text_sourcecode)
print("\n")

# Compare the encoded byte sizes of both forms (memory / transfer bandwidth saved)
bytes_utf8 = len(text_utf8.encode("utf-8"))
bytes_sourcecode = len(text_sourcecode.encode("ascii"))
saved_bytes = bytes_sourcecode - bytes_utf8
savings_ratio = (saved_bytes / bytes_sourcecode) * 100

print(f"=== 空间 / 传输带宽优化 (以 1000 个中文汉字/标点为例) ===")
print(f"旧版 SourceCode 协议体积: {bytes_sourcecode} 字节")
print(f"新版 UTF-8 协议体积: {bytes_utf8} 字节")
print(f"网络/管道带宽节省: {saved_bytes} 字节")
print(f"传输体积减少率: {savings_ratio:.2f}%\n")

# 2. Simulate the plugin side (Python) receiving this payload 10000 times and
#    measure the CPU cost of turning it back into text
iterations = 10000

# Legacy path: every '<#...>' escape has to be translated back with a regex.
# The pattern is compiled before the timer starts so that only the decoding
# work itself is measured.
pattern = re.compile(r"<#([0-9A-F]+)>")
start_old = time.perf_counter()
for _ in range(iterations):
    decoded = pattern.sub(lambda m: chr(int(m.group(1), 16)), text_sourcecode)
end_old = time.perf_counter()
time_old = (end_old - start_old) * 1000  # elapsed milliseconds

# New path: the UTF-8 text is used as received, there is no decoding step
start_new = time.perf_counter()
for _ in range(iterations):
    decoded = text_utf8  # plain read, nothing to decode
end_new = time.perf_counter()
time_new = (end_new - start_new) * 1000  # elapsed milliseconds

print(f"=== 插件端 CPU 解析开销优化 (执行 {iterations} 次) ===")
print(f"旧版插件端正则解析耗时: {time_old:.2f} 毫秒")
print(f"新版插件端原生接收耗时: {time_new:.2f} 毫秒 (0 额外解析开销)")
print(f"插件端 CPU 计算耗时降低: {(time_old - time_new):.2f} 毫秒 (性能提升 {time_old/time_new:.1f} 倍)")
```

## 4 What
- 更改插件系统(Python、Julia、Goldfish、Autosave、Quiver、TikZ)的字符编码协议为 **UTF-8**
- 保持 Mogan 编辑器内部“母语”仍为 **Cork** 编码以保证渲染与排版树稳定

## 5 Why
旧版使用 `SourceCode`(Cork 转义的 ASCII)传输数据:每个非 ASCII 字符都会被展开为 `<#XXXX>` 形式的转义序列,传输体积大、占用带宽高,并且插件端还需要额外解码,效率低下

## 6 How
1. 修正 `verbatim.cpp` 中 `encode` 的映射,使输入端 `"auto"` 的行为与输出端一致:输入端先将 `"auto"` 解析为 `get_locale_charset ()` 的返回值,当结果为 `"UTF-8"`(或 `"utf-8"`)时由 `utf8_to_cork` 转换,不再固定走 `western_to_cork`
2. 更替 6 个插件 init 配置,将序列化编码由 `"SourceCode"` 变更为 `"utf-8"`
3. 将 `MoganJulia.jl` 的自动补全格式从 `"scheme:"` 通道更替为具有 Scheme 树级 UTF-8 还原能力的 `"scheme_u8:"` 接口
4 changes: 2 additions & 2 deletions src/Data/Convert/Verbatim/verbatim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ un_special (string s) {

static string
encode (string s, string enc) {
if (enc == "auto") return western_to_cork (s);
else if (enc == "utf-8") return utf8_to_cork (s);
if (enc == "auto") enc= get_locale_charset ();
if (enc == "utf-8" || enc == "UTF-8") return utf8_to_cork (s);
else if (enc == "iso-8859-1") return tm_encode (s);
else if (enc == "SourceCode") return sourcecode_to_cork (s);
Comment on lines 297 to 301
else return tm_encode (s);
Expand Down
15 changes: 15 additions & 0 deletions tests/Data/String/converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "base.hpp"
#include "converter.hpp"
#include "convert.hpp"
#include "file.hpp"

class TestConverter : public QObject {
Expand All @@ -21,6 +22,7 @@ private slots:
void init () { init_lolly (); };
void test_utf8_to_cork ();
void test_cork_to_utf8 ();
void test_verbatim_to_tree_auto ();
};

void
Expand All @@ -37,5 +39,18 @@ TestConverter::test_cork_to_utf8 () {
qcompare (cork_to_utf8 ("\x11"), "”");
}

void
TestConverter::test_verbatim_to_tree_auto () {
// 单个中文字符
tree t1 = verbatim_to_tree ("中", false, "auto");
qcompare (as_string (t1), "<#4E2D>");

// 多行中文字符以及特殊标点符号
tree t2 = verbatim_to_tree ("你好\n世界!", false, "auto");
qcompare (N (t2), 2);
qcompare (as_string (t2[0]), "<#4F60><#597D>");
qcompare (as_string (t2[1]), "<#4E16><#754C>!");
}
Comment on lines +42 to +53

QTEST_MAIN (TestConverter)
#include "converter_test.moc"
Loading