重构AST抽象rewrite

author: 魏曹先生 <1992414357@qq.com> 2026-04-23 18:58:41 +0800
committer: 魏曹先生 <1992414357@qq.com> 2026-04-23 18:58:41 +0800
commit: 7d9f9be43469748148da5cdf516cd8b32238e1f5 (patch)
tree: e3904be9901294e0193419cb30e8f6fa1d33fae3 /src
parent: 7525fe0834e47bef425135e8cda1d576c44060a5 (diff)
3 files changed, 287 insertions, 57 deletions
diff --git a/src/ast.rs b/src/ast.rs
index 49cfd6f..3f8882d 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -3,93 +3,164 @@ use std::collections::HashMap;
 pub mod parser;
 pub mod renderer;
 
+/// 语法树
 pub struct MarkdownAST {
-    pub root: Layer,
+    /// 块列表
+    pub blocks: Vec<Block>,
 }
 
+/// 层级
 pub type Level = u8;
+
+/// 语言（用于代码块）
 pub type Lang = String;
+
+/// 标题（用于 Link 的可选标题）
 pub type Title = String;
+
+/// 链接（用于 Link）
 pub type Url = String;
 
-pub struct Layer {
-    pub range_row_begin: u32,
-    pub range_row_end: u32,
-    pub lines: Vec<Line>,
+/// 块
+pub struct Block {
+    /// 块开始行
+    pub begin_row: u32,
+
+    /// 块结束行
+    pub end_row: u32,
+
+    /// 块数据
+    pub data: BlockData,
+}
+
+/// 块数据
+pub enum BlockData {
+    /// 标题
+    Heading(Level, Line),
+
+    // 列表
+    UnorderedList(Vec<Line>, UnorderedListPrefix),
+    OrderedList(Vec<Line>, u32),
+
+    /// 普通行
+    Lines(Vec<Line>),
+
+    /// 表格
+    Table(Table),
+
+    /// 代码块
+    Code(Fragment, Lang),
+
+    /// 引用
+    Blockquotes(Vec<Line>),
+
+    /// 分割线
+    HorizontalRule(HorizontalRuleStyle),
 }
 
+/// 分割线风格
+pub enum HorizontalRuleStyle {
+    Stars,
+    Dashes,
+    Underscores,
+}
+
+/// 无序列表前缀
+pub enum UnorderedListPrefix {
+    /// 星号 "*"
+    Star,
+
+    /// 减号 "-"
+    Dash,
+
+    /// 加号 "+"
+    Plus,
+}
+
+/// 行
 pub struct Line {
+    /// 行所在位置
     pub row: u32,
+
+    /// 行内记录的词
     pub tokens: Vec<Token>,
 }
 
+/// 词
 pub struct Token {
+    /// 开始行
     pub begin_row: u32,
+
+    /// 开始列
     pub begin_col: u16,
+
+    /// 结束行
     pub end_row: u32,
+
+    /// 结束列
     pub end_col: u16,
-    pub token: TokenData,
+
+    /// 词数据
+    pub data: TokenData,
 }
 
+/// 词数据
 pub enum TokenData {
+    /// 普通 - 记录片段
     Normal(Fragment),
-    Newline,
-    Newlayer(Layer),
-
-    // Headings
-    Heading(Level, Line, Layer),
-
-    // Emphasis
-    Emphasis(Vec<TokenData>, EmphasisStyle),
-
-    // Lists
-    UnorderedList(Line, UnorderedListPrefix),
-    OrderedList(Line, u32),
-
-    // Links
-    Link(Vec<Fragment>, Url, Option<Title>, LinkType),
-
-    // Code
-    InlineCode(Vec<Fragment>),
-    CodeBlock(Vec<Line>, Option<Lang>),
 
-    // Blockquotes
-    Blockquotes(Layer),
+    /// 强调 - 记录词列表（嵌套）和强调样式
+    Emphasis(Vec<Token>, EmphasisFormat),
 
-    // HorizontalRule
-    HorizontalRule(HorizontalRuleType),
+    /// 链接 - 记录词列表（嵌套）、链接、标题、链接类型
+    Link(Vec<Token>, Url, Option<Title>, LinkType),
 
-    // Table
-    Table(Table),
+    /// 内联代码 - 记录词列表（嵌套）
+    InlineCode(Vec<Token>),
 }
 
+/// 片段
+#[derive(Debug, Default)]
 pub struct Fragment {
+    /// 片段文本
     pub str: String,
 }
 
-pub struct EmphasisStyle {
+/// 强调格式
+pub struct EmphasisFormat {
+    /// 强调样式
+    pub style: EmphasisStyle,
+
+    /// 是否为粗体
     pub bold: bool,
+
+    /// 是否为斜体
     pub italic: bool,
-    pub strikethrough: bool,
 }
 
-pub enum UnorderedListPrefix {
+/// 强调样式
+pub enum EmphasisStyle {
+    /// 星号 "*"
     Star,
-    Dash,
-    Plus,
+
+    /// 下划线 "_"
+    Underline,
 }
 
+/// 链接类型
 pub enum LinkType {
+    /// 图像链接
     Image,
+
+    /// 网页链接
     Url,
-}
 
-pub enum HorizontalRuleType {
-    Stars,
-    Dashes,
-    Underscores,
+    /// 段落链接
+    Section,
 }
 
+/// 表格
 pub struct Table {
-    pub contents: HashMap<(u32, u32), Vec<Fragment>>,
+    /// 位置 映射 词列表
+    pub contents: HashMap<(u32, u32), Vec<Token>>,
 }
diff --git a/src/ast/parser.rs b/src/ast/parser.rs
index 9add926..bfcf271 100644
--- a/src/ast/parser.rs
+++ b/src/ast/parser.rs
@@ -1,25 +1,26 @@
 use std::{any::Any, collections::HashMap, str::Chars};
 
-use crate::ast::{Layer, Line, MarkdownAST, Token};
+use crate::ast::{Fragment, Line, MarkdownAST, Token};
 
+pub mod emphasis;
 pub mod headings;
 
 type ProcessFn = fn(&char, &mut ParserInternalStatus) -> ParserMatchResult;
 
 fn match_fn_list() -> Vec<ProcessFn> {
-    // 要求以 预处理、词、行、层、后处理 的顺序编写列表
-    // 因为 词处理器 将会写入 records_tokens 由 行处理器 消费
-    // 接着 行处理器 将会写入 records_lines 由 层处理器 消费
-    // 最后 层处理器 将所有行写入当前层中
+    // 要求以 预处理、词、行、块、后处理 的顺序编写列表
+    // 然后 词处理器 将会写入 records_tokens 由 行处理器 消费
+    // 接着 行处理器 将会写入 records_lines 由 块处理器 消费
+    // 块处理器则将自身加入块列表中
     vec![
         // 预处理器
         // ...
         // 词处理器
-        // ...
+        emphasis::proc,
         // 行处理器
-        headings::proc,
-        // 层处理器
         // ...
+        // 块处理器
+        headings::proc,
         // 后处理器
         post,
     ]
@@ -76,6 +77,9 @@ pub(crate) struct ParserInternalStatus<'a> {
     /// 记录的 Token，用于暂存无归属的 Token
     records_tokens: Vec<Token>,
 
+    /// 记录的 Fragment，用于暂存无归属的 Fragment
+    records_fragment: Fragment,
+
     /// 临时类型表
     tmp: HashMap<&'a str, Box<dyn Any>>,
 }
@@ -120,17 +124,19 @@ pub(crate) enum ParserMatchResult {
 }
 
 pub fn markdown_parser(
-    content: &str,
+    content: String,
     cfg: MarkdownParserConfig,
 ) -> Result<MarkdownAST, MarkdownASTParseError> {
     // 创建空 AST，无任何内容
-    let ast = MarkdownAST {
-        root: Layer {
-            range_row_begin: 0,
-            range_row_end: 0,
-            lines: Vec::new(),
-        },
+    let ast = MarkdownAST { blocks: vec![] };
+
+    // 在末尾添加换行符，以确保末尾行一定能被执行
+    let mut content = content;
+    let ending = match cfg.ending_rule {
+        LineEndingRule::CRLF => "\r\n",
+        LineEndingRule::LF => "\n",
     };
+    content.push_str(ending);
 
     // 初始化内部状态
     let mut inr = ParserInternalStatus {
@@ -142,6 +148,7 @@ pub fn markdown_parser(
         col: 0,
         records_lines: Vec::new(),
         records_tokens: Vec::new(),
+        records_fragment: Fragment::default(),
         tmp: HashMap::new(),
     };
 
@@ -165,6 +172,9 @@ pub fn markdown_parser(
         // 当前处理函数的索引值
         let mut idx: u8 = 0;
 
+        // 该字符是否已完成处理
+        let mut done = false;
+
         for v in &match_vec {
             // 当前处理函数在放弃列表中
             if aborted.contains(&idx) {
@@ -180,8 +190,13 @@ pub fn markdown_parser(
                     //
                     // 在 `post` 中，必须处理该字符的换行逻辑，否则会产生字符位置指针异常
                     if !matches!(c, '\r' | '\n') {
+                        // 将字符标记为已完成处理
+                        done = true;
+
                         // 跳过当前步骤前，提前将列指针右移
                         inr.col += 1;
+
+                        // 所有处理器跳过当前步骤
                         break;
                     }
                 }
@@ -213,9 +228,16 @@ pub fn markdown_parser(
                     ));
                 }
             }
+
+            // 下一个处理器继续处理
             idx += 1;
         }
 
+        if !done {
+            // 如果字符未完成处理，说明是普通字符，需要加入 records_fragment
+            inr.records_fragment.str.push(c);
+        }
+
         // 将列指针右移
         inr.col += 1;
     }
diff --git a/src/ast/parser/emphasis.rs b/src/ast/parser/emphasis.rs
new file mode 100644
index 0000000..c8e6b9b
--- /dev/null
+++ b/src/ast/parser/emphasis.rs
@@ -0,0 +1,137 @@
+use crate::ast::parser::{ParserInternalStatus, ParserMatchResult};
+
+/// Working state of the emphasis processor, obtained through `get_tmp`.
+#[derive(Default)]
+struct EmphasisTmp {
+    /// Column where the current emphasis prefix starts.
+    emphasis_begin_col: u16,
+    /// Delimiter style of the prefix (`Style::None` until one is typed); used for suffix matching.
+    prefix_style: Style,
+    /// Number of delimiter characters typed so far; more than 3 is reported as a syntax error.
+    prefix_count: u8,
+    /// Whether the emphasis prefix is currently being typed.
+    typing_emphasis_prefix: bool,
+    /// Whether the emphasis content is currently being typed.
+    typing_emphasis_content: bool,
+}
+
+#[derive(Default, PartialEq, Eq)]
+#[repr(u8)]
+enum Style {
+    /// No style (the default value)
+    #[default]
+    None,
+
+    /// Asterisk "*"
+    Star,
+
+    /// Underscore "_"
+    Underline,
+}
+
+impl Style {
+    /// Returns the opposite delimiter style: `Star` and `Underline` swap, `None` stays `None`.
+    pub fn invert(&self) -> Style {
+        match self {
+            Style::Underline => Style::Star,
+            Style::Star => Style::Underline,
+            Style::None => Style::None,
+        }
+    }
+}
+
+impl std::fmt::Display for Style {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::None => "", // no style renders as nothing
+            Self::Star => "*",
+            Self::Underline => "_",
+        })
+    }
+}
+
+impl From<Style> for char {
+    fn from(style: Style) -> char {
+        match style {
+            Style::None => ' ', // `None` has no delimiter and maps to a space
+            Style::Star => '*',
+            Style::Underline => '_',
+        }
+    }
+}
+
+fn get_tmp<'a>(inr: &'a mut ParserInternalStatus) -> &'a mut EmphasisTmp {
+    inr.get_tmp_or_init::<EmphasisTmp>("emphasis_tmp") // presumably created on first use — TODO confirm
+}
+
+pub(crate) fn proc(c: &char, inr: &mut ParserInternalStatus) -> ParserMatchResult {
+    match c {
+        // Characters recognised as emphasis delimiters
+
+        // Asterisk
+        '*' => typed_emphasis_char(Style::Star, inr),
+
+        // Underscore
+        '_' => typed_emphasis_char(Style::Underline, inr),
+
+        // Any other character
+        _ => typed_other_char(c, inr),
+    }
+}
+
+fn typed_emphasis_char(s: Style, inr: &mut ParserInternalStatus) -> ParserMatchResult {
+    let col = inr.col;
+    let tmp = get_tmp(inr);
+
+    // No style recorded yet (state not initialised): this character starts a new prefix
+    if tmp.prefix_style == Style::None {
+        // Record the style
+        tmp.prefix_style = s;
+        // Start counting the prefix length
+        tmp.prefix_count = 1;
+        // Remember the starting column
+        tmp.emphasis_begin_col = col;
+
+        tmp.typing_emphasis_prefix = true;
+        tmp.typing_emphasis_content = false;
+    } else
+    // A style is already recorded: check whether this character matches it
+    if tmp.prefix_style == s {
+        // Same style: extend the prefix length
+        tmp.prefix_count += 1;
+
+        // More than 3 delimiters (the maximum) is reported as a syntax error
+        if tmp.prefix_count > 3 {
+            return ParserMatchResult::SyntaxError {
+                begin_col: tmp.emphasis_begin_col,
+                begin_row: inr.row,
+                end_col: inr.col,
+                end_row: inr.row,
+                msg: "Emphasis characters can be at most 3".to_string(),
+            };
+        }
+    } else {
+        // Different style from the recorded one: report a syntax error
+        return ParserMatchResult::SyntaxError {
+            begin_col: inr.col,
+            begin_row: inr.row,
+            end_col: inr.col,
+            end_row: inr.row,
+            msg: format!(
+                "Emphasis statement (style: \"{}\") cannot use another emphasis statement (style: \"{}\") before closing",
+                s.invert(),
+                s
+            ),
+        };
+    }
+
+    ParserMatchResult::Done
+}
+
+fn typed_other_char(c: &char, inr: &mut ParserInternalStatus) -> ParserMatchResult {
+    let tmp = get_tmp(inr);
+    // NOTE(review): no `ParserMatchResult` is returned and `c` is unused — body looks unfinished
+    // A non-delimiter character ends the prefix phase and switches to the content phase
+    tmp.typing_emphasis_prefix = false;
+    tmp.typing_emphasis_content = true;
+}
author	魏曹先生 <1992414357@qq.com>	2026-04-23 18:58:41 +0800
committer	魏曹先生 <1992414357@qq.com>	2026-04-23 18:58:41 +0800
commit	7d9f9be43469748148da5cdf516cd8b32238e1f5 (patch)
tree	e3904be9901294e0193419cb30e8f6fa1d33fae3 /src
parent	7525fe0834e47bef425135e8cda1d576c44060a5 (diff)