Move prototype files to _prototype directoryHEAD master

author: 魏曹先生 <1992414357@qq.com> 2026-04-22 20:24:18 +0800
committer: 魏曹先生 <1992414357@qq.com> 2026-04-22 20:24:18 +0800
commit: 277bc93f84b298c7cb24e136f67eb237fb3a68a2 (patch)
tree: 3a678677882e9884a2a17fa828358d1c9fd3dd9c /_prototype/converter/src/parse.rs
parent: 3e3ffa97985ed588ef46e007edb7507189a65e8e (diff)
1 files changed, 1303 insertions, 0 deletions
diff --git a/_prototype/converter/src/parse.rs b/_prototype/converter/src/parse.rs
new file mode 100644
index 0000000..292a761
--- /dev/null
+++ b/_prototype/converter/src/parse.rs
@@ -0,0 +1,1303 @@
+use std::path::PathBuf;
+
+use crate::{
+    error::Exit,
+    syntax_checker::{check_duplicate_marker, check_markdown_syntax},
+};
+
+pub fn parse(input: PathBuf, ir_output: PathBuf) -> Result<(), Exit> {
+    let result = std::fs::read_to_string(&input)?;
+
+    check_markdown_syntax(&result)?;
+
+    let result = unwrap_includes::proc(result, input)?;
+
+    check_duplicate_marker(&result)?;
+
+    let result = markdown_cleanup::proc(result)?;
+    let result = markdown_jump_fix::proc(result)?;
+    let result = markdown_marker_rename::proc(result)?;
+    let result = markdown_struct_build::proc(result)?;
+    let result = markdown_strip_invalid_jump::proc(result)?;
+    let result = markdown_convert_image::proc(result)?;
+    let result = markdown_apply_codes::proc(result)?;
+    let result = markdown_split_and_encode::proc(result)?;
+
+    std::fs::write(&ir_output, result)?;
+    Ok(())
+}
+
+pub mod unwrap_includes {
+    use crate::{error::Exit, utils::path_fmt::format_path};
+    use regex::Regex;
+    use std::collections::HashMap;
+    use std::path::{Path, PathBuf};
+
+    /// Expand text includes of [[markdown]] (searches for markdown.md) and image paths
+    pub fn proc(input: String, self_path: PathBuf) -> Result<String, Exit> {
+        let mut stack = Vec::<PathBuf>::new();
+        let mut cache = HashMap::<String, PathBuf>::new();
+        let mut img_cache = HashMap::<String, PathBuf>::new();
+        let root_path = self_path.clone();
+        expand_recursive(
+            input,
+            &self_path,
+            &root_path,
+            &mut stack,
+            &mut cache,
+            &mut img_cache,
+        )
+    }
+
+    fn expand_recursive(
+        content: String,
+        current_path: &Path,
+        root_path: &Path,
+        stack: &mut Vec<PathBuf>,
+        cache: &mut HashMap<String, PathBuf>,
+        img_cache: &mut HashMap<String, PathBuf>,
+    ) -> Result<String, Exit> {
+        let mut output = String::new();
+        let mut in_code_block = false;
+        let obsidian_image_re = Regex::new(r"!\[\[([^\]]+)\]\]").unwrap();
+
+        let current_norm = format_path(current_path)?;
+
+        if stack.contains(&current_norm) {
+            return Err(Exit::CycleDependency(current_norm));
+        }
+
+        stack.push(current_norm.clone());
+
+        for line in content.lines() {
+            if line.trim().starts_with("```") {
+                in_code_block = !in_code_block;
+                output.push_str(line);
+                output.push('\n');
+                continue;
+            }
+
+            if in_code_block {
+                output.push_str(line);
+                output.push('\n');
+                continue;
+            }
+
+            if let Some(include_name) = extract_include(line) {
+                let include_abs = if let Some(cached_path) = cache.get(include_name) {
+                    cached_path.clone()
+                } else {
+                    let base_dir = current_path.parent().unwrap();
+                    let mut queue = vec![base_dir.to_path_buf()];
+                    let mut visited = std::collections::HashSet::new();
+                    let mut found_path = None;
+
+                    while let Some(dir) = queue.pop() {
+                        if visited.contains(&dir) {
+                            continue;
+                        }
+                        visited.insert(dir.clone());
+
+                        let candidate1 = dir.join(include_name);
+                        if candidate1.exists()
+                            && candidate1.extension().map_or(false, |ext| ext == "md")
+                        {
+                            found_path = Some(format_path(&candidate1)?);
+                            break;
+                        }
+
+                        // Try add .md extension
+                        let candidate2 = dir.join(format!("{}.md", include_name));
+                        if candidate2.exists() {
+                            found_path = Some(format_path(&candidate2)?);
+                            break;
+                        }
+
+                        if let Some(parent) = dir.parent() {
+                            queue.push(parent.to_path_buf());
+                        }
+
+                        if let Ok(entries) = std::fs::read_dir(&dir) {
+                            for entry in entries.flatten() {
+                                if let Ok(file_type) = entry.file_type() {
+                                    if file_type.is_dir() {
+                                        queue.push(entry.path());
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    match found_path {
+                        Some(path) => {
+                            cache.insert(include_name.to_string(), path.clone());
+                            path
+                        }
+                        None => {
+                            return Err(Exit::FileNotFound(
+                                format!(
+                                    "{} or {}.md (searched from {:?})",
+                                    include_name, include_name, base_dir
+                                )
+                                .into(),
+                            ));
+                        }
+                    }
+                };
+
+                let include_content = std::fs::read_to_string(&include_abs).map_err(|e| {
+                    if e.kind() == std::io::ErrorKind::NotFound {
+                        Exit::FileNotFound(include_abs.clone())
+                    } else {
+                        Exit::IoError(e)
+                    }
+                })?;
+
+                let expanded = expand_recursive(
+                    include_content,
+                    &include_abs,
+                    root_path,
+                    stack,
+                    cache,
+                    img_cache,
+                )?;
+                output.push_str(&expanded);
+            } else {
+                // Process image links
+                let mut processed_line = line.to_string();
+
+                // First process Obsidian image syntax ![[...]]
+                let mut obsidian_matches: Vec<(usize, usize, String)> = Vec::new();
+
+                for caps in obsidian_image_re.captures_iter(line) {
+                    let full_match = caps.get(0).unwrap();
+                    let image_ref = caps.get(1).unwrap().as_str();
+
+                    // Try to get from img_cache first
+                    let image_abs = if let Some(cached_path) = img_cache.get(image_ref) {
+                        cached_path.clone()
+                    } else {
+                        // Start search from root directory for Obsidian syntax
+                        let root_dir = root_path.parent().unwrap();
+                        let mut queue = vec![root_dir.to_path_buf()];
+                        let mut visited = std::collections::HashSet::new();
+                        let mut found_image_path = None;
+
+                        while let Some(dir) = queue.pop() {
+                            if visited.contains(&dir) {
+                                continue;
+                            }
+                            visited.insert(dir.clone());
+
+                            // Check if file exists in this directory
+                            let candidate = dir.join(image_ref);
+                            if candidate.exists() {
+                                found_image_path = Some(format_path(&candidate)?);
+                                break;
+                            }
+
+                            // Add parent directory to queue (breadth-first upward)
+                            if let Some(parent) = dir.parent() {
+                                queue.push(parent.to_path_buf());
+                            }
+
+                            // Add subdirectories to queue (breadth-first downward)
+                            if let Ok(entries) = std::fs::read_dir(&dir) {
+                                for entry in entries.flatten() {
+                                    if let Ok(file_type) = entry.file_type() {
+                                        if file_type.is_dir() {
+                                            queue.push(entry.path());
+                                        }
+                                    }
+                                }
+                            }
+                        }
+
+                        match found_image_path {
+                            Some(path) => {
+                                img_cache.insert(image_ref.to_string(), path.clone());
+                                path
+                            }
+                            None => {
+                                // If image not found, keep the original syntax
+                                continue;
+                            }
+                        }
+                    };
+
+                    // Store the replacement information
+                    let start = full_match.start();
+                    let end = full_match.end();
+                    let new_image_markdown = format!("![]({})", image_abs.display());
+                    obsidian_matches.push((start, end, new_image_markdown));
+                }
+
+                // Apply Obsidian replacements in reverse order to maintain correct indices
+                for (start, end, replacement) in obsidian_matches.into_iter().rev() {
+                    processed_line.replace_range(start..end, &replacement);
+                }
+
+                output.push_str(&processed_line);
+                output.push('\n');
+            }
+        }
+
+        stack.pop();
+
+        Ok(output)
+    }
+
+    fn extract_include(line: &str) -> Option<&str> {
+        line.trim()
+            .strip_prefix("[[")
+            .and_then(|s| s.strip_suffix("]]"))
+    }
+}
+
+pub mod markdown_cleanup {
+    use crate::error::Exit;
+
+    /// Clean Markdown
+    /// 1. Remove blockquotes
+    /// 2. Remove empty lines
+    /// 3. Trim each line
+    pub fn proc(i: String) -> Result<String, Exit> {
+        let lines = i.lines();
+        let mut cleaned = Vec::new();
+
+        for line in lines {
+            if line.starts_with('>') {
+                continue;
+            }
+            let trimmed = line.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+            cleaned.push(trimmed.to_string());
+        }
+
+        Ok(cleaned.join("\n"))
+    }
+
+    #[cfg(test)]
+    mod test_clean_markdown {
+        use super::*;
+
+        #[test]
+        fn test_clean_markdown_removes_blockquotes() {
+            let input = "> This is a blockquote\nNormal text\n> Another blockquote".to_string();
+            let expected = "Normal text".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_removes_empty_lines() {
+            let input = "Line 1\n\n\nLine 2\n\n".to_string();
+            let expected = "Line 1\nLine 2".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_trims_lines() {
+            let input = "  Line 1  \n\tLine 2\t\n".to_string();
+            let expected = "Line 1\nLine 2".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_combined() {
+            let input = "> Blockquote\n\n  Line 1  \n> Another\n\nLine 2\n\n".to_string();
+            let expected = "Line 1\nLine 2".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_empty_input() {
+            let input = "".to_string();
+            let expected = "".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_only_blockquotes() {
+            let input = "> Quote 1\n> Quote 2".to_string();
+            let expected = "".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_clean_markdown_only_whitespace() {
+            let input = "   \n\t\n  ".to_string();
+            let expected = "".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+    }
+}
+
+pub mod markdown_jump_fix {
+    use crate::error::Exit;
+
+    /// Fix jump syntax in each line
+    /// 1. Correct the following syntax
+    /// ```ignore
+    /// - It's [Item](#Mark)
+    /// > corrected to
+    /// - It's Item [](#Mark)
+    /// ```
+    ///
+    /// 2. If there are multiple options, take the first one
+    /// ```ignore
+    /// - There might be two options: [A](#A) and [B](#B)!
+    /// > corrected to
+    /// - There might be two options: A and B! [](#A)
+    /// ```
+    pub fn proc(i: String) -> Result<String, Exit> {
+        let mut result = String::new();
+
+        for line in i.lines() {
+            let (processed_content, first_link_dest) = process_line_content(line);
+            let processed_line = format_line_with_link(processed_content, first_link_dest);
+            let final_line = convert_ordered_list_marker(processed_line);
+
+            result.push_str(&final_line);
+            result.push('\n');
+        }
+
+        if result.ends_with('\n') {
+            result.pop();
+        }
+
+        Ok(result)
+    }
+
+    pub fn process_line_content(line: &str) -> (String, Option<String>) {
+        // Check if line is an image line (starts with "![")
+        if line.starts_with("![") {
+            // Return the original line unchanged with no link destination
+            return (line.to_string(), None);
+        }
+
+        let mut processed = String::new();
+        let mut chars = line.chars().peekable();
+        let mut first_link_dest = None;
+        let mut has_link = false;
+
+        while let Some(ch) = chars.next() {
+            if ch == '[' {
+                if let Some((link_text, link_dest, remaining_chars)) = helper_parse_link(&mut chars)
+                {
+                    processed.push_str(&link_text);
+                    if !has_link {
+                        first_link_dest = Some(link_dest);
+                        has_link = true;
+                    }
+                    chars = remaining_chars;
+                    continue;
+                } else {
+                    // Invalid
+                    processed.push(ch);
+                }
+            } else {
+                processed.push(ch);
+            }
+        }
+
+        (processed, first_link_dest)
+    }
+
+    pub fn helper_parse_link<'a>(
+        chars: &mut std::iter::Peekable<std::str::Chars<'a>>,
+    ) -> Option<(String, String, std::iter::Peekable<std::str::Chars<'a>>)> {
+        let mut link_text = String::new();
+
+        while let Some(&ch) = chars.peek() {
+            chars.next();
+            if ch == ']' {
+                break;
+            }
+            link_text.push(ch);
+        }
+
+        if chars.next() != Some('(') || chars.next() != Some('#') {
+            return None;
+        }
+
+        let mut link_dest = String::new();
+        while let Some(ch) = chars.next() {
+            if ch == ')' {
+                break;
+            }
+            link_dest.push(ch);
+        }
+
+        let cleaned_dest = link_dest.trim().replace(' ', "").replace('#', "");
+
+        Some((link_text, cleaned_dest, chars.clone()))
+    }
+
+    pub fn format_line_with_link(content: String, link_dest: Option<String>) -> String {
+        match link_dest {
+            Some(dest) if !dest.trim().is_empty() => {
+                format!("{} [](#{})", content.trim_end(), dest.trim())
+                    .trim()
+                    .to_string()
+            }
+            _ => content,
+        }
+    }
+
+    pub fn convert_ordered_list_marker(line: String) -> String {
+        let trimmed = line.trim_start();
+
+        if let Some(_rest) = trimmed.strip_prefix(|c: char| c.is_ascii_digit()) {
+            let mut chars = trimmed.chars();
+            let mut digit_count = 0;
+
+            while let Some(c) = chars.next() {
+                if c.is_ascii_digit() {
+                    digit_count += 1;
+                } else {
+                    break;
+                }
+            }
+
+            if digit_count > 0 {
+                let rest_after_digits = &trimmed[digit_count..];
+                if let Some(content) = rest_after_digits.strip_prefix(". ") {
+                    return format!("- {}", content);
+                }
+            }
+        }
+
+        line
+    }
+
+    #[cfg(test)]
+    mod test_fix_mark_jump {
+        use super::*;
+
+        #[test]
+        fn test_fix_mark_jump_single_link() {
+            let input = "- It's [Item](#Mark)".to_string();
+            let expected = "- It's Item [](#Mark)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_multiple_links_takes_first() {
+            let input = "- There might be two options: [A](#A) and [B](#B)!".to_string();
+            let expected = "- There might be two options: A and B! [](#A)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_no_link() {
+            let input = "- Just a normal line".to_string();
+            let expected = "- Just a normal line".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_empty_line() {
+            let input = "".to_string();
+            let expected = "".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_multiple_lines() {
+            let input = "- First [Item](#First)\n- Second [Item](#Second)".to_string();
+            let expected = "- First Item [](#First)\n- Second Item [](#Second)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_link_at_end() {
+            let input = "- End with [link](#target)".to_string();
+            let expected = "- End with link [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_link_at_beginning() {
+            let input = "- [Start](#target) with link".to_string();
+            let expected = "- Start with link [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_link_in_middle() {
+            let input = "- Text [middle](#target) text".to_string();
+            let expected = "- Text middle text [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_ordered_list_conversion() {
+            let input = "1. [Item](#target)".to_string();
+            let expected = "- Item [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_ordered_list_multiple_digits() {
+            let input = "10. [Tenth](#target) item".to_string();
+            let expected = "- Tenth item [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_mixed_ordered_and_unordered() {
+            let input = "1. [First](#first)\n- [Second](#second)\n2. [Third](#third)".to_string();
+            let expected =
+                "- First [](#first)\n- Second [](#second)\n- Third [](#third)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_invalid_link_format() {
+            let input = "- Invalid [link format".to_string();
+            let expected = "- Invalid [".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_link_with_spaces_in_target() {
+            let input = "- Link [text](#  target#)".to_string();
+            let expected = "- Link text [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_empty_link_text() {
+            let input = "- [](#target)".to_string();
+            let expected = "- [](#target)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_only_whitespace() {
+            let input = "   ".to_string();
+            let expected = "   ".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+
+        #[test]
+        fn test_fix_mark_jump_complex_multiple_links() {
+            let input = "- Choose [A](#A), [B](#B), or [C](#C)!".to_string();
+            let expected = "- Choose A, B, or C! [](#A)".to_string();
+            let Ok(result) = proc(input) else {
+                panic!("Parse error!");
+            };
+            assert_eq!(result, expected);
+        }
+    }
+}
+
+pub mod markdown_marker_rename {
+    use regex::Regex;
+    use sha2::{Digest, Sha256};
+
+    use crate::error::Exit;
+
+    /// Replace marker names: replace heading text and link anchors with corresponding SHA256
+    ///
+    /// Example:
+    /// ```ignore
+    /// # Original text
+    /// # Chapter Title
+    /// - Jump to [Chapter Title](#Chapter Title)
+    ///
+    /// # After processing
+    /// # a1b2c3d4
+    /// - Jump to [](#a1b2c3d4)
+    /// ```
+    pub fn proc(i: String) -> Result<String, Exit> {
+        let mut result = i;
+
+        let heading_re = Regex::new(r"^(#{1,5})\s+(.+)$").unwrap();
+        let mut heading_map = std::collections::HashMap::new();
+
+        for line in result.lines() {
+            if let Some(caps) = heading_re.captures(line) {
+                let heading_text = caps[2].trim().to_string();
+                let hash = format!("{:x}", Sha256::digest(heading_text.as_bytes()));
+                let short_hash = &hash[..8];
+                heading_map.insert(heading_text, short_hash.to_string());
+            }
+        }
+
+        let mut lines: Vec<String> = Vec::new();
+        for line in result.lines() {
+            if let Some(caps) = heading_re.captures(line) {
+                let level = &caps[1];
+                let heading_text = caps[2].trim();
+
+                if let Some(hash) = heading_map.get(heading_text) {
+                    lines.push(format!("{} {}", level, hash));
+                } else {
+                    lines.push(line.to_string());
+                }
+            } else {
+                lines.push(line.to_string());
+            }
+        }
+        result = lines.join("\n");
+
+        let link_re = Regex::new(r"\[\]\(#([^)]+)\)").unwrap();
+        result = link_re
+            .replace_all(&result, |caps: &regex::Captures| {
+                let anchor_name = &caps[1];
+                if let Some(hash) = heading_map.get(anchor_name) {
+                    format!("[](#{})", hash)
+                } else {
+                    let hash = format!("{:x}", Sha256::digest(anchor_name.as_bytes()));
+                    let short_hash = &hash[..8];
+                    format!("[](#{})", short_hash)
+                }
+            })
+            .to_string();
+
+        Ok(result)
+    }
+}
+
+pub mod markdown_struct_build {
+    use regex::Regex;
+
+    use crate::error::Exit;
+
+    /// Split content into Step + Sentence structure
+    pub fn proc(input: String) -> Result<String, Exit> {
+        let mut result = String::new();
+        let mut current_marker = String::new();
+        let mut current_step_id = 0;
+        let mut current_character = String::new();
+        let mut has_no_switch_flag = false;
+
+        let mut code_record_mode = false;
+        let mut option_record_mode = false;
+
+        let mut sentences_buffer = String::new();
+        for line in input.split("\n") {
+            // Record code
+            if code_record_mode {
+                // If code block marker is found again, end code recording
+                if line.starts_with("```") && code_record_mode {
+                    sentences_buffer.push_str("\n");
+                    code_record_mode = false;
+                    continue;
+                }
+                sentences_buffer.push_str(format!("`{}`", line).as_str());
+                continue;
+            }
+
+            // Record options
+            if option_record_mode {
+                // Still an option, continue appending
+                if line.starts_with("- ") {
+                    let (sentence, next) = get_jump_from_line(line);
+                    let next = if let Some(next) = next {
+                        format!("->[#{}_0]", next)
+                    } else {
+                        next_flag(current_marker.as_str(), current_step_id)
+                    };
+                    let option_line = format!(
+                        "{}[{}]{}",
+                        character(&current_character, has_no_switch_flag),
+                        sentence,
+                        next
+                    );
+                    sentences_buffer.push_str(option_line.as_str());
+                    sentences_buffer.push('\n');
+                    continue;
+                } else {
+                    // When ending option recording, create and advance one Step
+                    result.push_str(step_line(current_marker.as_str(), current_step_id).as_str());
+                    result.push('\n');
+                    result.push_str(sentences_buffer.as_str());
+                    sentences_buffer.clear();
+                    current_step_id += 1;
+                    // Clean "Has no switch flag"
+                    has_no_switch_flag = false;
+                    // Close option mode
+                    option_record_mode = false;
+                    // Do not continue here, proceed to process subsequent content
+                }
+            }
+
+            // Refresh heading
+            if is_marker(line) {
+                current_marker = read_maker(line).to_string();
+                current_step_id = 0;
+                continue;
+            }
+
+            // Refresh character
+            if is_character(line) {
+                let (character, no_switch_flag) = read_character(line);
+                current_character = character.to_string();
+                has_no_switch_flag = no_switch_flag;
+                continue;
+            }
+
+            // Image recording
+            if line.starts_with('!') {
+                sentences_buffer.push_str(line);
+                sentences_buffer.push('\n');
+                continue;
+            }
+
+            // Start code recording
+            if line.starts_with("```") && !code_record_mode {
+                code_record_mode = true;
+                continue;
+            }
+
+            // Option recording
+            if line.starts_with("- ") {
+                let (sentence, next) = get_jump_from_line(line);
+                let next = if let Some(next) = next {
+                    format!("->[#{}_0]", next)
+                } else {
+                    next_flag(current_marker.as_str(), current_step_id)
+                };
+                let option_line = format!(
+                    "{}[{}]{}",
+                    character(&current_character, has_no_switch_flag),
+                    sentence,
+                    next
+                );
+                sentences_buffer.push_str(option_line.as_str());
+                sentences_buffer.push('\n');
+
+                // Start option recording mode
+                if !option_record_mode {
+                    option_record_mode = true;
+                }
+                continue;
+            }
+
+            // Normal sentence
+            let (sentence, next) = get_jump_from_line(line);
+            let next = if let Some(next) = next {
+                format!("->[#{}_0]", next)
+            } else {
+                next_flag(current_marker.as_str(), current_step_id)
+            };
+            let sentence_line = format!(
+                "{}[{}]{}",
+                character(&current_character, has_no_switch_flag),
+                sentence,
+                next
+            );
+            has_no_switch_flag = false;
+
+            // Create and advance one Step
+            result.push_str(step_line(current_marker.as_str(), current_step_id).as_str());
+            result.push('\n');
+            result.push_str(sentences_buffer.as_str());
+            sentences_buffer.clear();
+            result.push_str(sentence_line.as_str());
+            result.push('\n');
+            current_step_id += 1;
+        }
+
+        Ok(result)
+    }
+
+    pub fn character(character: &str, has_no_switch_flag: bool) -> String {
+        let flag = if has_no_switch_flag { "*" } else { "" };
+        format!("[{}{}{}]:", &flag, character, &flag)
+    }
+
+    pub fn step_name(marker: &str, current_id: i64) -> String {
+        format!("{}_{}", marker, current_id)
+    }
+
+    pub fn step_line(marker: &str, current_id: i64) -> String {
+        format!("@@@@@@@@@@ {}_{}", marker, current_id)
+    }
+
+    pub fn next_flag(marker: &str, current_id: i64) -> String {
+        format!("->[#{}_{}]", marker, current_id + 1)
+    }
+
+    pub fn is_marker(line: &str) -> bool {
+        line.starts_with("# ")
+            || line.starts_with("## ")
+            || line.starts_with("### ")
+            || line.starts_with("#### ")
+            || line.starts_with("##### ")
+    }
+
+    pub fn read_maker(line: &str) -> &str {
+        let trimmed = line.trim_start();
+        if trimmed.starts_with('#') {
+            if trimmed.starts_with("# ")
+                || trimmed.starts_with("## ")
+                || trimmed.starts_with("### ")
+                || trimmed.starts_with("#### ")
+                || trimmed.starts_with("##### ")
+            {
+                let parts: Vec<&str> = trimmed.splitn(2, ' ').collect();
+                if parts.len() == 2 {
+                    return parts[1].trim();
+                }
+            }
+        }
+        ""
+    }
+
+    pub fn is_character(line: &str) -> bool {
+        line.starts_with("######")
+    }
+
+    pub fn read_character(line: &str) -> (&str, bool) {
+        let trimmed = line.trim_start();
+        if trimmed.starts_with("######") {
+            let parts: Vec<&str> = trimmed.splitn(2, ' ').collect();
+            if parts.len() == 2 {
+                let character = parts[1].trim();
+                if character.starts_with('*') && character.ends_with('*') {
+                    let trimmed = character.trim_matches('*');
+                    return (trimmed.trim(), true);
+                } else {
+                    return (character.trim(), false);
+                }
+            }
+        }
+        ("", false)
+    }
+
+    pub fn get_jump_from_line(line: &str) -> (String, Option<String>) {
+        let pattern = r"\[\]\(#([^)]+)\)$";
+        let re = Regex::new(pattern).unwrap();
+
+        if let Some(caps) = re.captures(line.trim_end()) {
+            let target = caps.get(1).unwrap().as_str();
+            let line_without_jump = line
+                .trim_end()
+                .replace(&format!(" [](#{})", target), "")
+                .to_string();
+            return (
+                line_without_jump.trim_start_matches("- ").to_string(),
+                Some(format!("{}", target)),
+            );
+        }
+
+        (line.trim_start_matches("- ").to_string(), None)
+    }
+}
+
+pub mod markdown_strip_invalid_jump {
+    use crate::error::Exit;
+    use regex::Regex;
+
+    /// Strip all jumps that have not appeared
+    pub fn proc(input: String) -> Result<String, Exit> {
+        let lines: Vec<&str> = input.lines().collect();
+        let mut valid_ids = std::collections::HashSet::new();
+
+        for line in &lines {
+            if line.starts_with("@@@@@@@@@@ ") {
+                let id = line.trim_start_matches("@@@@@@@@@@ ").trim();
+                valid_ids.insert(id.to_string());
+            }
+        }
+
+        let mut result_lines = Vec::new();
+        let link_re = Regex::new(r"\[#([^)]+)\]").unwrap();
+
+        for line in lines {
+            let processed_line = link_re.replace_all(line, |caps: &regex::Captures| {
+                let id = &caps[1];
+                if valid_ids.contains(id) {
+                    format!("[#{}]", id)
+                } else {
+                    "[]".to_string()
+                }
+            });
+            result_lines.push(processed_line.to_string());
+        }
+
+        Ok(result_lines.join("\n"))
+    }
+}
+
+pub mod markdown_convert_image {
+    use regex::Regex;
+
+    use crate::error::Exit;
+
+    /// Convert image lines to code lines
+    pub fn proc(input: String) -> Result<String, Exit> {
+        let mut result = String::new();
+        let lines: Vec<&str> = input.lines().collect();
+        let image_re = Regex::new(r"^!\[[^\]]*\]\(([^)]+)\)$").unwrap();
+
+        for line in lines {
+            if let Some(caps) = image_re.captures(line) {
+                let image_path = caps.get(1).unwrap().as_str();
+                result.push_str(&format!("`image \"{}\"`\n", image_path));
+            } else {
+                result.push_str(line);
+                result.push('\n');
+            }
+        }
+
+        // Remove trailing newline if present
+        if result.ends_with('\n') {
+            result.pop();
+        }
+
+        Ok(result)
+    }
+}
+
+pub mod markdown_apply_codes {
+    use crate::error::Exit;
+
+    /// Apply code lines to sentences
+    pub fn proc(input: String) -> Result<String, Exit> {
+        let mut out = String::new();
+        let lines: Vec<&str> = input.lines().collect();
+
+        let mut i = 0;
+        while i < lines.len() {
+            let line = lines[i];
+
+            if !line.trim_start().starts_with('`') {
+                out.push_str(line);
+                out.push('\n');
+                i += 1;
+                continue;
+            }
+
+            let mut code_buf = String::new();
+            while i < lines.len() && {
+                let line: &str = lines[i];
+                line.trim_start().starts_with('`')
+            } {
+                code_buf.push_str(lines[i].trim());
+                i += 1;
+            }
+
+            if i >= lines.len()
+                || !{
+                    let line: &str = lines[i];
+                    line.trim_start().starts_with('[')
+                }
+            {
+                continue;
+            }
+
+            if i + 1 < lines.len() && {
+                let line: &str = lines[i + 1];
+                line.trim_start().starts_with('[')
+            } {
+                continue;
+            }
+
+            let merged = merge_code_into_sentence(&code_buf, lines[i]);
+            out.push_str(&merged);
+            out.push('\n');
+            i += 1;
+        }
+
+        Ok(out)
+    }
+
+    fn merge_code_into_sentence(code: &str, sentence: &str) -> String {
+        if let Some(start) = sentence.find(":[") {
+            if let Some(_) = sentence[start + 2..].find(']') {
+                let content_start = start + 2;
+
+                let mut result = String::new();
+                result.push_str(&sentence[..content_start]);
+                result.push_str(code);
+                result.push_str(&sentence[content_start..]);
+                return result;
+            }
+        }
+
+        sentence.to_string()
+    }
+}
+
+pub mod markdown_split_and_encode {
+    use crate::error::Exit;
+
+    /// Split sentences into embeddable tokens and perform Unicode encoding
+    pub fn proc(input: String) -> Result<String, Exit> {
+        let mut result = String::new();
+        let lines: Vec<&str> = input.lines().collect();
+
+        for line in lines {
+            if line.starts_with('[') && line.contains("]:[") && line.contains("]->[") {
+                if let Some(start) = line.find("]:[") {
+                    if let Some(end) = line.find("]->[") {
+                        let content = &line[start + 3..end];
+                        let processed_content = process_sentence_content(content);
+
+                        let suffix = &line[end + 1..];
+
+                        let char_end = start;
+                        let char_start = 1;
+                        let character = &line[char_start..char_end];
+                        let encoded_character = encode_unicode(character);
+
+                        // Build the new line with encoded character and processed content
+                        let new_line =
+                            format!("[{}]:{}{}", encoded_character, processed_content, suffix);
+                        result.push_str(&format!("{}\n", new_line));
+                        continue;
+                    }
+                }
+            }
+            result.push_str(&format!("{}\n", line));
+        }
+
+        if result.ends_with('\n') {
+            result.pop();
+        }
+
+        Ok(result)
+    }
+
+    fn process_sentence_content(content: &str) -> String {
+        let mut result = String::new();
+        let mut chars = content.chars().peekable();
+        let mut current_text = String::new();
+        let mut in_code = false;
+        let mut in_bold = false;
+        let mut in_italic = false;
+        let mut code_buffer = String::new();
+        let mut backticks_count = 0;
+
+        while let Some(ch) = chars.next() {
+            match ch {
+                '`' => {
+                    backticks_count += 1;
+                    if backticks_count == 1 {
+                        // Start of code block
+                        if !current_text.is_empty() {
+                            let encoded_text = encode_unicode(&current_text);
+                            result.push_str(&format!("[text:[{}]]", encoded_text));
+                            current_text.clear();
+                        }
+                        code_buffer.push(ch);
+                        in_code = true;
+                    } else if backticks_count == 2 && in_code {
+                        // End of code block
+                        code_buffer.push(ch);
+                        let encoded_code = encode_unicode(&code_buffer);
+                        result.push_str(&format!("[code:[{}]]", encoded_code));
+                        code_buffer.clear();
+                        backticks_count = 0;
+                        in_code = false;
+                    } else if backticks_count == 1 && !in_code {
+                        // Single backtick in text
+                        current_text.push(ch);
+                    }
+                }
+                '*' => {
+                    if in_code {
+                        code_buffer.push(ch);
+                        continue;
+                    }
+
+                    // Check for bold
+                    if chars.peek() == Some(&'*') {
+                        chars.next(); // Consume the second '*'
+
+                        // Check for bold_italic (***)
+                        if chars.peek() == Some(&'*') {
+                            chars.next(); // Consume the third '*'
+
+                            if in_bold && in_italic {
+                                // End bold_italic
+                                if !current_text.is_empty() {
+                                    let encoded_text = encode_unicode(&current_text);
+                                    result.push_str(&format!("[bold_italic:[{}]]", encoded_text));
+                                    current_text.clear();
+                                }
+                                in_bold = false;
+                                in_italic = false;
+                            } else {
+                                // Start bold_italic
+                                if !current_text.is_empty() {
+                                    let encoded_text = encode_unicode(&current_text);
+                                    result.push_str(&format!("[text:[{}]]", encoded_text));
+                                    current_text.clear();
+                                }
+                                in_bold = true;
+                                in_italic = true;
+                            }
+                        } else {
+                            // Handle ** (bold)
+                            if in_bold && !in_italic {
+                                // End bold
+                                if !current_text.is_empty() {
+                                    let encoded_text = encode_unicode(&current_text);
+                                    result.push_str(&format!("[bold:[{}]]", encoded_text));
+                                    current_text.clear();
+                                }
+                                in_bold = false;
+                            } else if in_italic {
+                                // Currently in italic, encountering ** means we need to end italic and start bold_italic
+                                if !current_text.is_empty() {
+                                    let encoded_text = encode_unicode(&current_text);
+                                    result.push_str(&format!("[italic:[{}]]", encoded_text));
+                                    current_text.clear();
+                                }
+                                in_italic = false;
+                                in_bold = true;
+                            } else {
+                                // Start bold
+                                if !current_text.is_empty() {
+                                    let encoded_text = encode_unicode(&current_text);
+                                    result.push_str(&format!("[text:[{}]]", encoded_text));
+                                    current_text.clear();
+                                }
+                                in_bold = true;
+                            }
+                        }
+                    } else {
+                        // Single * (italic)
+                        if in_italic && !in_bold {
+                            // End italic
+                            if !current_text.is_empty() {
+                                let encoded_text = encode_unicode(&current_text);
+                                result.push_str(&format!("[italic:[{}]]", encoded_text));
+                                current_text.clear();
+                            }
+                            in_italic = false;
+                        } else if in_bold {
+                            // Currently in bold, encountering * means we need to start bold_italic
+                            if !current_text.is_empty() {
+                                let encoded_text = encode_unicode(&current_text);
+                                result.push_str(&format!("[bold:[{}]]", encoded_text));
+                                current_text.clear();
+                            }
+                            in_italic = true;
+                        } else {
+                            // Start italic
+                            if !current_text.is_empty() {
+                                let encoded_text = encode_unicode(&current_text);
+                                result.push_str(&format!("[text:[{}]]", encoded_text));
+                                current_text.clear();
+                            }
+                            in_italic = true;
+                        }
+                    }
+                }
+                _ => {
+                    if in_code {
+                        code_buffer.push(ch);
+                    } else {
+                        current_text.push(ch);
+                    }
+                }
+            }
+        }
+
+        // Handle any remaining text
+        if !code_buffer.is_empty() {
+            let encoded_code = encode_unicode(&code_buffer);
+            result.push_str(&format!("[code:[{}]]", encoded_code));
+        }
+
+        if !current_text.is_empty() {
+            let style = match (in_bold, in_italic) {
+                (true, true) => "bold_italic",
+                (true, false) => "bold",
+                (false, true) => "italic",
+                (false, false) => "text",
+            };
+            let encoded_text = encode_unicode(&current_text);
+            result.push_str(&format!("[{}:[{}]]", style, encoded_text));
+        }
+
+        result
+    }
+
+    fn encode_unicode(s: &str) -> String {
+        let mut result = String::new();
+        for ch in s.chars() {
+            let code = ch as u32;
+            if code <= 0x7F {
+                result.push(ch);
+            } else {
+                result.push_str(&format!("\\u{:X}", code));
+            }
+        }
+        result
+    }
+}
author	魏曹先生 <1992414357@qq.com>	2026-04-22 20:24:18 +0800
committer	魏曹先生 <1992414357@qq.com>	2026-04-22 20:24:18 +0800
commit	277bc93f84b298c7cb24e136f67eb237fb3a68a2 (patch)
tree	3a678677882e9884a2a17fa828358d1c9fd3dd9c /_prototype/converter/src/parse.rs
parent	3e3ffa97985ed588ef46e007edb7507189a65e8e (diff)